]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/btrfs-util.c
Merge pull request #7695 from yuwata/transient-socket
[thirdparty/systemd.git] / src / basic / btrfs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2014 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <inttypes.h>
24 #include <linux/fs.h>
25 #include <linux/loop.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <unistd.h>
35
36 #if HAVE_LINUX_BTRFS_H
37 #include <linux/btrfs.h>
38 #endif
39
40 #include "alloc-util.h"
41 #include "btrfs-ctree.h"
42 #include "btrfs-util.h"
43 #include "chattr-util.h"
44 #include "copy.h"
45 #include "device-nodes.h"
46 #include "fd-util.h"
47 #include "fileio.h"
48 #include "io-util.h"
49 #include "macro.h"
50 #include "missing.h"
51 #include "path-util.h"
52 #include "rm-rf.h"
53 #include "smack-util.h"
54 #include "sparse-endian.h"
55 #include "stat-util.h"
56 #include "string-util.h"
57 #include "time-util.h"
58 #include "util.h"
59
60 /* WARNING: Be careful with file system ioctls! When we get an fd, we
61 * need to make sure it either refers to only a regular file or
62 * directory, or that it is located on btrfs, before invoking any
63 * btrfs ioctls. The ioctl numbers are reused by some device drivers
64 * (such as DRM), and hence might have bad effects when invoked on
65 * device nodes (that reference drivers) rather than fds to normal
66 * files or directories. */
67
68 static int validate_subvolume_name(const char *name) {
69
70 if (!filename_is_valid(name))
71 return -EINVAL;
72
73 if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
74 return -E2BIG;
75
76 return 0;
77 }
78
79 static int open_parent(const char *path, int flags) {
80 _cleanup_free_ char *parent = NULL;
81 int fd;
82
83 assert(path);
84
85 parent = dirname_malloc(path);
86 if (!parent)
87 return -ENOMEM;
88
89 fd = open(parent, flags);
90 if (fd < 0)
91 return -errno;
92
93 return fd;
94 }
95
/* Stores the last component of 'path' in *subvolume, after verifying that it
 * is acceptable as subvolume name. The returned pointer comes straight from
 * basename(path); nothing is allocated here.
 *
 * Returns 0 on success, or the negative error of validate_subvolume_name(),
 * in which case *subvolume is left untouched. */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *last;
        int r;

        assert(path);
        assert(subvolume);

        last = basename(path);

        r = validate_subvolume_name(last);
        if (r >= 0) {
                *subvolume = last;
                return 0;
        }

        return r;
}
112
113 int btrfs_is_filesystem(int fd) {
114 struct statfs sfs;
115
116 assert(fd >= 0);
117
118 if (fstatfs(fd, &sfs) < 0)
119 return -errno;
120
121 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
122 }
123
/* Checks whether 'fd' refers to the root directory of a btrfs subvolume.
 *
 * Returns 0 if it is not a directory with inode number 256, otherwise the
 * result of btrfs_is_filesystem(); a negative errno if fstat() fails. */
int btrfs_is_subvol_fd(int fd) {
        struct stat st;

        assert(fd >= 0);

        if (fstat(fd, &st) < 0)
                return -errno;

        /* On btrfs the root directory of a subvolume always has inode number 256 */
        if (S_ISDIR(st.st_mode) && st.st_ino == 256)
                return btrfs_is_filesystem(fd);

        return 0;
}
139
140 int btrfs_is_subvol(const char *path) {
141 _cleanup_close_ int fd = -1;
142
143 assert(path);
144
145 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
146 if (fd < 0)
147 return -errno;
148
149 return btrfs_is_subvol_fd(fd);
150 }
151
152 int btrfs_subvol_make(const char *path) {
153 struct btrfs_ioctl_vol_args args = {};
154 _cleanup_close_ int fd = -1;
155 const char *subvolume;
156 int r;
157
158 assert(path);
159
160 r = extract_subvolume_name(path, &subvolume);
161 if (r < 0)
162 return r;
163
164 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
165 if (fd < 0)
166 return fd;
167
168 strncpy(args.name, subvolume, sizeof(args.name)-1);
169
170 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
171 return -errno;
172
173 return 0;
174 }
175
176 int btrfs_subvol_set_read_only_fd(int fd, bool b) {
177 uint64_t flags, nflags;
178 struct stat st;
179
180 assert(fd >= 0);
181
182 if (fstat(fd, &st) < 0)
183 return -errno;
184
185 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
186 return -EINVAL;
187
188 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
189 return -errno;
190
191 if (b)
192 nflags = flags | BTRFS_SUBVOL_RDONLY;
193 else
194 nflags = flags & ~BTRFS_SUBVOL_RDONLY;
195
196 if (flags == nflags)
197 return 0;
198
199 if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
200 return -errno;
201
202 return 0;
203 }
204
205 int btrfs_subvol_set_read_only(const char *path, bool b) {
206 _cleanup_close_ int fd = -1;
207
208 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
209 if (fd < 0)
210 return -errno;
211
212 return btrfs_subvol_set_read_only_fd(fd, b);
213 }
214
215 int btrfs_subvol_get_read_only_fd(int fd) {
216 uint64_t flags;
217 struct stat st;
218
219 assert(fd >= 0);
220
221 if (fstat(fd, &st) < 0)
222 return -errno;
223
224 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
225 return -EINVAL;
226
227 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
228 return -errno;
229
230 return !!(flags & BTRFS_SUBVOL_RDONLY);
231 }
232
233 int btrfs_reflink(int infd, int outfd) {
234 struct stat st;
235 int r;
236
237 assert(infd >= 0);
238 assert(outfd >= 0);
239
240 /* Make sure we invoke the ioctl on a regular file, so that no
241 * device driver accidentally gets it. */
242
243 if (fstat(outfd, &st) < 0)
244 return -errno;
245
246 if (!S_ISREG(st.st_mode))
247 return -EINVAL;
248
249 r = ioctl(outfd, BTRFS_IOC_CLONE, infd);
250 if (r < 0)
251 return -errno;
252
253 return 0;
254 }
255
256 int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
257 struct btrfs_ioctl_clone_range_args args = {
258 .src_fd = infd,
259 .src_offset = in_offset,
260 .src_length = sz,
261 .dest_offset = out_offset,
262 };
263 struct stat st;
264 int r;
265
266 assert(infd >= 0);
267 assert(outfd >= 0);
268 assert(sz > 0);
269
270 if (fstat(outfd, &st) < 0)
271 return -errno;
272
273 if (!S_ISREG(st.st_mode))
274 return -EINVAL;
275
276 r = ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args);
277 if (r < 0)
278 return -errno;
279
280 return 0;
281 }
282
283 int btrfs_get_block_device_fd(int fd, dev_t *dev) {
284 struct btrfs_ioctl_fs_info_args fsi = {};
285 uint64_t id;
286 int r;
287
288 assert(fd >= 0);
289 assert(dev);
290
291 r = btrfs_is_filesystem(fd);
292 if (r < 0)
293 return r;
294 if (!r)
295 return -ENOTTY;
296
297 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
298 return -errno;
299
300 /* We won't do this for btrfs RAID */
301 if (fsi.num_devices != 1)
302 return 0;
303
304 for (id = 1; id <= fsi.max_id; id++) {
305 struct btrfs_ioctl_dev_info_args di = {
306 .devid = id,
307 };
308 struct stat st;
309
310 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
311 if (errno == ENODEV)
312 continue;
313
314 return -errno;
315 }
316
317 if (stat((char*) di.path, &st) < 0)
318 return -errno;
319
320 if (!S_ISBLK(st.st_mode))
321 return -ENODEV;
322
323 if (major(st.st_rdev) == 0)
324 return -ENODEV;
325
326 *dev = st.st_rdev;
327 return 1;
328 }
329
330 return -ENODEV;
331 }
332
333 int btrfs_get_block_device(const char *path, dev_t *dev) {
334 _cleanup_close_ int fd = -1;
335
336 assert(path);
337 assert(dev);
338
339 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
340 if (fd < 0)
341 return -errno;
342
343 return btrfs_get_block_device_fd(fd, dev);
344 }
345
346 int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
347 struct btrfs_ioctl_ino_lookup_args args = {
348 .objectid = BTRFS_FIRST_FREE_OBJECTID
349 };
350 int r;
351
352 assert(fd >= 0);
353 assert(ret);
354
355 r = btrfs_is_filesystem(fd);
356 if (r < 0)
357 return r;
358 if (!r)
359 return -ENOTTY;
360
361 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
362 return -errno;
363
364 *ret = args.treeid;
365 return 0;
366 }
367
368 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
369 _cleanup_close_ int subvol_fd = -1;
370
371 assert(fd >= 0);
372 assert(ret);
373
374 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
375 if (subvol_fd < 0)
376 return -errno;
377
378 return btrfs_subvol_get_id_fd(subvol_fd, ret);
379 }
380
381 static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
382 assert(args);
383
384 /* the objectid, type, offset together make up the btrfs key,
385 * which is considered a single 136byte integer when
386 * comparing. This call increases the counter by one, dealing
387 * with the overflow between the overflows */
388
389 if (args->key.min_offset < (uint64_t) -1) {
390 args->key.min_offset++;
391 return true;
392 }
393
394 if (args->key.min_type < (uint8_t) -1) {
395 args->key.min_type++;
396 args->key.min_offset = 0;
397 return true;
398 }
399
400 if (args->key.min_objectid < (uint64_t) -1) {
401 args->key.min_objectid++;
402 args->key.min_offset = 0;
403 args->key.min_type = 0;
404 return true;
405 }
406
407 return 0;
408 }
409
410 static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
411 assert(args);
412 assert(h);
413
414 args->key.min_objectid = h->objectid;
415 args->key.min_type = h->type;
416 args->key.min_offset = h->offset;
417 }
418
419 static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
420 assert(args);
421
422 /* Compare min and max */
423
424 if (args->key.min_objectid < args->key.max_objectid)
425 return -1;
426 if (args->key.min_objectid > args->key.max_objectid)
427 return 1;
428
429 if (args->key.min_type < args->key.max_type)
430 return -1;
431 if (args->key.min_type > args->key.max_type)
432 return 1;
433
434 if (args->key.min_offset < args->key.max_offset)
435 return -1;
436 if (args->key.min_offset > args->key.max_offset)
437 return 1;
438
439 return 0;
440 }
441
/* Iterates over the item headers stored in the result buffer of a tree search:
 * 'i' counts from 0 up to (args).key.nr_items, and 'sh' points to the current
 * item header, starting at (args).buf. Each step skips over the header itself
 * plus the (sh)->len bytes that follow it. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))
448
/* Evaluates to a pointer to the data that immediately follows the search item
 * header 'sh'. The argument is parenthesized so that the cast applies to the
 * whole argument expression, not just to its first operand. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
451
452 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
453 struct btrfs_ioctl_search_args args = {
454 /* Tree of tree roots */
455 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
456
457 /* Look precisely for the subvolume items */
458 .key.min_type = BTRFS_ROOT_ITEM_KEY,
459 .key.max_type = BTRFS_ROOT_ITEM_KEY,
460
461 .key.min_offset = 0,
462 .key.max_offset = (uint64_t) -1,
463
464 /* No restrictions on the other components */
465 .key.min_transid = 0,
466 .key.max_transid = (uint64_t) -1,
467 };
468
469 bool found = false;
470 int r;
471
472 assert(fd >= 0);
473 assert(ret);
474
475 if (subvol_id == 0) {
476 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
477 if (r < 0)
478 return r;
479 } else {
480 r = btrfs_is_filesystem(fd);
481 if (r < 0)
482 return r;
483 if (!r)
484 return -ENOTTY;
485 }
486
487 args.key.min_objectid = args.key.max_objectid = subvol_id;
488
489 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
490 const struct btrfs_ioctl_search_header *sh;
491 unsigned i;
492
493 args.key.nr_items = 256;
494 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
495 return -errno;
496
497 if (args.key.nr_items <= 0)
498 break;
499
500 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
501
502 const struct btrfs_root_item *ri;
503
504 /* Make sure we start the next search at least from this entry */
505 btrfs_ioctl_search_args_set(&args, sh);
506
507 if (sh->objectid != subvol_id)
508 continue;
509 if (sh->type != BTRFS_ROOT_ITEM_KEY)
510 continue;
511
512 /* Older versions of the struct lacked the otime setting */
513 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
514 continue;
515
516 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
517
518 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
519 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
520
521 ret->subvol_id = subvol_id;
522 ret->read_only = !!(le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY);
523
524 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
525 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
526 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
527
528 found = true;
529 goto finish;
530 }
531
532 /* Increase search key by one, to read the next item, if we can. */
533 if (!btrfs_ioctl_search_args_inc(&args))
534 break;
535 }
536
537 finish:
538 if (!found)
539 return -ENODATA;
540
541 return 0;
542 }
543
544 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
545
546 struct btrfs_ioctl_search_args args = {
547 /* Tree of quota items */
548 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
549
550 /* The object ID is always 0 */
551 .key.min_objectid = 0,
552 .key.max_objectid = 0,
553
554 /* Look precisely for the quota items */
555 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
556 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
557
558 /* No restrictions on the other components */
559 .key.min_transid = 0,
560 .key.max_transid = (uint64_t) -1,
561 };
562
563 bool found_info = false, found_limit = false;
564 int r;
565
566 assert(fd >= 0);
567 assert(ret);
568
569 if (qgroupid == 0) {
570 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
571 if (r < 0)
572 return r;
573 } else {
574 r = btrfs_is_filesystem(fd);
575 if (r < 0)
576 return r;
577 if (!r)
578 return -ENOTTY;
579 }
580
581 args.key.min_offset = args.key.max_offset = qgroupid;
582
583 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
584 const struct btrfs_ioctl_search_header *sh;
585 unsigned i;
586
587 args.key.nr_items = 256;
588 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
589 if (errno == ENOENT) /* quota tree is missing: quota disabled */
590 break;
591
592 return -errno;
593 }
594
595 if (args.key.nr_items <= 0)
596 break;
597
598 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
599
600 /* Make sure we start the next search at least from this entry */
601 btrfs_ioctl_search_args_set(&args, sh);
602
603 if (sh->objectid != 0)
604 continue;
605 if (sh->offset != qgroupid)
606 continue;
607
608 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
609 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
610
611 ret->referenced = le64toh(qii->rfer);
612 ret->exclusive = le64toh(qii->excl);
613
614 found_info = true;
615
616 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
617 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
618
619 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
620 ret->referenced_max = le64toh(qli->max_rfer);
621 else
622 ret->referenced_max = (uint64_t) -1;
623
624 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
625 ret->exclusive_max = le64toh(qli->max_excl);
626 else
627 ret->exclusive_max = (uint64_t) -1;
628
629 found_limit = true;
630 }
631
632 if (found_info && found_limit)
633 goto finish;
634 }
635
636 /* Increase search key by one, to read the next item, if we can. */
637 if (!btrfs_ioctl_search_args_inc(&args))
638 break;
639 }
640
641 finish:
642 if (!found_limit && !found_info)
643 return -ENODATA;
644
645 if (!found_info) {
646 ret->referenced = (uint64_t) -1;
647 ret->exclusive = (uint64_t) -1;
648 }
649
650 if (!found_limit) {
651 ret->referenced_max = (uint64_t) -1;
652 ret->exclusive_max = (uint64_t) -1;
653 }
654
655 return 0;
656 }
657
658 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
659 _cleanup_close_ int fd = -1;
660
661 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
662 if (fd < 0)
663 return -errno;
664
665 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
666 }
667
668 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
669 uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
670 _cleanup_free_ uint64_t *qgroups = NULL;
671 int r, n, i;
672
673 assert(fd >= 0);
674 assert(ret);
675
676 /* This finds the "subtree" qgroup for a specific
677 * subvolume. This only works for subvolumes that have been
678 * prepared with btrfs_subvol_auto_qgroup_fd() with
679 * insert_intermediary_qgroup=true (or equivalent). For others
680 * it will return the leaf qgroup instead. The two cases may
681 * be distuingished via the return value, which is 1 in case
682 * an appropriate "subtree" qgroup was found, and 0
683 * otherwise. */
684
685 if (subvol_id == 0) {
686 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
687 if (r < 0)
688 return r;
689 }
690
691 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
692 if (r < 0)
693 return r;
694 if (level != 0) /* Input must be a leaf qgroup */
695 return -EINVAL;
696
697 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
698 if (n < 0)
699 return n;
700
701 for (i = 0; i < n; i++) {
702 uint64_t id;
703
704 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
705 if (r < 0)
706 return r;
707
708 if (id != subvol_id)
709 continue;
710
711 if (lowest == (uint64_t) -1 || level < lowest) {
712 lowest_qgroupid = qgroups[i];
713 lowest = level;
714 }
715 }
716
717 if (lowest == (uint64_t) -1) {
718 /* No suitable higher-level qgroup found, let's return
719 * the leaf qgroup instead, and indicate that with the
720 * return value. */
721
722 *ret = subvol_id;
723 return 0;
724 }
725
726 *ret = lowest_qgroupid;
727 return 1;
728 }
729
730 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
731 uint64_t qgroupid;
732 int r;
733
734 assert(fd >= 0);
735 assert(ret);
736
737 /* This determines the quota data of the qgroup with the
738 * lowest level, that shares the id part with the specified
739 * subvolume. This is useful for determining the quota data
740 * for entire subvolume subtrees, as long as the subtrees have
741 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
742 * compatible way */
743
744 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
745 if (r < 0)
746 return r;
747
748 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
749 }
750
751 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
752 _cleanup_close_ int fd = -1;
753
754 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
755 if (fd < 0)
756 return -errno;
757
758 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
759 }
760
761 int btrfs_defrag_fd(int fd) {
762 struct stat st;
763
764 assert(fd >= 0);
765
766 if (fstat(fd, &st) < 0)
767 return -errno;
768
769 if (!S_ISREG(st.st_mode))
770 return -EINVAL;
771
772 if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
773 return -errno;
774
775 return 0;
776 }
777
778 int btrfs_defrag(const char *p) {
779 _cleanup_close_ int fd = -1;
780
781 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
782 if (fd < 0)
783 return -errno;
784
785 return btrfs_defrag_fd(fd);
786 }
787
788 int btrfs_quota_enable_fd(int fd, bool b) {
789 struct btrfs_ioctl_quota_ctl_args args = {
790 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
791 };
792 int r;
793
794 assert(fd >= 0);
795
796 r = btrfs_is_filesystem(fd);
797 if (r < 0)
798 return r;
799 if (!r)
800 return -ENOTTY;
801
802 if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
803 return -errno;
804
805 return 0;
806 }
807
808 int btrfs_quota_enable(const char *path, bool b) {
809 _cleanup_close_ int fd = -1;
810
811 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
812 if (fd < 0)
813 return -errno;
814
815 return btrfs_quota_enable_fd(fd, b);
816 }
817
818 int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
819
820 struct btrfs_ioctl_qgroup_limit_args args = {
821 .lim.max_rfer = referenced_max,
822 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
823 };
824 unsigned c;
825 int r;
826
827 assert(fd >= 0);
828
829 if (qgroupid == 0) {
830 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
831 if (r < 0)
832 return r;
833 } else {
834 r = btrfs_is_filesystem(fd);
835 if (r < 0)
836 return r;
837 if (!r)
838 return -ENOTTY;
839 }
840
841 args.qgroupid = qgroupid;
842
843 for (c = 0;; c++) {
844 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
845
846 if (errno == EBUSY && c < 10) {
847 (void) btrfs_quota_scan_wait(fd);
848 continue;
849 }
850
851 return -errno;
852 }
853
854 break;
855 }
856
857 return 0;
858 }
859
860 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
861 _cleanup_close_ int fd = -1;
862
863 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
864 if (fd < 0)
865 return -errno;
866
867 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
868 }
869
/* Sets the "referenced" limit on the qgroup that
 * btrfs_subvol_find_subtree_qgroup() determines for the specified subvolume.
 *
 * Returns the result of btrfs_qgroup_set_limit_fd() for that qgroup, or the
 * negative error of the qgroup lookup. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
882
883 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
884 _cleanup_close_ int fd = -1;
885
886 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
887 if (fd < 0)
888 return -errno;
889
890 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
891 }
892
893 int btrfs_resize_loopback_fd(int fd, uint64_t new_size, bool grow_only) {
894 struct btrfs_ioctl_vol_args args = {};
895 char p[SYS_BLOCK_PATH_MAX("/loop/backing_file")];
896 _cleanup_free_ char *backing = NULL;
897 _cleanup_close_ int loop_fd = -1, backing_fd = -1;
898 struct stat st;
899 dev_t dev = 0;
900 int r;
901
902 /* In contrast to btrfs quota ioctls ftruncate() cannot make sense of "infinity" or file sizes > 2^31 */
903 if (!FILE_SIZE_VALID(new_size))
904 return -EINVAL;
905
906 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
907 if (new_size < 16*1024*1024)
908 new_size = 16*1024*1024;
909
910 r = btrfs_get_block_device_fd(fd, &dev);
911 if (r < 0)
912 return r;
913 if (r == 0)
914 return -ENODEV;
915
916 xsprintf_sys_block_path(p, "/loop/backing_file", dev);
917 r = read_one_line_file(p, &backing);
918 if (r == -ENOENT)
919 return -ENODEV;
920 if (r < 0)
921 return r;
922 if (isempty(backing) || !path_is_absolute(backing))
923 return -ENODEV;
924
925 backing_fd = open(backing, O_RDWR|O_CLOEXEC|O_NOCTTY);
926 if (backing_fd < 0)
927 return -errno;
928
929 if (fstat(backing_fd, &st) < 0)
930 return -errno;
931 if (!S_ISREG(st.st_mode))
932 return -ENODEV;
933
934 if (new_size == (uint64_t) st.st_size)
935 return 0;
936
937 if (grow_only && new_size < (uint64_t) st.st_size)
938 return -EINVAL;
939
940 xsprintf_sys_block_path(p, NULL, dev);
941 loop_fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY);
942 if (loop_fd < 0)
943 return -errno;
944
945 if (snprintf(args.name, sizeof(args.name), "%" PRIu64, new_size) >= (int) sizeof(args.name))
946 return -EINVAL;
947
948 if (new_size < (uint64_t) st.st_size) {
949 /* Decrease size: first decrease btrfs size, then shorten loopback */
950 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
951 return -errno;
952 }
953
954 if (ftruncate(backing_fd, new_size) < 0)
955 return -errno;
956
957 if (ioctl(loop_fd, LOOP_SET_CAPACITY, 0) < 0)
958 return -errno;
959
960 if (new_size > (uint64_t) st.st_size) {
961 /* Increase size: first enlarge loopback, then increase btrfs size */
962 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
963 return -errno;
964 }
965
966 /* Make sure the free disk space is correctly updated for both file systems */
967 (void) fsync(fd);
968 (void) fsync(backing_fd);
969
970 return 1;
971 }
972
973 int btrfs_resize_loopback(const char *p, uint64_t new_size, bool grow_only) {
974 _cleanup_close_ int fd = -1;
975
976 fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
977 if (fd < 0)
978 return -errno;
979
980 return btrfs_resize_loopback_fd(fd, new_size, grow_only);
981 }
982
983 int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
984 assert(ret);
985
986 if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
987 return -EINVAL;
988
989 if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
990 return -EINVAL;
991
992 *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
993 return 0;
994 }
995
996 int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
997 assert(level || id);
998
999 if (level)
1000 *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
1001
1002 if (id)
1003 *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
1004
1005 return 0;
1006 }
1007
1008 static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
1009
1010 struct btrfs_ioctl_qgroup_create_args args = {
1011 .create = b,
1012 .qgroupid = qgroupid,
1013 };
1014 unsigned c;
1015 int r;
1016
1017 r = btrfs_is_filesystem(fd);
1018 if (r < 0)
1019 return r;
1020 if (r == 0)
1021 return -ENOTTY;
1022
1023 for (c = 0;; c++) {
1024 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
1025
1026 /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
1027 if (errno == EINVAL)
1028 return -ENOPROTOOPT;
1029
1030 if (errno == EBUSY && c < 10) {
1031 (void) btrfs_quota_scan_wait(fd);
1032 continue;
1033 }
1034
1035 return -errno;
1036 }
1037
1038 break;
1039 }
1040
1041 return 0;
1042 }
1043
/* Creates the quota group 'qgroupid'; see qgroup_create_or_destroy() for the
 * return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        const bool create = true;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1047
/* Destroys the quota group 'qgroupid'; see qgroup_create_or_destroy() for the
 * return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        const bool create = false;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1051
1052 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
1053 _cleanup_free_ uint64_t *qgroups = NULL;
1054 uint64_t subvol_id;
1055 int i, n, r;
1056
1057 /* Destroys the specified qgroup, but unassigns it from all
1058 * its parents first. Also, it recursively destroys all
1059 * qgroups it is assgined to that have the same id part of the
1060 * qgroupid as the specified group. */
1061
1062 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
1063 if (r < 0)
1064 return r;
1065
1066 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
1067 if (n < 0)
1068 return n;
1069
1070 for (i = 0; i < n; i++) {
1071 uint64_t id;
1072
1073 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
1074 if (r < 0)
1075 return r;
1076
1077 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
1078 if (r < 0)
1079 return r;
1080
1081 if (id != subvol_id)
1082 continue;
1083
1084 /* The parent qgroupid shares the same id part with
1085 * us? If so, destroy it too. */
1086
1087 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
1088 }
1089
1090 return btrfs_qgroup_destroy(fd, qgroupid);
1091 }
1092
1093 int btrfs_quota_scan_start(int fd) {
1094 struct btrfs_ioctl_quota_rescan_args args = {};
1095
1096 assert(fd >= 0);
1097
1098 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
1099 return -errno;
1100
1101 return 0;
1102 }
1103
1104 int btrfs_quota_scan_wait(int fd) {
1105 assert(fd >= 0);
1106
1107 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
1108 return -errno;
1109
1110 return 0;
1111 }
1112
1113 int btrfs_quota_scan_ongoing(int fd) {
1114 struct btrfs_ioctl_quota_rescan_args args = {};
1115
1116 assert(fd >= 0);
1117
1118 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
1119 return -errno;
1120
1121 return !!args.flags;
1122 }
1123
1124 static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
1125 struct btrfs_ioctl_qgroup_assign_args args = {
1126 .assign = b,
1127 .src = child,
1128 .dst = parent,
1129 };
1130 unsigned c;
1131 int r;
1132
1133 r = btrfs_is_filesystem(fd);
1134 if (r < 0)
1135 return r;
1136 if (r == 0)
1137 return -ENOTTY;
1138
1139 for (c = 0;; c++) {
1140 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
1141 if (r < 0) {
1142 if (errno == EBUSY && c < 10) {
1143 (void) btrfs_quota_scan_wait(fd);
1144 continue;
1145 }
1146
1147 return -errno;
1148 }
1149
1150 if (r == 0)
1151 return 0;
1152
1153 /* If the return value is > 0, we need to request a rescan */
1154
1155 (void) btrfs_quota_scan_start(fd);
1156 return 1;
1157 }
1158 }
1159
/* Assigns the qgroup 'child' to the qgroup 'parent'; see
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = true;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1163
/* Removes the assignment of the qgroup 'child' to the qgroup 'parent'; see
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = false;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1167
1168 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1169 struct btrfs_ioctl_search_args args = {
1170 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1171
1172 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1173 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1174
1175 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1176 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1177
1178 .key.min_transid = 0,
1179 .key.max_transid = (uint64_t) -1,
1180 };
1181
1182 struct btrfs_ioctl_vol_args vol_args = {};
1183 _cleanup_close_ int subvol_fd = -1;
1184 struct stat st;
1185 bool made_writable = false;
1186 int r;
1187
1188 assert(fd >= 0);
1189 assert(subvolume);
1190
1191 if (fstat(fd, &st) < 0)
1192 return -errno;
1193
1194 if (!S_ISDIR(st.st_mode))
1195 return -EINVAL;
1196
1197 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1198 if (subvol_fd < 0)
1199 return -errno;
1200
1201 if (subvol_id == 0) {
1202 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1203 if (r < 0)
1204 return r;
1205 }
1206
1207 /* First, try to remove the subvolume. If it happens to be
1208 * already empty, this will just work. */
1209 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1210 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1211 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1212 return 0;
1213 }
1214 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1215 return -errno;
1216
1217 /* OK, the subvolume is not empty, let's look for child
1218 * subvolumes, and remove them, first */
1219
1220 args.key.min_offset = args.key.max_offset = subvol_id;
1221
1222 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1223 const struct btrfs_ioctl_search_header *sh;
1224 unsigned i;
1225
1226 args.key.nr_items = 256;
1227 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1228 return -errno;
1229
1230 if (args.key.nr_items <= 0)
1231 break;
1232
1233 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1234 _cleanup_free_ char *p = NULL;
1235 const struct btrfs_root_ref *ref;
1236 struct btrfs_ioctl_ino_lookup_args ino_args;
1237
1238 btrfs_ioctl_search_args_set(&args, sh);
1239
1240 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1241 continue;
1242 if (sh->offset != subvol_id)
1243 continue;
1244
1245 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1246
1247 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1248 if (!p)
1249 return -ENOMEM;
1250
1251 zero(ino_args);
1252 ino_args.treeid = subvol_id;
1253 ino_args.objectid = htole64(ref->dirid);
1254
1255 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1256 return -errno;
1257
1258 if (!made_writable) {
1259 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1260 if (r < 0)
1261 return r;
1262
1263 made_writable = true;
1264 }
1265
1266 if (isempty(ino_args.name))
1267 /* Subvolume is in the top-level
1268 * directory of the subvolume. */
1269 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1270 else {
1271 _cleanup_close_ int child_fd = -1;
1272
1273 /* Subvolume is somewhere further down,
1274 * hence we need to open the
1275 * containing directory first */
1276
1277 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1278 if (child_fd < 0)
1279 return -errno;
1280
1281 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1282 }
1283 if (r < 0)
1284 return r;
1285 }
1286
1287 /* Increase search key by one, to read the next item, if we can. */
1288 if (!btrfs_ioctl_search_args_inc(&args))
1289 break;
1290 }
1291
1292 /* OK, the child subvolumes should all be gone now, let's try
1293 * again to remove the subvolume */
1294 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1295 return -errno;
1296
1297 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1298 return 0;
1299 }
1300
1301 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1302 _cleanup_close_ int fd = -1;
1303 const char *subvolume;
1304 int r;
1305
1306 assert(path);
1307
1308 r = extract_subvolume_name(path, &subvolume);
1309 if (r < 0)
1310 return r;
1311
1312 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1313 if (fd < 0)
1314 return fd;
1315
1316 return subvol_remove_children(fd, subvolume, 0, flags);
1317 }
1318
1319 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1320 return subvol_remove_children(fd, subvolume, 0, flags);
1321 }
1322
1323 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1324
1325 struct btrfs_ioctl_search_args args = {
1326 /* Tree of quota items */
1327 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1328
1329 /* The object ID is always 0 */
1330 .key.min_objectid = 0,
1331 .key.max_objectid = 0,
1332
1333 /* Look precisely for the quota items */
1334 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1335 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1336
1337 /* For our qgroup */
1338 .key.min_offset = old_qgroupid,
1339 .key.max_offset = old_qgroupid,
1340
1341 /* No restrictions on the other components */
1342 .key.min_transid = 0,
1343 .key.max_transid = (uint64_t) -1,
1344 };
1345
1346 int r;
1347
1348 r = btrfs_is_filesystem(fd);
1349 if (r < 0)
1350 return r;
1351 if (!r)
1352 return -ENOTTY;
1353
1354 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1355 const struct btrfs_ioctl_search_header *sh;
1356 unsigned i;
1357
1358 args.key.nr_items = 256;
1359 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1360 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1361 break;
1362
1363 return -errno;
1364 }
1365
1366 if (args.key.nr_items <= 0)
1367 break;
1368
1369 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1370 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1371 struct btrfs_ioctl_qgroup_limit_args qargs;
1372 unsigned c;
1373
1374 /* Make sure we start the next search at least from this entry */
1375 btrfs_ioctl_search_args_set(&args, sh);
1376
1377 if (sh->objectid != 0)
1378 continue;
1379 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1380 continue;
1381 if (sh->offset != old_qgroupid)
1382 continue;
1383
1384 /* We found the entry, now copy things over. */
1385
1386 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1387 .qgroupid = new_qgroupid,
1388
1389 .lim.max_rfer = le64toh(qli->max_rfer),
1390 .lim.max_excl = le64toh(qli->max_excl),
1391 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1392 .lim.rsv_excl = le64toh(qli->rsv_excl),
1393
1394 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1395 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1396 BTRFS_QGROUP_LIMIT_RSV_RFER|
1397 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1398 };
1399
1400 for (c = 0;; c++) {
1401 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1402 if (errno == EBUSY && c < 10) {
1403 (void) btrfs_quota_scan_wait(fd);
1404 continue;
1405 }
1406 return -errno;
1407 }
1408
1409 break;
1410 }
1411
1412 return 1;
1413 }
1414
1415 /* Increase search key by one, to read the next item, if we can. */
1416 if (!btrfs_ioctl_search_args_inc(&args))
1417 break;
1418 }
1419
1420 return 0;
1421 }
1422
1423 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1424 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1425 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1426 int n_old_qgroups, n_old_parent_qgroups, r, i;
1427 uint64_t old_parent_id;
1428
1429 assert(fd >= 0);
1430
1431 /* Copies a reduced form of quota information from the old to
1432 * the new subvolume. */
1433
1434 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1435 if (n_old_qgroups <= 0) /* Nothing to copy */
1436 return n_old_qgroups;
1437
1438 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1439 if (r == -ENXIO)
1440 /* We have no parent, hence nothing to copy. */
1441 n_old_parent_qgroups = 0;
1442 else if (r < 0)
1443 return r;
1444 else {
1445 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1446 if (n_old_parent_qgroups < 0)
1447 return n_old_parent_qgroups;
1448 }
1449
1450 for (i = 0; i < n_old_qgroups; i++) {
1451 uint64_t id;
1452 int j;
1453
1454 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1455 if (r < 0)
1456 return r;
1457
1458 if (id == old_subvol_id) {
1459 /* The old subvolume was member of a qgroup
1460 * that had the same id, but a different level
1461 * as it self. Let's set up something similar
1462 * in the destination. */
1463 insert_intermediary_qgroup = true;
1464 break;
1465 }
1466
1467 for (j = 0; j < n_old_parent_qgroups; j++)
1468 if (old_parent_qgroups[j] == old_qgroups[i]) {
1469 /* The old subvolume shared a common
1470 * parent qgroup with its parent
1471 * subvolume. Let's set up something
1472 * similar in the destination. */
1473 copy_from_parent = true;
1474 }
1475 }
1476
1477 if (!insert_intermediary_qgroup && !copy_from_parent)
1478 return 0;
1479
1480 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1481 }
1482
/* Copies quota limits from the old to the new subvolume: first the limits of the leaf qgroup,
 * then those of the subtree qgroup, provided btrfs_subvol_find_subtree_qgroup() finds one for
 * both subvolumes.
 *
 * Returns a positive value if any limits were copied, 0 if nothing was copied, and a negative
 * value on failure. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t old_subtree, new_subtree;
        bool leaf_copied;
        int k;

        /* Leaf limits come first */
        k = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (k < 0)
                return k;
        leaf_copied = k > 0;

        /* Subtree limits are only copied if both sides have a subtree qgroup */
        k = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree);
        if (k < 0)
                return k;
        if (k == 0)
                return leaf_copied;

        k = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree);
        if (k < 0)
                return k;
        if (k == 0)
                return leaf_copied;

        k = btrfs_qgroup_copy_limits(fd, old_subtree, new_subtree);

        /* Errors and "copied" are passed through; otherwise report what the leaf copy did */
        return k != 0 ? k : leaf_copied;
}
1513
1514 static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolume, uint64_t old_subvol_id, BtrfsSnapshotFlags flags) {
1515
1516 struct btrfs_ioctl_search_args args = {
1517 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1518
1519 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1520 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1521
1522 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1523 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1524
1525 .key.min_transid = 0,
1526 .key.max_transid = (uint64_t) -1,
1527 };
1528
1529 struct btrfs_ioctl_vol_args_v2 vol_args = {
1530 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1531 .fd = old_fd,
1532 };
1533 _cleanup_close_ int subvolume_fd = -1;
1534 uint64_t new_subvol_id;
1535 int r;
1536
1537 assert(old_fd >= 0);
1538 assert(new_fd >= 0);
1539 assert(subvolume);
1540
1541 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1542
1543 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1544 return -errno;
1545
1546 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1547 !(flags & BTRFS_SNAPSHOT_QUOTA))
1548 return 0;
1549
1550 if (old_subvol_id == 0) {
1551 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1552 if (r < 0)
1553 return r;
1554 }
1555
1556 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1557 if (r < 0)
1558 return r;
1559
1560 if (flags & BTRFS_SNAPSHOT_QUOTA)
1561 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1562
1563 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1564
1565 if (flags & BTRFS_SNAPSHOT_QUOTA)
1566 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1567
1568 return 0;
1569 }
1570
1571 args.key.min_offset = args.key.max_offset = old_subvol_id;
1572
1573 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1574 const struct btrfs_ioctl_search_header *sh;
1575 unsigned i;
1576
1577 args.key.nr_items = 256;
1578 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1579 return -errno;
1580
1581 if (args.key.nr_items <= 0)
1582 break;
1583
1584 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1585 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1586 struct btrfs_ioctl_ino_lookup_args ino_args;
1587 const struct btrfs_root_ref *ref;
1588 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1589
1590 btrfs_ioctl_search_args_set(&args, sh);
1591
1592 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1593 continue;
1594
1595 /* Avoid finding the source subvolume a second
1596 * time */
1597 if (sh->offset != old_subvol_id)
1598 continue;
1599
1600 /* Avoid running into loops if the new
1601 * subvolume is below the old one. */
1602 if (sh->objectid == new_subvol_id)
1603 continue;
1604
1605 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1606 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1607 if (!p)
1608 return -ENOMEM;
1609
1610 zero(ino_args);
1611 ino_args.treeid = old_subvol_id;
1612 ino_args.objectid = htole64(ref->dirid);
1613
1614 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1615 return -errno;
1616
1617 /* The kernel returns an empty name if the
1618 * subvolume is in the top-level directory,
1619 * and otherwise appends a slash, so that we
1620 * can just concatenate easily here, without
1621 * adding a slash. */
1622 c = strappend(ino_args.name, p);
1623 if (!c)
1624 return -ENOMEM;
1625
1626 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1627 if (old_child_fd < 0)
1628 return -errno;
1629
1630 np = strjoin(subvolume, "/", ino_args.name);
1631 if (!np)
1632 return -ENOMEM;
1633
1634 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1635 if (new_child_fd < 0)
1636 return -errno;
1637
1638 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1639 /* If the snapshot is read-only we
1640 * need to mark it writable
1641 * temporarily, to put the subsnapshot
1642 * into place. */
1643
1644 if (subvolume_fd < 0) {
1645 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1646 if (subvolume_fd < 0)
1647 return -errno;
1648 }
1649
1650 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1651 if (r < 0)
1652 return r;
1653 }
1654
1655 /* When btrfs clones the subvolumes, child
1656 * subvolumes appear as empty directories. Remove
1657 * them, so that we can create a new snapshot
1658 * in their place */
1659 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1660 int k = -errno;
1661
1662 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1663 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1664
1665 return k;
1666 }
1667
1668 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1669
1670 /* Restore the readonly flag */
1671 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1672 int k;
1673
1674 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1675 if (r >= 0 && k < 0)
1676 return k;
1677 }
1678
1679 if (r < 0)
1680 return r;
1681 }
1682
1683 /* Increase search key by one, to read the next item, if we can. */
1684 if (!btrfs_ioctl_search_args_inc(&args))
1685 break;
1686 }
1687
1688 if (flags & BTRFS_SNAPSHOT_QUOTA)
1689 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1690
1691 return 0;
1692 }
1693
1694 int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
1695 _cleanup_close_ int new_fd = -1;
1696 const char *subvolume;
1697 int r;
1698
1699 assert(old_fd >= 0);
1700 assert(new_path);
1701
1702 r = btrfs_is_subvol_fd(old_fd);
1703 if (r < 0)
1704 return r;
1705 if (r == 0) {
1706 bool plain_directory = false;
1707
1708 /* If the source isn't a proper subvolume, fail unless fallback is requested */
1709 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1710 return -EISDIR;
1711
1712 r = btrfs_subvol_make(new_path);
1713 if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1714 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1715 if (mkdir(new_path, 0755) < 0)
1716 return r;
1717
1718 plain_directory = true;
1719 } else if (r < 0)
1720 return r;
1721
1722 r = copy_directory_fd(old_fd, new_path, COPY_MERGE|COPY_REFLINK);
1723 if (r < 0)
1724 goto fallback_fail;
1725
1726 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1727
1728 if (plain_directory) {
1729 /* Plain directories have no recursive read-only flag, but something pretty close to
1730 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1731
1732 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
1733 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
1734 } else {
1735 r = btrfs_subvol_set_read_only(new_path, true);
1736 if (r < 0)
1737 goto fallback_fail;
1738 }
1739 }
1740
1741 return 0;
1742
1743 fallback_fail:
1744 (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1745 return r;
1746 }
1747
1748 r = extract_subvolume_name(new_path, &subvolume);
1749 if (r < 0)
1750 return r;
1751
1752 new_fd = open_parent(new_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1753 if (new_fd < 0)
1754 return new_fd;
1755
1756 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1757 }
1758
1759 int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
1760 _cleanup_close_ int old_fd = -1;
1761
1762 assert(old_path);
1763 assert(new_path);
1764
1765 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1766 if (old_fd < 0)
1767 return -errno;
1768
1769 return btrfs_subvol_snapshot_fd(old_fd, new_path, flags);
1770 }
1771
1772 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1773
1774 struct btrfs_ioctl_search_args args = {
1775 /* Tree of quota items */
1776 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1777
1778 /* Look precisely for the quota relation items */
1779 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1780 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1781
1782 /* No restrictions on the other components */
1783 .key.min_offset = 0,
1784 .key.max_offset = (uint64_t) -1,
1785
1786 .key.min_transid = 0,
1787 .key.max_transid = (uint64_t) -1,
1788 };
1789
1790 _cleanup_free_ uint64_t *items = NULL;
1791 size_t n_items = 0, n_allocated = 0;
1792 int r;
1793
1794 assert(fd >= 0);
1795 assert(ret);
1796
1797 if (qgroupid == 0) {
1798 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1799 if (r < 0)
1800 return r;
1801 } else {
1802 r = btrfs_is_filesystem(fd);
1803 if (r < 0)
1804 return r;
1805 if (!r)
1806 return -ENOTTY;
1807 }
1808
1809 args.key.min_objectid = args.key.max_objectid = qgroupid;
1810
1811 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1812 const struct btrfs_ioctl_search_header *sh;
1813 unsigned i;
1814
1815 args.key.nr_items = 256;
1816 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1817 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1818 break;
1819
1820 return -errno;
1821 }
1822
1823 if (args.key.nr_items <= 0)
1824 break;
1825
1826 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1827
1828 /* Make sure we start the next search at least from this entry */
1829 btrfs_ioctl_search_args_set(&args, sh);
1830
1831 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1832 continue;
1833 if (sh->offset < sh->objectid)
1834 continue;
1835 if (sh->objectid != qgroupid)
1836 continue;
1837
1838 if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
1839 return -ENOMEM;
1840
1841 items[n_items++] = sh->offset;
1842 }
1843
1844 /* Increase search key by one, to read the next item, if we can. */
1845 if (!btrfs_ioctl_search_args_inc(&args))
1846 break;
1847 }
1848
1849 if (n_items <= 0) {
1850 *ret = NULL;
1851 return 0;
1852 }
1853
1854 *ret = items;
1855 items = NULL;
1856
1857 return (int) n_items;
1858 }
1859
1860 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1861 _cleanup_free_ uint64_t *qgroups = NULL;
1862 uint64_t parent_subvol;
1863 bool changed = false;
1864 int n = 0, r;
1865
1866 assert(fd >= 0);
1867
1868 /*
1869 * Sets up the specified subvolume's qgroup automatically in
1870 * one of two ways:
1871 *
1872 * If insert_intermediary_qgroup is false, the subvolume's
1873 * leaf qgroup will be assigned to the same parent qgroups as
1874 * the subvolume's parent subvolume.
1875 *
1876 * If insert_intermediary_qgroup is true a new intermediary
1877 * higher-level qgroup is created, with a higher level number,
1878 * but reusing the id of the subvolume. The level number is
1879 * picked as one smaller than the lowest level qgroup the
1880 * parent subvolume is a member of. If the parent subvolume's
1881 * leaf qgroup is assigned to no higher-level qgroup a new
1882 * qgroup of level 255 is created instead. Either way, the new
1883 * qgroup is then assigned to the parent's higher-level
1884 * qgroup, and the subvolume itself is assigned to it.
1885 *
1886 * If the subvolume is already assigned to a higher level
1887 * qgroup, no operation is executed.
1888 *
1889 * Effectively this means: regardless if
1890 * insert_intermediary_qgroup is true or not, after this
1891 * function is invoked the subvolume will be accounted within
1892 * the same qgroups as the parent. However, if it is true, it
1893 * will also get its own higher-level qgroup, which may in
1894 * turn be used by subvolumes created beneath this subvolume
1895 * later on.
1896 *
1897 * This hence defines a simple default qgroup setup for
1898 * subvolumes, as long as this function is invoked on each
1899 * created subvolume: each subvolume is always accounting
1900 * together with its immediate parents. Optionally, if
1901 * insert_intermediary_qgroup is true, it will also get a
1902 * qgroup that then includes all its own child subvolumes.
1903 */
1904
1905 if (subvol_id == 0) {
1906 r = btrfs_is_subvol_fd(fd);
1907 if (r < 0)
1908 return r;
1909 if (!r)
1910 return -ENOTTY;
1911
1912 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1913 if (r < 0)
1914 return r;
1915 }
1916
1917 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1918 if (n < 0)
1919 return n;
1920 if (n > 0) /* already parent qgroups set up, let's bail */
1921 return 0;
1922
1923 qgroups = mfree(qgroups);
1924
1925 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1926 if (r == -ENXIO)
1927 /* No parent, hence no qgroup memberships */
1928 n = 0;
1929 else if (r < 0)
1930 return r;
1931 else {
1932 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1933 if (n < 0)
1934 return n;
1935 }
1936
1937 if (insert_intermediary_qgroup) {
1938 uint64_t lowest = 256, new_qgroupid;
1939 bool created = false;
1940 int i;
1941
1942 /* Determine the lowest qgroup that the parent
1943 * subvolume is assigned to. */
1944
1945 for (i = 0; i < n; i++) {
1946 uint64_t level;
1947
1948 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1949 if (r < 0)
1950 return r;
1951
1952 if (level < lowest)
1953 lowest = level;
1954 }
1955
1956 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1957 return -EBUSY;
1958
1959 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1960 if (r < 0)
1961 return r;
1962
1963 /* Create the new intermediary group, unless it already exists */
1964 r = btrfs_qgroup_create(fd, new_qgroupid);
1965 if (r < 0 && r != -EEXIST)
1966 return r;
1967 if (r >= 0)
1968 changed = created = true;
1969
1970 for (i = 0; i < n; i++) {
1971 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1972 if (r < 0 && r != -EEXIST) {
1973 if (created)
1974 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1975
1976 return r;
1977 }
1978 if (r >= 0)
1979 changed = true;
1980 }
1981
1982 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1983 if (r < 0 && r != -EEXIST) {
1984 if (created)
1985 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1986 return r;
1987 }
1988 if (r >= 0)
1989 changed = true;
1990
1991 } else {
1992 int i;
1993
1994 /* Assign our subvolume to all the same qgroups as the parent */
1995
1996 for (i = 0; i < n; i++) {
1997 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1998 if (r < 0 && r != -EEXIST)
1999 return r;
2000 if (r >= 0)
2001 changed = true;
2002 }
2003 }
2004
2005 return changed;
2006 }
2007
2008 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
2009 _cleanup_close_ int fd = -1;
2010
2011 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
2012 if (fd < 0)
2013 return -errno;
2014
2015 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
2016 }
2017
2018 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
2019
2020 struct btrfs_ioctl_search_args args = {
2021 /* Tree of tree roots */
2022 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
2023
2024 /* Look precisely for the subvolume items */
2025 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
2026 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
2027
2028 /* No restrictions on the other components */
2029 .key.min_offset = 0,
2030 .key.max_offset = (uint64_t) -1,
2031
2032 .key.min_transid = 0,
2033 .key.max_transid = (uint64_t) -1,
2034 };
2035 int r;
2036
2037 assert(fd >= 0);
2038 assert(ret);
2039
2040 if (subvol_id == 0) {
2041 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2042 if (r < 0)
2043 return r;
2044 } else {
2045 r = btrfs_is_filesystem(fd);
2046 if (r < 0)
2047 return r;
2048 if (!r)
2049 return -ENOTTY;
2050 }
2051
2052 args.key.min_objectid = args.key.max_objectid = subvol_id;
2053
2054 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
2055 const struct btrfs_ioctl_search_header *sh;
2056 unsigned i;
2057
2058 args.key.nr_items = 256;
2059 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
2060 return negative_errno();
2061
2062 if (args.key.nr_items <= 0)
2063 break;
2064
2065 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
2066
2067 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
2068 continue;
2069 if (sh->objectid != subvol_id)
2070 continue;
2071
2072 *ret = sh->offset;
2073 return 0;
2074 }
2075 }
2076
2077 return -ENXIO;
2078 }