]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/btrfs-util.c
machined: add early checks for unrealistically large image/pool sizes
[thirdparty/systemd.git] / src / basic / btrfs-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2014 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <linux/loop.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <unistd.h>
35
36 #ifdef HAVE_LINUX_BTRFS_H
37 #include <linux/btrfs.h>
38 #endif
39
40 #include "alloc-util.h"
41 #include "btrfs-ctree.h"
42 #include "btrfs-util.h"
43 #include "copy.h"
44 #include "fd-util.h"
45 #include "fileio.h"
46 #include "io-util.h"
47 #include "macro.h"
48 #include "missing.h"
49 #include "path-util.h"
50 #include "selinux-util.h"
51 #include "smack-util.h"
52 #include "sparse-endian.h"
53 #include "stat-util.h"
54 #include "string-util.h"
55 #include "time-util.h"
56 #include "util.h"
57
58 /* WARNING: Be careful with file system ioctls! When we get an fd, we
59 * need to make sure it either refers to only a regular file or
60 * directory, or that it is located on btrfs, before invoking any
61 * btrfs ioctls. The ioctl numbers are reused by some device drivers
62 * (such as DRM), and hence might have bad effects when invoked on
63 * device nodes (that reference drivers) rather than fds to normal
64 * files or directories. */
65
66 static int validate_subvolume_name(const char *name) {
67
68 if (!filename_is_valid(name))
69 return -EINVAL;
70
71 if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
72 return -E2BIG;
73
74 return 0;
75 }
76
77 static int open_parent(const char *path, int flags) {
78 _cleanup_free_ char *parent = NULL;
79 int fd;
80
81 assert(path);
82
83 parent = dirname_malloc(path);
84 if (!parent)
85 return -ENOMEM;
86
87 fd = open(parent, flags);
88 if (fd < 0)
89 return -errno;
90
91 return fd;
92 }
93
/* Determines the last component of the specified path and validates
 * it as a subvolume name. On success 0 is returned and *subvolume is
 * set to the pointer basename() returned; on failure the negative
 * error of validate_subvolume_name() is returned and *subvolume is
 * left untouched. */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *last;
        int r;

        assert(path);
        assert(subvolume);

        last = basename(path);

        r = validate_subvolume_name(last);
        if (r >= 0) {
                *subvolume = last;
                r = 0;
        }

        return r;
}
110
111 int btrfs_is_filesystem(int fd) {
112 struct statfs sfs;
113
114 assert(fd >= 0);
115
116 if (fstatfs(fd, &sfs) < 0)
117 return -errno;
118
119 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
120 }
121
/* Checks whether the specified fd refers to a btrfs subvolume, i.e. a
 * directory with inode number 256 that is located on btrfs. Returns 0
 * if the inode check fails, otherwise the result of
 * btrfs_is_filesystem(); -errno if fstat() fails. */
int btrfs_is_subvol_fd(int fd) {
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        /* On btrfs subvolumes always have the inode 256 */
        if (S_ISDIR(info.st_mode) && info.st_ino == 256)
                return btrfs_is_filesystem(fd);

        return 0;
}
137
138 int btrfs_is_subvol(const char *path) {
139 _cleanup_close_ int fd = -1;
140
141 assert(path);
142
143 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
144 if (fd < 0)
145 return -errno;
146
147 return btrfs_is_subvol_fd(fd);
148 }
149
150 int btrfs_subvol_make(const char *path) {
151 struct btrfs_ioctl_vol_args args = {};
152 _cleanup_close_ int fd = -1;
153 const char *subvolume;
154 int r;
155
156 assert(path);
157
158 r = extract_subvolume_name(path, &subvolume);
159 if (r < 0)
160 return r;
161
162 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
163 if (fd < 0)
164 return fd;
165
166 strncpy(args.name, subvolume, sizeof(args.name)-1);
167
168 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
169 return -errno;
170
171 return 0;
172 }
173
174 int btrfs_subvol_make_label(const char *path) {
175 int r;
176
177 assert(path);
178
179 r = mac_selinux_create_file_prepare(path, S_IFDIR);
180 if (r < 0)
181 return r;
182
183 r = btrfs_subvol_make(path);
184 mac_selinux_create_file_clear();
185
186 if (r < 0)
187 return r;
188
189 return mac_smack_fix(path, false, false);
190 }
191
192 int btrfs_subvol_set_read_only_fd(int fd, bool b) {
193 uint64_t flags, nflags;
194 struct stat st;
195
196 assert(fd >= 0);
197
198 if (fstat(fd, &st) < 0)
199 return -errno;
200
201 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
202 return -EINVAL;
203
204 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
205 return -errno;
206
207 if (b)
208 nflags = flags | BTRFS_SUBVOL_RDONLY;
209 else
210 nflags = flags & ~BTRFS_SUBVOL_RDONLY;
211
212 if (flags == nflags)
213 return 0;
214
215 if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
216 return -errno;
217
218 return 0;
219 }
220
221 int btrfs_subvol_set_read_only(const char *path, bool b) {
222 _cleanup_close_ int fd = -1;
223
224 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
225 if (fd < 0)
226 return -errno;
227
228 return btrfs_subvol_set_read_only_fd(fd, b);
229 }
230
231 int btrfs_subvol_get_read_only_fd(int fd) {
232 uint64_t flags;
233 struct stat st;
234
235 assert(fd >= 0);
236
237 if (fstat(fd, &st) < 0)
238 return -errno;
239
240 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
241 return -EINVAL;
242
243 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
244 return -errno;
245
246 return !!(flags & BTRFS_SUBVOL_RDONLY);
247 }
248
249 int btrfs_reflink(int infd, int outfd) {
250 struct stat st;
251 int r;
252
253 assert(infd >= 0);
254 assert(outfd >= 0);
255
256 /* Make sure we invoke the ioctl on a regular file, so that no
257 * device driver accidentally gets it. */
258
259 if (fstat(outfd, &st) < 0)
260 return -errno;
261
262 if (!S_ISREG(st.st_mode))
263 return -EINVAL;
264
265 r = ioctl(outfd, BTRFS_IOC_CLONE, infd);
266 if (r < 0)
267 return -errno;
268
269 return 0;
270 }
271
272 int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
273 struct btrfs_ioctl_clone_range_args args = {
274 .src_fd = infd,
275 .src_offset = in_offset,
276 .src_length = sz,
277 .dest_offset = out_offset,
278 };
279 struct stat st;
280 int r;
281
282 assert(infd >= 0);
283 assert(outfd >= 0);
284 assert(sz > 0);
285
286 if (fstat(outfd, &st) < 0)
287 return -errno;
288
289 if (!S_ISREG(st.st_mode))
290 return -EINVAL;
291
292 r = ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args);
293 if (r < 0)
294 return -errno;
295
296 return 0;
297 }
298
299 int btrfs_get_block_device_fd(int fd, dev_t *dev) {
300 struct btrfs_ioctl_fs_info_args fsi = {};
301 uint64_t id;
302 int r;
303
304 assert(fd >= 0);
305 assert(dev);
306
307 r = btrfs_is_filesystem(fd);
308 if (r < 0)
309 return r;
310 if (!r)
311 return -ENOTTY;
312
313 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
314 return -errno;
315
316 /* We won't do this for btrfs RAID */
317 if (fsi.num_devices != 1)
318 return 0;
319
320 for (id = 1; id <= fsi.max_id; id++) {
321 struct btrfs_ioctl_dev_info_args di = {
322 .devid = id,
323 };
324 struct stat st;
325
326 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
327 if (errno == ENODEV)
328 continue;
329
330 return -errno;
331 }
332
333 if (stat((char*) di.path, &st) < 0)
334 return -errno;
335
336 if (!S_ISBLK(st.st_mode))
337 return -ENODEV;
338
339 if (major(st.st_rdev) == 0)
340 return -ENODEV;
341
342 *dev = st.st_rdev;
343 return 1;
344 }
345
346 return -ENODEV;
347 }
348
349 int btrfs_get_block_device(const char *path, dev_t *dev) {
350 _cleanup_close_ int fd = -1;
351
352 assert(path);
353 assert(dev);
354
355 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
356 if (fd < 0)
357 return -errno;
358
359 return btrfs_get_block_device_fd(fd, dev);
360 }
361
362 int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
363 struct btrfs_ioctl_ino_lookup_args args = {
364 .objectid = BTRFS_FIRST_FREE_OBJECTID
365 };
366 int r;
367
368 assert(fd >= 0);
369 assert(ret);
370
371 r = btrfs_is_filesystem(fd);
372 if (r < 0)
373 return r;
374 if (!r)
375 return -ENOTTY;
376
377 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
378 return -errno;
379
380 *ret = args.treeid;
381 return 0;
382 }
383
384 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
385 _cleanup_close_ int subvol_fd = -1;
386
387 assert(fd >= 0);
388 assert(ret);
389
390 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
391 if (subvol_fd < 0)
392 return -errno;
393
394 return btrfs_subvol_get_id_fd(subvol_fd, ret);
395 }
396
397 static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
398 assert(args);
399
400 /* the objectid, type, offset together make up the btrfs key,
401 * which is considered a single 136byte integer when
402 * comparing. This call increases the counter by one, dealing
403 * with the overflow between the overflows */
404
405 if (args->key.min_offset < (uint64_t) -1) {
406 args->key.min_offset++;
407 return true;
408 }
409
410 if (args->key.min_type < (uint8_t) -1) {
411 args->key.min_type++;
412 args->key.min_offset = 0;
413 return true;
414 }
415
416 if (args->key.min_objectid < (uint64_t) -1) {
417 args->key.min_objectid++;
418 args->key.min_offset = 0;
419 args->key.min_type = 0;
420 return true;
421 }
422
423 return 0;
424 }
425
426 static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
427 assert(args);
428 assert(h);
429
430 args->key.min_objectid = h->objectid;
431 args->key.min_type = h->type;
432 args->key.min_offset = h->offset;
433 }
434
435 static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
436 assert(args);
437
438 /* Compare min and max */
439
440 if (args->key.min_objectid < args->key.max_objectid)
441 return -1;
442 if (args->key.min_objectid > args->key.max_objectid)
443 return 1;
444
445 if (args->key.min_type < args->key.max_type)
446 return -1;
447 if (args->key.min_type > args->key.max_type)
448 return 1;
449
450 if (args->key.min_offset < args->key.max_offset)
451 return -1;
452 if (args->key.min_offset > args->key.max_offset)
453 return 1;
454
455 return 0;
456 }
457
/* Iterates through all items a tree search returned in (args).buf.
 * (i) counts the items from 0 up to (args).key.nr_items, and (sh) is
 * pointed to the header of the current item. Each step advances (sh)
 * by the size of the header plus the length of the item body that
 * follows it. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))

/* Evaluates to a pointer to the item body, which is located right
 * after the specified search header. The argument is parenthesized so
 * that the cast binds to the whole argument expression (e.g. "p + 1")
 * and not just to its first operand. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
467
468 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
469 struct btrfs_ioctl_search_args args = {
470 /* Tree of tree roots */
471 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
472
473 /* Look precisely for the subvolume items */
474 .key.min_type = BTRFS_ROOT_ITEM_KEY,
475 .key.max_type = BTRFS_ROOT_ITEM_KEY,
476
477 .key.min_offset = 0,
478 .key.max_offset = (uint64_t) -1,
479
480 /* No restrictions on the other components */
481 .key.min_transid = 0,
482 .key.max_transid = (uint64_t) -1,
483 };
484
485 bool found = false;
486 int r;
487
488 assert(fd >= 0);
489 assert(ret);
490
491 if (subvol_id == 0) {
492 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
493 if (r < 0)
494 return r;
495 } else {
496 r = btrfs_is_filesystem(fd);
497 if (r < 0)
498 return r;
499 if (!r)
500 return -ENOTTY;
501 }
502
503 args.key.min_objectid = args.key.max_objectid = subvol_id;
504
505 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
506 const struct btrfs_ioctl_search_header *sh;
507 unsigned i;
508
509 args.key.nr_items = 256;
510 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
511 return -errno;
512
513 if (args.key.nr_items <= 0)
514 break;
515
516 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
517
518 const struct btrfs_root_item *ri;
519
520 /* Make sure we start the next search at least from this entry */
521 btrfs_ioctl_search_args_set(&args, sh);
522
523 if (sh->objectid != subvol_id)
524 continue;
525 if (sh->type != BTRFS_ROOT_ITEM_KEY)
526 continue;
527
528 /* Older versions of the struct lacked the otime setting */
529 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
530 continue;
531
532 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
533
534 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
535 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
536
537 ret->subvol_id = subvol_id;
538 ret->read_only = !!(le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY);
539
540 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
541 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
542 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
543
544 found = true;
545 goto finish;
546 }
547
548 /* Increase search key by one, to read the next item, if we can. */
549 if (!btrfs_ioctl_search_args_inc(&args))
550 break;
551 }
552
553 finish:
554 if (!found)
555 return -ENODATA;
556
557 return 0;
558 }
559
560 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
561
562 struct btrfs_ioctl_search_args args = {
563 /* Tree of quota items */
564 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
565
566 /* The object ID is always 0 */
567 .key.min_objectid = 0,
568 .key.max_objectid = 0,
569
570 /* Look precisely for the quota items */
571 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
572 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
573
574 /* No restrictions on the other components */
575 .key.min_transid = 0,
576 .key.max_transid = (uint64_t) -1,
577 };
578
579 bool found_info = false, found_limit = false;
580 int r;
581
582 assert(fd >= 0);
583 assert(ret);
584
585 if (qgroupid == 0) {
586 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
587 if (r < 0)
588 return r;
589 } else {
590 r = btrfs_is_filesystem(fd);
591 if (r < 0)
592 return r;
593 if (!r)
594 return -ENOTTY;
595 }
596
597 args.key.min_offset = args.key.max_offset = qgroupid;
598
599 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
600 const struct btrfs_ioctl_search_header *sh;
601 unsigned i;
602
603 args.key.nr_items = 256;
604 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
605 if (errno == ENOENT) /* quota tree is missing: quota disabled */
606 break;
607
608 return -errno;
609 }
610
611 if (args.key.nr_items <= 0)
612 break;
613
614 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
615
616 /* Make sure we start the next search at least from this entry */
617 btrfs_ioctl_search_args_set(&args, sh);
618
619 if (sh->objectid != 0)
620 continue;
621 if (sh->offset != qgroupid)
622 continue;
623
624 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
625 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
626
627 ret->referenced = le64toh(qii->rfer);
628 ret->exclusive = le64toh(qii->excl);
629
630 found_info = true;
631
632 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
633 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
634
635 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
636 ret->referenced_max = le64toh(qli->max_rfer);
637 else
638 ret->referenced_max = (uint64_t) -1;
639
640 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
641 ret->exclusive_max = le64toh(qli->max_excl);
642 else
643 ret->exclusive_max = (uint64_t) -1;
644
645 found_limit = true;
646 }
647
648 if (found_info && found_limit)
649 goto finish;
650 }
651
652 /* Increase search key by one, to read the next item, if we can. */
653 if (!btrfs_ioctl_search_args_inc(&args))
654 break;
655 }
656
657 finish:
658 if (!found_limit && !found_info)
659 return -ENODATA;
660
661 if (!found_info) {
662 ret->referenced = (uint64_t) -1;
663 ret->exclusive = (uint64_t) -1;
664 }
665
666 if (!found_limit) {
667 ret->referenced_max = (uint64_t) -1;
668 ret->exclusive_max = (uint64_t) -1;
669 }
670
671 return 0;
672 }
673
674 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
675 _cleanup_close_ int fd = -1;
676
677 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
678 if (fd < 0)
679 return -errno;
680
681 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
682 }
683
684 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
685 uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
686 _cleanup_free_ uint64_t *qgroups = NULL;
687 int r, n, i;
688
689 assert(fd >= 0);
690 assert(ret);
691
692 /* This finds the "subtree" qgroup for a specific
693 * subvolume. This only works for subvolumes that have been
694 * prepared with btrfs_subvol_auto_qgroup_fd() with
695 * insert_intermediary_qgroup=true (or equivalent). For others
696 * it will return the leaf qgroup instead. The two cases may
697 * be distuingished via the return value, which is 1 in case
698 * an appropriate "subtree" qgroup was found, and 0
699 * otherwise. */
700
701 if (subvol_id == 0) {
702 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
703 if (r < 0)
704 return r;
705 }
706
707 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
708 if (r < 0)
709 return r;
710 if (level != 0) /* Input must be a leaf qgroup */
711 return -EINVAL;
712
713 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
714 if (n < 0)
715 return n;
716
717 for (i = 0; i < n; i++) {
718 uint64_t id;
719
720 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
721 if (r < 0)
722 return r;
723
724 if (id != subvol_id)
725 continue;
726
727 if (lowest == (uint64_t) -1 || level < lowest) {
728 lowest_qgroupid = qgroups[i];
729 lowest = level;
730 }
731 }
732
733 if (lowest == (uint64_t) -1) {
734 /* No suitable higher-level qgroup found, let's return
735 * the leaf qgroup instead, and indicate that with the
736 * return value. */
737
738 *ret = subvol_id;
739 return 0;
740 }
741
742 *ret = lowest_qgroupid;
743 return 1;
744 }
745
746 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
747 uint64_t qgroupid;
748 int r;
749
750 assert(fd >= 0);
751 assert(ret);
752
753 /* This determines the quota data of the qgroup with the
754 * lowest level, that shares the id part with the specified
755 * subvolume. This is useful for determining the quota data
756 * for entire subvolume subtrees, as long as the subtrees have
757 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
758 * compatible way */
759
760 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
761 if (r < 0)
762 return r;
763
764 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
765 }
766
767 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
768 _cleanup_close_ int fd = -1;
769
770 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
771 if (fd < 0)
772 return -errno;
773
774 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
775 }
776
777 int btrfs_defrag_fd(int fd) {
778 struct stat st;
779
780 assert(fd >= 0);
781
782 if (fstat(fd, &st) < 0)
783 return -errno;
784
785 if (!S_ISREG(st.st_mode))
786 return -EINVAL;
787
788 if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
789 return -errno;
790
791 return 0;
792 }
793
794 int btrfs_defrag(const char *p) {
795 _cleanup_close_ int fd = -1;
796
797 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
798 if (fd < 0)
799 return -errno;
800
801 return btrfs_defrag_fd(fd);
802 }
803
804 int btrfs_quota_enable_fd(int fd, bool b) {
805 struct btrfs_ioctl_quota_ctl_args args = {
806 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
807 };
808 int r;
809
810 assert(fd >= 0);
811
812 r = btrfs_is_filesystem(fd);
813 if (r < 0)
814 return r;
815 if (!r)
816 return -ENOTTY;
817
818 if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
819 return -errno;
820
821 return 0;
822 }
823
824 int btrfs_quota_enable(const char *path, bool b) {
825 _cleanup_close_ int fd = -1;
826
827 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
828 if (fd < 0)
829 return -errno;
830
831 return btrfs_quota_enable_fd(fd, b);
832 }
833
834 int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
835
836 struct btrfs_ioctl_qgroup_limit_args args = {
837 .lim.max_rfer = referenced_max,
838 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
839 };
840 unsigned c;
841 int r;
842
843 assert(fd >= 0);
844
845 if (qgroupid == 0) {
846 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
847 if (r < 0)
848 return r;
849 } else {
850 r = btrfs_is_filesystem(fd);
851 if (r < 0)
852 return r;
853 if (!r)
854 return -ENOTTY;
855 }
856
857 args.qgroupid = qgroupid;
858
859 for (c = 0;; c++) {
860 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
861
862 if (errno == EBUSY && c < 10) {
863 (void) btrfs_quota_scan_wait(fd);
864 continue;
865 }
866
867 return -errno;
868 }
869
870 break;
871 }
872
873 return 0;
874 }
875
876 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
877 _cleanup_close_ int fd = -1;
878
879 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
880 if (fd < 0)
881 return -errno;
882
883 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
884 }
885
/* Sets the "referenced" limit on the qgroup that
 * btrfs_subvol_find_subtree_qgroup() selects for the specified
 * subvolume. Returns a negative errno-style error if the lookup
 * fails, otherwise the result of btrfs_qgroup_set_limit_fd(). */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroup;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroup);

        return r < 0 ? r : btrfs_qgroup_set_limit_fd(fd, subtree_qgroup, referenced_max);
}
898
899 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
900 _cleanup_close_ int fd = -1;
901
902 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
903 if (fd < 0)
904 return -errno;
905
906 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
907 }
908
909 int btrfs_resize_loopback_fd(int fd, uint64_t new_size, bool grow_only) {
910 struct btrfs_ioctl_vol_args args = {};
911 _cleanup_free_ char *p = NULL, *loop = NULL, *backing = NULL;
912 _cleanup_close_ int loop_fd = -1, backing_fd = -1;
913 struct stat st;
914 dev_t dev = 0;
915 int r;
916
917 /* In contrast to btrfs quota ioctls ftruncate() cannot make sense of "infinity" or file sizes > 2^31 */
918 if (!FILE_SIZE_VALID(new_size))
919 return -EINVAL;
920
921 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
922 if (new_size < 16*1024*1024)
923 new_size = 16*1024*1024;
924
925 r = btrfs_get_block_device_fd(fd, &dev);
926 if (r < 0)
927 return r;
928 if (r == 0)
929 return -ENODEV;
930
931 if (asprintf(&p, "/sys/dev/block/%u:%u/loop/backing_file", major(dev), minor(dev)) < 0)
932 return -ENOMEM;
933 r = read_one_line_file(p, &backing);
934 if (r == -ENOENT)
935 return -ENODEV;
936 if (r < 0)
937 return r;
938 if (isempty(backing) || !path_is_absolute(backing))
939 return -ENODEV;
940
941 backing_fd = open(backing, O_RDWR|O_CLOEXEC|O_NOCTTY);
942 if (backing_fd < 0)
943 return -errno;
944
945 if (fstat(backing_fd, &st) < 0)
946 return -errno;
947 if (!S_ISREG(st.st_mode))
948 return -ENODEV;
949
950 if (new_size == (uint64_t) st.st_size)
951 return 0;
952
953 if (grow_only && new_size < (uint64_t) st.st_size)
954 return -EINVAL;
955
956 if (asprintf(&loop, "/dev/block/%u:%u", major(dev), minor(dev)) < 0)
957 return -ENOMEM;
958 loop_fd = open(loop, O_RDWR|O_CLOEXEC|O_NOCTTY);
959 if (loop_fd < 0)
960 return -errno;
961
962 if (snprintf(args.name, sizeof(args.name), "%" PRIu64, new_size) >= (int) sizeof(args.name))
963 return -EINVAL;
964
965 if (new_size < (uint64_t) st.st_size) {
966 /* Decrease size: first decrease btrfs size, then shorten loopback */
967 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
968 return -errno;
969 }
970
971 if (ftruncate(backing_fd, new_size) < 0)
972 return -errno;
973
974 if (ioctl(loop_fd, LOOP_SET_CAPACITY, 0) < 0)
975 return -errno;
976
977 if (new_size > (uint64_t) st.st_size) {
978 /* Increase size: first enlarge loopback, then increase btrfs size */
979 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
980 return -errno;
981 }
982
983 /* Make sure the free disk space is correctly updated for both file systems */
984 (void) fsync(fd);
985 (void) fsync(backing_fd);
986
987 return 1;
988 }
989
990 int btrfs_resize_loopback(const char *p, uint64_t new_size, bool grow_only) {
991 _cleanup_close_ int fd = -1;
992
993 fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
994 if (fd < 0)
995 return -errno;
996
997 return btrfs_resize_loopback_fd(fd, new_size, grow_only);
998 }
999
1000 int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
1001 assert(ret);
1002
1003 if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
1004 return -EINVAL;
1005
1006 if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
1007 return -EINVAL;
1008
1009 *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
1010 return 0;
1011 }
1012
1013 int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
1014 assert(level || id);
1015
1016 if (level)
1017 *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
1018
1019 if (id)
1020 *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
1021
1022 return 0;
1023 }
1024
1025 static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
1026
1027 struct btrfs_ioctl_qgroup_create_args args = {
1028 .create = b,
1029 .qgroupid = qgroupid,
1030 };
1031 unsigned c;
1032 int r;
1033
1034 r = btrfs_is_filesystem(fd);
1035 if (r < 0)
1036 return r;
1037 if (r == 0)
1038 return -ENOTTY;
1039
1040 for (c = 0;; c++) {
1041 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
1042
1043 /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
1044 if (errno == EINVAL)
1045 return -ENOPROTOOPT;
1046
1047 if (errno == EBUSY && c < 10) {
1048 (void) btrfs_quota_scan_wait(fd);
1049 continue;
1050 }
1051
1052 return -errno;
1053 }
1054
1055 break;
1056 }
1057
1058 return 0;
1059 }
1060
/* Creates the qgroup with the specified id. See
 * qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        const bool create = true;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1064
/* Destroys the qgroup with the specified id. See
 * qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        const bool create = false;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1068
1069 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
1070 _cleanup_free_ uint64_t *qgroups = NULL;
1071 uint64_t subvol_id;
1072 int i, n, r;
1073
1074 /* Destroys the specified qgroup, but unassigns it from all
1075 * its parents first. Also, it recursively destroys all
1076 * qgroups it is assgined to that have the same id part of the
1077 * qgroupid as the specified group. */
1078
1079 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
1080 if (r < 0)
1081 return r;
1082
1083 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
1084 if (n < 0)
1085 return n;
1086
1087 for (i = 0; i < n; i++) {
1088 uint64_t id;
1089
1090 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
1091 if (r < 0)
1092 return r;
1093
1094 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
1095 if (r < 0)
1096 return r;
1097
1098 if (id != subvol_id)
1099 continue;
1100
1101 /* The parent qgroupid shares the same id part with
1102 * us? If so, destroy it too. */
1103
1104 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
1105 }
1106
1107 return btrfs_qgroup_destroy(fd, qgroupid);
1108 }
1109
1110 int btrfs_quota_scan_start(int fd) {
1111 struct btrfs_ioctl_quota_rescan_args args = {};
1112
1113 assert(fd >= 0);
1114
1115 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
1116 return -errno;
1117
1118 return 0;
1119 }
1120
1121 int btrfs_quota_scan_wait(int fd) {
1122 assert(fd >= 0);
1123
1124 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
1125 return -errno;
1126
1127 return 0;
1128 }
1129
1130 int btrfs_quota_scan_ongoing(int fd) {
1131 struct btrfs_ioctl_quota_rescan_args args = {};
1132
1133 assert(fd >= 0);
1134
1135 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
1136 return -errno;
1137
1138 return !!args.flags;
1139 }
1140
1141 static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
1142 struct btrfs_ioctl_qgroup_assign_args args = {
1143 .assign = b,
1144 .src = child,
1145 .dst = parent,
1146 };
1147 unsigned c;
1148 int r;
1149
1150 r = btrfs_is_filesystem(fd);
1151 if (r < 0)
1152 return r;
1153 if (r == 0)
1154 return -ENOTTY;
1155
1156 for (c = 0;; c++) {
1157 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
1158 if (r < 0) {
1159 if (errno == EBUSY && c < 10) {
1160 (void) btrfs_quota_scan_wait(fd);
1161 continue;
1162 }
1163
1164 return -errno;
1165 }
1166
1167 if (r == 0)
1168 return 0;
1169
1170 /* If the return value is > 0, we need to request a rescan */
1171
1172 (void) btrfs_quota_scan_start(fd);
1173 return 1;
1174 }
1175 }
1176
/* Assigns the qgroup "child" to the qgroup "parent". See
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = true;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1180
/* Removes the qgroup "child" from the qgroup "parent". See
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = false;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1184
1185 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1186 struct btrfs_ioctl_search_args args = {
1187 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1188
1189 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1190 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1191
1192 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1193 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1194
1195 .key.min_transid = 0,
1196 .key.max_transid = (uint64_t) -1,
1197 };
1198
1199 struct btrfs_ioctl_vol_args vol_args = {};
1200 _cleanup_close_ int subvol_fd = -1;
1201 struct stat st;
1202 bool made_writable = false;
1203 int r;
1204
1205 assert(fd >= 0);
1206 assert(subvolume);
1207
1208 if (fstat(fd, &st) < 0)
1209 return -errno;
1210
1211 if (!S_ISDIR(st.st_mode))
1212 return -EINVAL;
1213
1214 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1215 if (subvol_fd < 0)
1216 return -errno;
1217
1218 if (subvol_id == 0) {
1219 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1220 if (r < 0)
1221 return r;
1222 }
1223
1224 /* First, try to remove the subvolume. If it happens to be
1225 * already empty, this will just work. */
1226 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1227 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1228 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1229 return 0;
1230 }
1231 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1232 return -errno;
1233
1234 /* OK, the subvolume is not empty, let's look for child
1235 * subvolumes, and remove them, first */
1236
1237 args.key.min_offset = args.key.max_offset = subvol_id;
1238
1239 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1240 const struct btrfs_ioctl_search_header *sh;
1241 unsigned i;
1242
1243 args.key.nr_items = 256;
1244 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1245 return -errno;
1246
1247 if (args.key.nr_items <= 0)
1248 break;
1249
1250 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1251 _cleanup_free_ char *p = NULL;
1252 const struct btrfs_root_ref *ref;
1253 struct btrfs_ioctl_ino_lookup_args ino_args;
1254
1255 btrfs_ioctl_search_args_set(&args, sh);
1256
1257 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1258 continue;
1259 if (sh->offset != subvol_id)
1260 continue;
1261
1262 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1263
1264 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1265 if (!p)
1266 return -ENOMEM;
1267
1268 zero(ino_args);
1269 ino_args.treeid = subvol_id;
1270 ino_args.objectid = htole64(ref->dirid);
1271
1272 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1273 return -errno;
1274
1275 if (!made_writable) {
1276 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1277 if (r < 0)
1278 return r;
1279
1280 made_writable = true;
1281 }
1282
1283 if (isempty(ino_args.name))
1284 /* Subvolume is in the top-level
1285 * directory of the subvolume. */
1286 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1287 else {
1288 _cleanup_close_ int child_fd = -1;
1289
1290 /* Subvolume is somewhere further down,
1291 * hence we need to open the
1292 * containing directory first */
1293
1294 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1295 if (child_fd < 0)
1296 return -errno;
1297
1298 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1299 }
1300 if (r < 0)
1301 return r;
1302 }
1303
1304 /* Increase search key by one, to read the next item, if we can. */
1305 if (!btrfs_ioctl_search_args_inc(&args))
1306 break;
1307 }
1308
1309 /* OK, the child subvolumes should all be gone now, let's try
1310 * again to remove the subvolume */
1311 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1312 return -errno;
1313
1314 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1315 return 0;
1316 }
1317
1318 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1319 _cleanup_close_ int fd = -1;
1320 const char *subvolume;
1321 int r;
1322
1323 assert(path);
1324
1325 r = extract_subvolume_name(path, &subvolume);
1326 if (r < 0)
1327 return r;
1328
1329 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1330 if (fd < 0)
1331 return fd;
1332
1333 return subvol_remove_children(fd, subvolume, 0, flags);
1334 }
1335
1336 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1337 return subvol_remove_children(fd, subvolume, 0, flags);
1338 }
1339
1340 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1341
1342 struct btrfs_ioctl_search_args args = {
1343 /* Tree of quota items */
1344 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1345
1346 /* The object ID is always 0 */
1347 .key.min_objectid = 0,
1348 .key.max_objectid = 0,
1349
1350 /* Look precisely for the quota items */
1351 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1352 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1353
1354 /* For our qgroup */
1355 .key.min_offset = old_qgroupid,
1356 .key.max_offset = old_qgroupid,
1357
1358 /* No restrictions on the other components */
1359 .key.min_transid = 0,
1360 .key.max_transid = (uint64_t) -1,
1361 };
1362
1363 int r;
1364
1365 r = btrfs_is_filesystem(fd);
1366 if (r < 0)
1367 return r;
1368 if (!r)
1369 return -ENOTTY;
1370
1371 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1372 const struct btrfs_ioctl_search_header *sh;
1373 unsigned i;
1374
1375 args.key.nr_items = 256;
1376 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1377 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1378 break;
1379
1380 return -errno;
1381 }
1382
1383 if (args.key.nr_items <= 0)
1384 break;
1385
1386 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1387 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1388 struct btrfs_ioctl_qgroup_limit_args qargs;
1389 unsigned c;
1390
1391 /* Make sure we start the next search at least from this entry */
1392 btrfs_ioctl_search_args_set(&args, sh);
1393
1394 if (sh->objectid != 0)
1395 continue;
1396 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1397 continue;
1398 if (sh->offset != old_qgroupid)
1399 continue;
1400
1401 /* We found the entry, now copy things over. */
1402
1403 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1404 .qgroupid = new_qgroupid,
1405
1406 .lim.max_rfer = le64toh(qli->max_rfer),
1407 .lim.max_excl = le64toh(qli->max_excl),
1408 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1409 .lim.rsv_excl = le64toh(qli->rsv_excl),
1410
1411 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1412 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1413 BTRFS_QGROUP_LIMIT_RSV_RFER|
1414 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1415 };
1416
1417 for (c = 0;; c++) {
1418 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1419 if (errno == EBUSY && c < 10) {
1420 (void) btrfs_quota_scan_wait(fd);
1421 continue;
1422 }
1423 return -errno;
1424 }
1425
1426 break;
1427 }
1428
1429 return 1;
1430 }
1431
1432 /* Increase search key by one, to read the next item, if we can. */
1433 if (!btrfs_ioctl_search_args_inc(&args))
1434 break;
1435 }
1436
1437 return 0;
1438 }
1439
1440 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1441 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1442 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1443 int n_old_qgroups, n_old_parent_qgroups, r, i;
1444 uint64_t old_parent_id;
1445
1446 assert(fd >= 0);
1447
1448 /* Copies a reduced form of quota information from the old to
1449 * the new subvolume. */
1450
1451 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1452 if (n_old_qgroups <= 0) /* Nothing to copy */
1453 return n_old_qgroups;
1454
1455 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1456 if (r == -ENXIO)
1457 /* We have no parent, hence nothing to copy. */
1458 n_old_parent_qgroups = 0;
1459 else if (r < 0)
1460 return r;
1461 else {
1462 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1463 if (n_old_parent_qgroups < 0)
1464 return n_old_parent_qgroups;
1465 }
1466
1467 for (i = 0; i < n_old_qgroups; i++) {
1468 uint64_t id;
1469 int j;
1470
1471 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1472 if (r < 0)
1473 return r;
1474
1475 if (id == old_subvol_id) {
1476 /* The old subvolume was member of a qgroup
1477 * that had the same id, but a different level
1478 * as it self. Let's set up something similar
1479 * in the destination. */
1480 insert_intermediary_qgroup = true;
1481 break;
1482 }
1483
1484 for (j = 0; j < n_old_parent_qgroups; j++)
1485 if (old_parent_qgroups[j] == old_qgroups[i]) {
1486 /* The old subvolume shared a common
1487 * parent qgroup with its parent
1488 * subvolume. Let's set up something
1489 * similar in the destination. */
1490 copy_from_parent = true;
1491 }
1492 }
1493
1494 if (!insert_intermediary_qgroup && !copy_from_parent)
1495 return 0;
1496
1497 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1498 }
1499
/* Copies the quota limits of the old subvolume to the new one: first
 * those of the leaf qgroup, then those of the subtree qgroup, as long
 * as both subvolumes have one.
 *
 * Returns a negative value on error. Otherwise the result is > 0 if
 * any limit was copied, and 0 if not. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t src_qgroup, dst_qgroup;
        int leaf_changed, k;

        /* Step 1: the limits of the leaf qgroup */
        k = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (k < 0)
                return k;
        leaf_changed = k > 0;

        /* Step 2: the limits of the subtree qgroup, which is only
         * possible if source and destination both have one. */
        k = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &src_qgroup);
        if (k < 0)
                return k;
        if (k == 0)
                return leaf_changed;

        k = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &dst_qgroup);
        if (k < 0)
                return k;
        if (k == 0)
                return leaf_changed;

        k = btrfs_qgroup_copy_limits(fd, src_qgroup, dst_qgroup);

        /* A non-zero result is either an error or "copied", both are
         * passed on as they are. */
        return k != 0 ? k : leaf_changed;
}
1530
1531 static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolume, uint64_t old_subvol_id, BtrfsSnapshotFlags flags) {
1532
1533 struct btrfs_ioctl_search_args args = {
1534 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1535
1536 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1537 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1538
1539 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1540 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1541
1542 .key.min_transid = 0,
1543 .key.max_transid = (uint64_t) -1,
1544 };
1545
1546 struct btrfs_ioctl_vol_args_v2 vol_args = {
1547 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1548 .fd = old_fd,
1549 };
1550 _cleanup_close_ int subvolume_fd = -1;
1551 uint64_t new_subvol_id;
1552 int r;
1553
1554 assert(old_fd >= 0);
1555 assert(new_fd >= 0);
1556 assert(subvolume);
1557
1558 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1559
1560 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1561 return -errno;
1562
1563 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1564 !(flags & BTRFS_SNAPSHOT_QUOTA))
1565 return 0;
1566
1567 if (old_subvol_id == 0) {
1568 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1569 if (r < 0)
1570 return r;
1571 }
1572
1573 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1574 if (r < 0)
1575 return r;
1576
1577 if (flags & BTRFS_SNAPSHOT_QUOTA)
1578 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1579
1580 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1581
1582 if (flags & BTRFS_SNAPSHOT_QUOTA)
1583 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1584
1585 return 0;
1586 }
1587
1588 args.key.min_offset = args.key.max_offset = old_subvol_id;
1589
1590 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1591 const struct btrfs_ioctl_search_header *sh;
1592 unsigned i;
1593
1594 args.key.nr_items = 256;
1595 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1596 return -errno;
1597
1598 if (args.key.nr_items <= 0)
1599 break;
1600
1601 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1602 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1603 struct btrfs_ioctl_ino_lookup_args ino_args;
1604 const struct btrfs_root_ref *ref;
1605 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1606
1607 btrfs_ioctl_search_args_set(&args, sh);
1608
1609 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1610 continue;
1611
1612 /* Avoid finding the source subvolume a second
1613 * time */
1614 if (sh->offset != old_subvol_id)
1615 continue;
1616
1617 /* Avoid running into loops if the new
1618 * subvolume is below the old one. */
1619 if (sh->objectid == new_subvol_id)
1620 continue;
1621
1622 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1623 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1624 if (!p)
1625 return -ENOMEM;
1626
1627 zero(ino_args);
1628 ino_args.treeid = old_subvol_id;
1629 ino_args.objectid = htole64(ref->dirid);
1630
1631 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1632 return -errno;
1633
1634 /* The kernel returns an empty name if the
1635 * subvolume is in the top-level directory,
1636 * and otherwise appends a slash, so that we
1637 * can just concatenate easily here, without
1638 * adding a slash. */
1639 c = strappend(ino_args.name, p);
1640 if (!c)
1641 return -ENOMEM;
1642
1643 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1644 if (old_child_fd < 0)
1645 return -errno;
1646
1647 np = strjoin(subvolume, "/", ino_args.name, NULL);
1648 if (!np)
1649 return -ENOMEM;
1650
1651 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1652 if (new_child_fd < 0)
1653 return -errno;
1654
1655 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1656 /* If the snapshot is read-only we
1657 * need to mark it writable
1658 * temporarily, to put the subsnapshot
1659 * into place. */
1660
1661 if (subvolume_fd < 0) {
1662 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1663 if (subvolume_fd < 0)
1664 return -errno;
1665 }
1666
1667 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1668 if (r < 0)
1669 return r;
1670 }
1671
1672 /* When btrfs clones the subvolumes, child
1673 * subvolumes appear as empty directories. Remove
1674 * them, so that we can create a new snapshot
1675 * in their place */
1676 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1677 int k = -errno;
1678
1679 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1680 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1681
1682 return k;
1683 }
1684
1685 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1686
1687 /* Restore the readonly flag */
1688 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1689 int k;
1690
1691 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1692 if (r >= 0 && k < 0)
1693 return k;
1694 }
1695
1696 if (r < 0)
1697 return r;
1698 }
1699
1700 /* Increase search key by one, to read the next item, if we can. */
1701 if (!btrfs_ioctl_search_args_inc(&args))
1702 break;
1703 }
1704
1705 if (flags & BTRFS_SNAPSHOT_QUOTA)
1706 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1707
1708 return 0;
1709 }
1710
1711 int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
1712 _cleanup_close_ int new_fd = -1;
1713 const char *subvolume;
1714 int r;
1715
1716 assert(old_fd >= 0);
1717 assert(new_path);
1718
1719 r = btrfs_is_subvol_fd(old_fd);
1720 if (r < 0)
1721 return r;
1722 if (r == 0) {
1723 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1724 return -EISDIR;
1725
1726 r = btrfs_subvol_make(new_path);
1727 if (r < 0)
1728 return r;
1729
1730 r = copy_directory_fd(old_fd, new_path, true);
1731 if (r < 0) {
1732 (void) btrfs_subvol_remove(new_path, BTRFS_REMOVE_QUOTA);
1733 return r;
1734 }
1735
1736 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1737 r = btrfs_subvol_set_read_only(new_path, true);
1738 if (r < 0) {
1739 (void) btrfs_subvol_remove(new_path, BTRFS_REMOVE_QUOTA);
1740 return r;
1741 }
1742 }
1743
1744 return 0;
1745 }
1746
1747 r = extract_subvolume_name(new_path, &subvolume);
1748 if (r < 0)
1749 return r;
1750
1751 new_fd = open_parent(new_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1752 if (new_fd < 0)
1753 return new_fd;
1754
1755 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1756 }
1757
1758 int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
1759 _cleanup_close_ int old_fd = -1;
1760
1761 assert(old_path);
1762 assert(new_path);
1763
1764 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1765 if (old_fd < 0)
1766 return -errno;
1767
1768 return btrfs_subvol_snapshot_fd(old_fd, new_path, flags);
1769 }
1770
1771 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1772
1773 struct btrfs_ioctl_search_args args = {
1774 /* Tree of quota items */
1775 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1776
1777 /* Look precisely for the quota relation items */
1778 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1779 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1780
1781 /* No restrictions on the other components */
1782 .key.min_offset = 0,
1783 .key.max_offset = (uint64_t) -1,
1784
1785 .key.min_transid = 0,
1786 .key.max_transid = (uint64_t) -1,
1787 };
1788
1789 _cleanup_free_ uint64_t *items = NULL;
1790 size_t n_items = 0, n_allocated = 0;
1791 int r;
1792
1793 assert(fd >= 0);
1794 assert(ret);
1795
1796 if (qgroupid == 0) {
1797 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1798 if (r < 0)
1799 return r;
1800 } else {
1801 r = btrfs_is_filesystem(fd);
1802 if (r < 0)
1803 return r;
1804 if (!r)
1805 return -ENOTTY;
1806 }
1807
1808 args.key.min_objectid = args.key.max_objectid = qgroupid;
1809
1810 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1811 const struct btrfs_ioctl_search_header *sh;
1812 unsigned i;
1813
1814 args.key.nr_items = 256;
1815 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1816 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1817 break;
1818
1819 return -errno;
1820 }
1821
1822 if (args.key.nr_items <= 0)
1823 break;
1824
1825 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1826
1827 /* Make sure we start the next search at least from this entry */
1828 btrfs_ioctl_search_args_set(&args, sh);
1829
1830 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1831 continue;
1832 if (sh->offset < sh->objectid)
1833 continue;
1834 if (sh->objectid != qgroupid)
1835 continue;
1836
1837 if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
1838 return -ENOMEM;
1839
1840 items[n_items++] = sh->offset;
1841 }
1842
1843 /* Increase search key by one, to read the next item, if we can. */
1844 if (!btrfs_ioctl_search_args_inc(&args))
1845 break;
1846 }
1847
1848 if (n_items <= 0) {
1849 *ret = NULL;
1850 return 0;
1851 }
1852
1853 *ret = items;
1854 items = NULL;
1855
1856 return (int) n_items;
1857 }
1858
1859 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1860 _cleanup_free_ uint64_t *qgroups = NULL;
1861 uint64_t parent_subvol;
1862 bool changed = false;
1863 int n = 0, r;
1864
1865 assert(fd >= 0);
1866
1867 /*
1868 * Sets up the specified subvolume's qgroup automatically in
1869 * one of two ways:
1870 *
1871 * If insert_intermediary_qgroup is false, the subvolume's
1872 * leaf qgroup will be assigned to the same parent qgroups as
1873 * the subvolume's parent subvolume.
1874 *
1875 * If insert_intermediary_qgroup is true a new intermediary
1876 * higher-level qgroup is created, with a higher level number,
1877 * but reusing the id of the subvolume. The level number is
1878 * picked as one smaller than the lowest level qgroup the
1879 * parent subvolume is a member of. If the parent subvolume's
1880 * leaf qgroup is assigned to no higher-level qgroup a new
1881 * qgroup of level 255 is created instead. Either way, the new
1882 * qgroup is then assigned to the parent's higher-level
1883 * qgroup, and the subvolume itself is assigned to it.
1884 *
1885 * If the subvolume is already assigned to a higher level
1886 * qgroup, no operation is executed.
1887 *
1888 * Effectively this means: regardless if
1889 * insert_intermediary_qgroup is true or not, after this
1890 * function is invoked the subvolume will be accounted within
1891 * the same qgroups as the parent. However, if it is true, it
1892 * will also get its own higher-level qgroup, which may in
1893 * turn be used by subvolumes created beneath this subvolume
1894 * later on.
1895 *
1896 * This hence defines a simple default qgroup setup for
1897 * subvolumes, as long as this function is invoked on each
1898 * created subvolume: each subvolume is always accounting
1899 * together with its immediate parents. Optionally, if
1900 * insert_intermediary_qgroup is true, it will also get a
1901 * qgroup that then includes all its own child subvolumes.
1902 */
1903
1904 if (subvol_id == 0) {
1905 r = btrfs_is_subvol_fd(fd);
1906 if (r < 0)
1907 return r;
1908 if (!r)
1909 return -ENOTTY;
1910
1911 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1912 if (r < 0)
1913 return r;
1914 }
1915
1916 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1917 if (n < 0)
1918 return n;
1919 if (n > 0) /* already parent qgroups set up, let's bail */
1920 return 0;
1921
1922 qgroups = mfree(qgroups);
1923
1924 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1925 if (r == -ENXIO)
1926 /* No parent, hence no qgroup memberships */
1927 n = 0;
1928 else if (r < 0)
1929 return r;
1930 else {
1931 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1932 if (n < 0)
1933 return n;
1934 }
1935
1936 if (insert_intermediary_qgroup) {
1937 uint64_t lowest = 256, new_qgroupid;
1938 bool created = false;
1939 int i;
1940
1941 /* Determine the lowest qgroup that the parent
1942 * subvolume is assigned to. */
1943
1944 for (i = 0; i < n; i++) {
1945 uint64_t level;
1946
1947 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1948 if (r < 0)
1949 return r;
1950
1951 if (level < lowest)
1952 lowest = level;
1953 }
1954
1955 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1956 return -EBUSY;
1957
1958 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1959 if (r < 0)
1960 return r;
1961
1962 /* Create the new intermediary group, unless it already exists */
1963 r = btrfs_qgroup_create(fd, new_qgroupid);
1964 if (r < 0 && r != -EEXIST)
1965 return r;
1966 if (r >= 0)
1967 changed = created = true;
1968
1969 for (i = 0; i < n; i++) {
1970 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1971 if (r < 0 && r != -EEXIST) {
1972 if (created)
1973 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1974
1975 return r;
1976 }
1977 if (r >= 0)
1978 changed = true;
1979 }
1980
1981 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1982 if (r < 0 && r != -EEXIST) {
1983 if (created)
1984 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1985 return r;
1986 }
1987 if (r >= 0)
1988 changed = true;
1989
1990 } else {
1991 int i;
1992
1993 /* Assign our subvolume to all the same qgroups as the parent */
1994
1995 for (i = 0; i < n; i++) {
1996 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1997 if (r < 0 && r != -EEXIST)
1998 return r;
1999 if (r >= 0)
2000 changed = true;
2001 }
2002 }
2003
2004 return changed;
2005 }
2006
2007 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
2008 _cleanup_close_ int fd = -1;
2009
2010 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
2011 if (fd < 0)
2012 return -errno;
2013
2014 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
2015 }
2016
2017 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
2018
2019 struct btrfs_ioctl_search_args args = {
2020 /* Tree of tree roots */
2021 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
2022
2023 /* Look precisely for the subvolume items */
2024 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
2025 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
2026
2027 /* No restrictions on the other components */
2028 .key.min_offset = 0,
2029 .key.max_offset = (uint64_t) -1,
2030
2031 .key.min_transid = 0,
2032 .key.max_transid = (uint64_t) -1,
2033 };
2034 int r;
2035
2036 assert(fd >= 0);
2037 assert(ret);
2038
2039 if (subvol_id == 0) {
2040 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2041 if (r < 0)
2042 return r;
2043 } else {
2044 r = btrfs_is_filesystem(fd);
2045 if (r < 0)
2046 return r;
2047 if (!r)
2048 return -ENOTTY;
2049 }
2050
2051 args.key.min_objectid = args.key.max_objectid = subvol_id;
2052
2053 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
2054 const struct btrfs_ioctl_search_header *sh;
2055 unsigned i;
2056
2057 args.key.nr_items = 256;
2058 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
2059 return negative_errno();
2060
2061 if (args.key.nr_items <= 0)
2062 break;
2063
2064 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
2065
2066 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
2067 continue;
2068 if (sh->objectid != subvol_id)
2069 continue;
2070
2071 *ret = sh->offset;
2072 return 0;
2073 }
2074 }
2075
2076 return -ENXIO;
2077 }