]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/btrfs-util.c
btrfs-util: check current offset before read
[thirdparty/systemd.git] / src / shared / btrfs-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
d7c7c334 2
11c3a366
TA
3#include <errno.h>
4#include <fcntl.h>
5#include <inttypes.h>
01234e1f 6#include <linux/btrfs_tree.h>
17cbb288 7#include <linux/fs.h>
11c3a366 8#include <linux/loop.h>
01234e1f 9#include <linux/magic.h>
11c3a366
TA
10#include <stddef.h>
11#include <stdio.h>
d7c7c334 12#include <stdlib.h>
ac8db36c 13#include <sys/file.h>
11c3a366 14#include <sys/ioctl.h>
11c3a366
TA
15#include <sys/sysmacros.h>
16#include <unistd.h>
17
b5efdb8a 18#include "alloc-util.h"
18c528e9 19#include "blockdev-util.h"
3ffd4af2 20#include "btrfs-util.h"
24dbe603 21#include "chase.h"
17cbb288 22#include "chattr-util.h"
07630cea 23#include "copy.h"
3ffd4af2 24#include "fd-util.h"
07630cea 25#include "fileio.h"
ef8becfa 26#include "fs-util.h"
a90fb858 27#include "io-util.h"
07630cea 28#include "macro.h"
d7c7c334 29#include "path-util.h"
17cbb288 30#include "rm-rf.h"
d7b8eec7 31#include "smack-util.h"
11c3a366 32#include "sparse-endian.h"
872a590e 33#include "stat-util.h"
07630cea 34#include "string-util.h"
93cc7779 35#include "time-util.h"
d7c7c334 36
62572894
LP
37/* WARNING: Be careful with file system ioctls! When we get an fd, we
38 * need to make sure it either refers to only a regular file or
39 * directory, or that it is located on btrfs, before invoking any
40 * btrfs ioctls. The ioctl numbers are reused by some device drivers
41 * (such as DRM), and hence might have bad effects when invoked on
42 * device nodes (that reference drivers) rather than fds to normal
43 * files or directories. */
44
6d2fd8df 45int btrfs_is_subvol_at(int dir_fd, const char *path) {
21222ea5
LP
46 struct stat st;
47
6d2fd8df 48 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
21222ea5 49
cd61c3bf
LP
50 /* On btrfs subvolumes always have the inode 256 */
51
e089efd7 52 if (fstatat(dir_fd, strempty(path), &st, isempty(path) ? AT_EMPTY_PATH : 0) < 0)
d7c7c334
LP
53 return -errno;
54
674b04ff 55 if (!btrfs_might_be_subvol(&st))
d7c7c334
LP
56 return 0;
57
6d2fd8df 58 return is_fs_type_at(dir_fd, path, BTRFS_SUPER_MAGIC);
2904e949
LP
59}
60
77c66be3
DDM
61int btrfs_subvol_set_read_only_at(int dir_fd, const char *path, bool b) {
62 _cleanup_close_ int fd = -EBADF;
d7c7c334 63 uint64_t flags, nflags;
0d6e763b 64 struct stat st;
d7c7c334 65
77c66be3
DDM
66 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
67
e40b11be 68 fd = xopenat(dir_fd, path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
77c66be3
DDM
69 if (fd < 0)
70 return fd;
0d6e763b
LP
71
72 if (fstat(fd, &st) < 0)
d7c7c334
LP
73 return -errno;
74
674b04ff 75 if (!btrfs_might_be_subvol(&st))
0d6e763b
LP
76 return -EINVAL;
77
d7c7c334
LP
78 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
79 return -errno;
80
0da96503 81 nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);
d7c7c334
LP
82 if (flags == nflags)
83 return 0;
84
7c248223 85 return RET_NERRNO(ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags));
d7c7c334
LP
86}
87
10f9c755 88int btrfs_subvol_get_read_only_fd(int fd) {
cd61c3bf 89 uint64_t flags;
62572894
LP
90 struct stat st;
91
92 assert(fd >= 0);
93
94 if (fstat(fd, &st) < 0)
95 return -errno;
96
674b04ff 97 if (!btrfs_might_be_subvol(&st))
62572894 98 return -EINVAL;
cd61c3bf
LP
99
100 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
101 return -errno;
102
103 return !!(flags & BTRFS_SUBVOL_RDONLY);
104}
105
c706b27f 106int btrfs_get_block_device_at(int dir_fd, const char *path, dev_t *ret) {
d7c7c334 107 struct btrfs_ioctl_fs_info_args fsi = {};
c706b27f 108 _cleanup_close_ int fd = -EBADF;
d7c7c334 109 uint64_t id;
62572894 110 int r;
d7c7c334 111
c706b27f
DDM
112 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
113 assert(path);
114 assert(ret);
d7c7c334 115
e40b11be 116 fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
698bc186
LP
117 if (fd < 0)
118 return fd;
119
65ddc2c5 120 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
62572894
LP
121 if (r < 0)
122 return r;
79de6eb1 123 if (r == 0)
62572894
LP
124 return -ENOTTY;
125
d7c7c334
LP
126 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
127 return -errno;
128
129 /* We won't do this for btrfs RAID */
66ae5130 130 if (fsi.num_devices != 1) {
c706b27f 131 *ret = 0;
d7c7c334 132 return 0;
66ae5130 133 }
d7c7c334
LP
134
135 for (id = 1; id <= fsi.max_id; id++) {
136 struct btrfs_ioctl_dev_info_args di = {
137 .devid = id,
138 };
139 struct stat st;
140
141 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
142 if (errno == ENODEV)
143 continue;
144
145 return -errno;
146 }
147
67f0ac8c
LP
148 /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels from
149 * the past few years. That sucks, as we have no API to determine the actual root then. let's
150 * return an recognizable error for this case, so that the caller can maybe print a nice
151 * message about this.
152 *
153 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
154 if (path_equal((char*) di.path, "/dev/root"))
155 return -EUCLEAN;
156
d7c7c334
LP
157 if (stat((char*) di.path, &st) < 0)
158 return -errno;
159
160 if (!S_ISBLK(st.st_mode))
3468e5ac 161 return -ENOTBLK;
d7c7c334
LP
162
163 if (major(st.st_rdev) == 0)
164 return -ENODEV;
165
c706b27f 166 *ret = st.st_rdev;
d7c7c334
LP
167 return 1;
168 }
169
170 return -ENODEV;
171}
10f9c755
LP
172
173int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
174 struct btrfs_ioctl_ino_lookup_args args = {
175 .objectid = BTRFS_FIRST_FREE_OBJECTID
176 };
62572894 177 int r;
10f9c755
LP
178
179 assert(fd >= 0);
180 assert(ret);
181
65ddc2c5 182 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
62572894
LP
183 if (r < 0)
184 return r;
79de6eb1 185 if (r == 0)
62572894
LP
186 return -ENOTTY;
187
10f9c755
LP
188 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
189 return -errno;
190
191 *ret = args.treeid;
192 return 0;
193}
194
90578cbd 195int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
254d1313 196 _cleanup_close_ int subvol_fd = -EBADF;
90578cbd
LP
197
198 assert(fd >= 0);
199 assert(ret);
200
201 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
202 if (subvol_fd < 0)
203 return -errno;
204
205 return btrfs_subvol_get_id_fd(subvol_fd, ret);
206}
207
5743a585
LP
208static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
209 assert(args);
210
211 /* the objectid, type, offset together make up the btrfs key,
212 * which is considered a single 136byte integer when
213 * comparing. This call increases the counter by one, dealing
214 * with the overflow between the overflows */
215
f5fbe71d 216 if (args->key.min_offset < UINT64_MAX) {
5743a585
LP
217 args->key.min_offset++;
218 return true;
219 }
220
f5fbe71d 221 if (args->key.min_type < UINT8_MAX) {
5743a585
LP
222 args->key.min_type++;
223 args->key.min_offset = 0;
224 return true;
225 }
226
f5fbe71d 227 if (args->key.min_objectid < UINT64_MAX) {
5743a585
LP
228 args->key.min_objectid++;
229 args->key.min_offset = 0;
230 args->key.min_type = 0;
231 return true;
232 }
233
234 return 0;
235}
236
237static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
238 assert(args);
239 assert(h);
240
241 args->key.min_objectid = h->objectid;
242 args->key.min_type = h->type;
243 args->key.min_offset = h->offset;
244}
245
246static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
90c88092
YW
247 int r;
248
5743a585
LP
249 assert(args);
250
251 /* Compare min and max */
252
90c88092
YW
253 r = CMP(args->key.min_objectid, args->key.max_objectid);
254 if (r != 0)
255 return r;
5743a585 256
90c88092
YW
257 r = CMP(args->key.min_type, args->key.max_type);
258 if (r != 0)
259 return r;
5743a585 260
6dd91b36 261 return CMP(args->key.min_offset, args->key.max_offset);
5743a585
LP
262}
263
/* Cursor used by btrfs_iterate() while walking the items stored in the buffer of a tree search result */
typedef struct BtrfsForeachIterator {
        /* The search arguments/result buffer being walked */
        const struct btrfs_ioctl_search_args *args;
        /* Byte offset into args->buf at which the next item header starts */
        size_t offset;
        /* Number of items handed out so far, compared against args->key.nr_items */
        unsigned index;
        /* Where btrfs_iterate() stores a copy of the current item's header */
        struct btrfs_ioctl_search_header *header;
        /* Where btrfs_iterate() stores a pointer to the current item's payload inside args->buf */
        const void **body;
} BtrfsForeachIterator;
271
125cca1b
YW
272static int btrfs_iterate(BtrfsForeachIterator *i) {
273 assert(i);
274 assert(i->args);
275 assert(i->header);
276 assert(i->body);
277
278 if (i->index >= i->args->key.nr_items)
279 return 0; /* end */
280
281 assert_cc(BTRFS_SEARCH_ARGS_BUFSIZE >= sizeof(struct btrfs_ioctl_search_header));
282 if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header))
283 return -EBADMSG;
284
285 struct btrfs_ioctl_search_header h;
286 memcpy(&h, (const uint8_t*) i->args->buf + i->offset, sizeof(struct btrfs_ioctl_search_header));
287
288 if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header) - h.len)
289 return -EBADMSG;
290
291 *i->body = (const uint8_t*) i->args->buf + i->offset + sizeof(struct btrfs_ioctl_search_header);
292 *i->header = h;
293 i->offset += sizeof(struct btrfs_ioctl_search_header) + h.len;
294 i->index++;
295
296 return 1;
297}
298
801bf40c
LP
/* Iterates through the items found by a tree search whose result is stored in _args. On each iteration _sh
 * receives a copy of the item's header (the items are unfortunately not aligned, hence we copy the header
 * out) and _body is pointed at the item's payload inside the result buffer. The loop stops as soon as
 * btrfs_iterate() returns something that is not positive, i.e. both at the end of the items and when a
 * malformed item is encountered. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(_sh, _body, _args)            \
        for (BtrfsForeachIterator iterator = {                          \
                        .args = &(_args),                               \
                        .header = &(_sh),                               \
                        .body = &(_body),                               \
             };                                                         \
             btrfs_iterate(&iterator) > 0; )
5743a585 308
5bcd08db 309int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
10f9c755
LP
310 struct btrfs_ioctl_search_args args = {
311 /* Tree of tree roots */
b6b18498 312 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
10f9c755
LP
313
314 /* Look precisely for the subvolume items */
315 .key.min_type = BTRFS_ROOT_ITEM_KEY,
316 .key.max_type = BTRFS_ROOT_ITEM_KEY,
317
10f9c755 318 .key.min_offset = 0,
f5fbe71d 319 .key.max_offset = UINT64_MAX,
5743a585
LP
320
321 /* No restrictions on the other components */
10f9c755 322 .key.min_transid = 0,
f5fbe71d 323 .key.max_transid = UINT64_MAX,
10f9c755
LP
324 };
325
b6b18498 326 bool found = false;
10f9c755
LP
327 int r;
328
329 assert(fd >= 0);
330 assert(ret);
331
5bcd08db
LP
332 if (subvol_id == 0) {
333 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
334 if (r < 0)
335 return r;
336 } else {
65ddc2c5 337 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
338 if (r < 0)
339 return r;
79de6eb1 340 if (r == 0)
5bcd08db
LP
341 return -ENOTTY;
342 }
10f9c755
LP
343
344 args.key.min_objectid = args.key.max_objectid = subvol_id;
10f9c755 345
5743a585 346 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
347 struct btrfs_ioctl_search_header sh;
348 const void *body;
b6b18498
LP
349
350 args.key.nr_items = 256;
351 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
352 return -errno;
353
354 if (args.key.nr_items <= 0)
355 break;
10f9c755 356
801bf40c 357 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5743a585 358 /* Make sure we start the next search at least from this entry */
801bf40c 359 btrfs_ioctl_search_args_set(&args, &sh);
5743a585 360
801bf40c 361 if (sh.objectid != subvol_id)
b6b18498 362 continue;
801bf40c 363 if (sh.type != BTRFS_ROOT_ITEM_KEY)
b6b18498 364 continue;
5743a585
LP
365
366 /* Older versions of the struct lacked the otime setting */
801bf40c 367 if (sh.len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
b6b18498 368 continue;
10f9c755 369
801bf40c 370 const struct btrfs_root_item *ri = body;
b6b18498
LP
371 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
372 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
10f9c755 373
b6b18498 374 ret->subvol_id = subvol_id;
5d904a6a 375 ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
10f9c755 376
b6b18498
LP
377 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
378 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
379 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
380
381 found = true;
382 goto finish;
383 }
384
5743a585
LP
385 /* Increase search key by one, to read the next item, if we can. */
386 if (!btrfs_ioctl_search_args_inc(&args))
b6b18498
LP
387 break;
388 }
389
390finish:
246caacb 391 return found ? 0 : -ENODATA;
b6b18498
LP
392}
393
5bcd08db 394int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
b6b18498
LP
395
396 struct btrfs_ioctl_search_args args = {
397 /* Tree of quota items */
398 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
399
5743a585
LP
400 /* The object ID is always 0 */
401 .key.min_objectid = 0,
402 .key.max_objectid = 0,
403
b6b18498
LP
404 /* Look precisely for the quota items */
405 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
406 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
407
b6b18498
LP
408 /* No restrictions on the other components */
409 .key.min_transid = 0,
f5fbe71d 410 .key.max_transid = UINT64_MAX,
b6b18498
LP
411 };
412
b6b18498
LP
413 bool found_info = false, found_limit = false;
414 int r;
415
416 assert(fd >= 0);
417 assert(ret);
418
5bcd08db
LP
419 if (qgroupid == 0) {
420 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
421 if (r < 0)
422 return r;
423 } else {
65ddc2c5 424 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
425 if (r < 0)
426 return r;
79de6eb1 427 if (r == 0)
5bcd08db
LP
428 return -ENOTTY;
429 }
b6b18498 430
5bcd08db 431 args.key.min_offset = args.key.max_offset = qgroupid;
b6b18498 432
5743a585 433 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
434 struct btrfs_ioctl_search_header sh;
435 const void *body;
b6b18498
LP
436
437 args.key.nr_items = 256;
12ee6186
LP
438 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
439 if (errno == ENOENT) /* quota tree is missing: quota disabled */
440 break;
441
b6b18498 442 return -errno;
12ee6186 443 }
b6b18498
LP
444
445 if (args.key.nr_items <= 0)
446 break;
447
801bf40c 448 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
b6b18498 449
5743a585 450 /* Make sure we start the next search at least from this entry */
801bf40c 451 btrfs_ioctl_search_args_set(&args, &sh);
b6b18498 452
801bf40c 453 if (sh.objectid != 0)
b6b18498 454 continue;
801bf40c 455 if (sh.offset != qgroupid)
b6b18498
LP
456 continue;
457
801bf40c
LP
458 if (sh.type == BTRFS_QGROUP_INFO_KEY) {
459 const struct btrfs_qgroup_info_item *qii = body;
b6b18498 460
cb81cd80 461 ret->referenced = le64toh(qii->rfer);
b6b18498
LP
462 ret->exclusive = le64toh(qii->excl);
463
464 found_info = true;
465
801bf40c
LP
466 } else if (sh.type == BTRFS_QGROUP_LIMIT_KEY) {
467 const struct btrfs_qgroup_limit_item *qli = body;
b6b18498 468
5bcd08db
LP
469 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
470 ret->referenced_max = le64toh(qli->max_rfer);
471 else
f5fbe71d 472 ret->referenced_max = UINT64_MAX;
5bcd08db
LP
473
474 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
475 ret->exclusive_max = le64toh(qli->max_excl);
476 else
f5fbe71d 477 ret->exclusive_max = UINT64_MAX;
b6b18498
LP
478
479 found_limit = true;
480 }
481
482 if (found_info && found_limit)
483 goto finish;
484 }
485
5743a585
LP
486 /* Increase search key by one, to read the next item, if we can. */
487 if (!btrfs_ioctl_search_args_inc(&args))
b6b18498
LP
488 break;
489 }
490
491finish:
492 if (!found_limit && !found_info)
493 return -ENODATA;
494
495 if (!found_info) {
f5fbe71d
YW
496 ret->referenced = UINT64_MAX;
497 ret->exclusive = UINT64_MAX;
b6b18498
LP
498 }
499
500 if (!found_limit) {
f5fbe71d
YW
501 ret->referenced_max = UINT64_MAX;
502 ret->exclusive_max = UINT64_MAX;
b6b18498 503 }
10f9c755
LP
504
505 return 0;
506}
f27a3864 507
5bcd08db 508int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
254d1313 509 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
510
511 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
512 if (fd < 0)
513 return -errno;
514
515 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
516}
517
518int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
f5fbe71d 519 uint64_t level, lowest = UINT64_MAX, lowest_qgroupid = 0;
5bcd08db 520 _cleanup_free_ uint64_t *qgroups = NULL;
cecaba20 521 int r, n;
5bcd08db
LP
522
523 assert(fd >= 0);
524 assert(ret);
525
526 /* This finds the "subtree" qgroup for a specific
527 * subvolume. This only works for subvolumes that have been
528 * prepared with btrfs_subvol_auto_qgroup_fd() with
529 * insert_intermediary_qgroup=true (or equivalent). For others
530 * it will return the leaf qgroup instead. The two cases may
3a258d3a 531 * be distinguished via the return value, which is 1 in case
5bcd08db
LP
532 * an appropriate "subtree" qgroup was found, and 0
533 * otherwise. */
534
535 if (subvol_id == 0) {
536 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
537 if (r < 0)
538 return r;
539 }
540
541 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
542 if (r < 0)
543 return r;
544 if (level != 0) /* Input must be a leaf qgroup */
545 return -EINVAL;
546
547 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
548 if (n < 0)
549 return n;
550
cecaba20 551 for (int i = 0; i < n; i++) {
5bcd08db
LP
552 uint64_t id;
553
554 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
555 if (r < 0)
556 return r;
557
558 if (id != subvol_id)
559 continue;
560
f5fbe71d 561 if (lowest == UINT64_MAX || level < lowest) {
5bcd08db
LP
562 lowest_qgroupid = qgroups[i];
563 lowest = level;
564 }
565 }
566
f5fbe71d 567 if (lowest == UINT64_MAX) {
5bcd08db
LP
568 /* No suitable higher-level qgroup found, let's return
569 * the leaf qgroup instead, and indicate that with the
570 * return value. */
571
572 *ret = subvol_id;
573 return 0;
574 }
575
576 *ret = lowest_qgroupid;
577 return 1;
578}
579
580int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
581 uint64_t qgroupid;
582 int r;
583
584 assert(fd >= 0);
585 assert(ret);
586
587 /* This determines the quota data of the qgroup with the
588 * lowest level, that shares the id part with the specified
589 * subvolume. This is useful for determining the quota data
590 * for entire subvolume subtrees, as long as the subtrees have
591 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
592 * compatible way */
593
594 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
595 if (r < 0)
596 return r;
597
598 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
599}
600
601int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
254d1313 602 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
603
604 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
605 if (fd < 0)
606 return -errno;
607
608 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
609}
610
4117366a
YW
611int btrfs_defrag_fd(int fd) {
612 int r;
613
614 assert(fd >= 0);
615
616 r = fd_verify_regular(fd);
617 if (r < 0)
618 return r;
619
620 return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFRAG, NULL));
621}
622
f27a3864 623int btrfs_defrag(const char *p) {
254d1313 624 _cleanup_close_ int fd = -EBADF;
f27a3864
LP
625
626 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
627 if (fd < 0)
628 return -errno;
629
630 return btrfs_defrag_fd(fd);
631}
754061ce
LP
632
633int btrfs_quota_enable_fd(int fd, bool b) {
634 struct btrfs_ioctl_quota_ctl_args args = {
635 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
636 };
62572894 637 int r;
754061ce
LP
638
639 assert(fd >= 0);
640
65ddc2c5 641 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
62572894
LP
642 if (r < 0)
643 return r;
79de6eb1 644 if (r == 0)
62572894
LP
645 return -ENOTTY;
646
7c248223 647 return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args));
754061ce
LP
648}
649
650int btrfs_quota_enable(const char *path, bool b) {
254d1313 651 _cleanup_close_ int fd = -EBADF;
754061ce
LP
652
653 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
654 if (fd < 0)
655 return -errno;
656
657 return btrfs_quota_enable_fd(fd, b);
658}
d6ce17c7 659
5bcd08db
LP
660int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
661
d6ce17c7 662 struct btrfs_ioctl_qgroup_limit_args args = {
5bcd08db 663 .lim.max_rfer = referenced_max,
d6ce17c7
LP
664 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
665 };
62572894 666 int r;
d6ce17c7
LP
667
668 assert(fd >= 0);
669
5bcd08db
LP
670 if (qgroupid == 0) {
671 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
672 if (r < 0)
673 return r;
674 } else {
65ddc2c5 675 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
676 if (r < 0)
677 return r;
79de6eb1 678 if (r == 0)
5bcd08db
LP
679 return -ENOTTY;
680 }
62572894 681
5bcd08db
LP
682 args.qgroupid = qgroupid;
683
cecaba20 684 for (unsigned c = 0;; c++) {
5bcd08db
LP
685 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
686
687 if (errno == EBUSY && c < 10) {
688 (void) btrfs_quota_scan_wait(fd);
689 continue;
690 }
691
692 return -errno;
693 }
694
695 break;
696 }
d6ce17c7
LP
697
698 return 0;
699}
700
5bcd08db 701int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
254d1313 702 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
703
704 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
705 if (fd < 0)
706 return -errno;
707
708 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
709}
710
/* Sets the "referenced" limit on the qgroup that btrfs_subvol_find_subtree_qgroup() selects for the
 * specified subvolume. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
723
724int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
254d1313 725 _cleanup_close_ int fd = -EBADF;
d6ce17c7
LP
726
727 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
728 if (fd < 0)
729 return -errno;
730
5bcd08db 731 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
d6ce17c7 732}
efe02862 733
5bcd08db 734int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
3f952f92
LP
735 assert(ret);
736
737 if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
738 return -EINVAL;
739
740 if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
741 return -EINVAL;
742
743 *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
744 return 0;
745}
746
5bcd08db
LP
747int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
748 assert(level || id);
749
750 if (level)
751 *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
752
753 if (id)
754 *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
755
756 return 0;
757}
758
759static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
3f952f92
LP
760
761 struct btrfs_ioctl_qgroup_create_args args = {
762 .create = b,
5bcd08db 763 .qgroupid = qgroupid,
3f952f92 764 };
3f952f92
LP
765 int r;
766
65ddc2c5 767 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
3f952f92
LP
768 if (r < 0)
769 return r;
5bcd08db
LP
770 if (r == 0)
771 return -ENOTTY;
772
cecaba20 773 for (unsigned c = 0;; c++) {
5bcd08db
LP
774 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
775
4b019d2f
LP
776 /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
777 * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
778 * everywhere the same way. */
779
780 if (IN_SET(errno, EINVAL, ENOTCONN))
781 return -ENOTCONN;
be6d467c 782
5bcd08db
LP
783 if (errno == EBUSY && c < 10) {
784 (void) btrfs_quota_scan_wait(fd);
785 continue;
786 }
787
788 return -errno;
789 }
3f952f92 790
5bcd08db
LP
791 break;
792 }
793
794 return 0;
795}
796
/* Creates the qgroup 'qgroupid'; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ true, qgroupid);
}
800
/* Destroys the qgroup 'qgroupid'; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ false, qgroupid);
}
804
805int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
806 _cleanup_free_ uint64_t *qgroups = NULL;
807 uint64_t subvol_id;
cecaba20 808 int n, r;
5bcd08db
LP
809
810 /* Destroys the specified qgroup, but unassigns it from all
811 * its parents first. Also, it recursively destroys all
1b2a7d92 812 * qgroups it is assigned to that have the same id part of the
5bcd08db
LP
813 * qgroupid as the specified group. */
814
815 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
816 if (r < 0)
817 return r;
818
819 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
820 if (n < 0)
821 return n;
822
cecaba20 823 for (int i = 0; i < n; i++) {
5bcd08db
LP
824 uint64_t id;
825
826 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
827 if (r < 0)
828 return r;
829
830 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
831 if (r < 0)
832 return r;
833
834 if (id != subvol_id)
835 continue;
836
837 /* The parent qgroupid shares the same id part with
838 * us? If so, destroy it too. */
839
840 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
841 }
842
843 return btrfs_qgroup_destroy(fd, qgroupid);
844}
845
846int btrfs_quota_scan_start(int fd) {
847 struct btrfs_ioctl_quota_rescan_args args = {};
848
849 assert(fd >= 0);
850
7c248223 851 return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args));
3f952f92
LP
852}
853
5bcd08db
LP
854int btrfs_quota_scan_wait(int fd) {
855 assert(fd >= 0);
856
7c248223 857 return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT));
3f952f92
LP
858}
859
5bcd08db
LP
860int btrfs_quota_scan_ongoing(int fd) {
861 struct btrfs_ioctl_quota_rescan_args args = {};
862
863 assert(fd >= 0);
864
865 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
866 return -errno;
867
868 return !!args.flags;
3f952f92
LP
869}
870
5bcd08db
LP
871static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
872 struct btrfs_ioctl_qgroup_assign_args args = {
873 .assign = b,
874 .src = child,
875 .dst = parent,
876 };
5bcd08db
LP
877 int r;
878
65ddc2c5 879 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
880 if (r < 0)
881 return r;
882 if (r == 0)
883 return -ENOTTY;
884
cecaba20 885 for (unsigned c = 0;; c++) {
5bcd08db
LP
886 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
887 if (r < 0) {
888 if (errno == EBUSY && c < 10) {
889 (void) btrfs_quota_scan_wait(fd);
890 continue;
891 }
892
893 return -errno;
894 }
895
896 if (r == 0)
897 return 0;
898
899 /* If the return value is > 0, we need to request a rescan */
900
901 (void) btrfs_quota_scan_start(fd);
902 return 1;
903 }
904}
905
/* Assigns qgroup 'child' to qgroup 'parent'; see qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ true, child, parent);
}
909
/* Removes qgroup 'child' from qgroup 'parent'; see qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ false, child, parent);
}
913
914static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
d9e2daaf
LP
915 struct btrfs_ioctl_search_args args = {
916 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
917
918 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
919 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
920
921 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
922 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
923
924 .key.min_transid = 0,
f5fbe71d 925 .key.max_transid = UINT64_MAX,
d9e2daaf
LP
926 };
927
928 struct btrfs_ioctl_vol_args vol_args = {};
254d1313 929 _cleanup_close_ int subvol_fd = -EBADF;
62572894 930 struct stat st;
3986b258 931 bool made_writable = false;
d9e2daaf
LP
932 int r;
933
934 assert(fd >= 0);
935 assert(subvolume);
936
62572894
LP
937 if (fstat(fd, &st) < 0)
938 return -errno;
939
940 if (!S_ISDIR(st.st_mode))
941 return -EINVAL;
942
f7c9f4a2 943 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
d9e2daaf
LP
944 if (subvol_fd < 0)
945 return -errno;
946
ae1940d2
LP
947 /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
948 * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
949 * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
950 * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
951 * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
952 * let's prefer ENOTTY over EPERM/EACCES though. */
953 r = btrfs_is_subvol_fd(subvol_fd);
954 if (r < 0)
955 return r;
956 if (r == 0) /* Not a btrfs subvolume */
957 return -ENOTTY;
958
d9e2daaf
LP
959 if (subvol_id == 0) {
960 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
961 if (r < 0)
962 return r;
963 }
964
3f952f92
LP
965 /* First, try to remove the subvolume. If it happens to be
966 * already empty, this will just work. */
967 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
968 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
5bcd08db 969 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
3f952f92
LP
970 return 0;
971 }
5bcd08db 972 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
3f952f92
LP
973 return -errno;
974
975 /* OK, the subvolume is not empty, let's look for child
976 * subvolumes, and remove them, first */
977
d9e2daaf
LP
978 args.key.min_offset = args.key.max_offset = subvol_id;
979
980 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
981 struct btrfs_ioctl_search_header sh;
982 const void *body;
d9e2daaf
LP
983
984 args.key.nr_items = 256;
985 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
986 return -errno;
987
988 if (args.key.nr_items <= 0)
989 break;
990
801bf40c 991 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
d9e2daaf 992 _cleanup_free_ char *p = NULL;
d9e2daaf 993
801bf40c 994 btrfs_ioctl_search_args_set(&args, &sh);
d9e2daaf 995
801bf40c 996 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
d9e2daaf 997 continue;
801bf40c 998 if (sh.offset != subvol_id)
d9e2daaf
LP
999 continue;
1000
801bf40c 1001 const struct btrfs_root_ref *ref = body;
e5c41c61 1002 p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
d9e2daaf
LP
1003 if (!p)
1004 return -ENOMEM;
1005
41ab8c67
LP
1006 struct btrfs_ioctl_ino_lookup_args ino_args = {
1007 .treeid = subvol_id,
1008 .objectid = htole64(ref->dirid),
1009 };
d9e2daaf
LP
1010
1011 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1012 return -errno;
1013
3986b258
LP
1014 if (!made_writable) {
1015 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1016 if (r < 0)
1017 return r;
1018
1019 made_writable = true;
1020 }
1021
d9e2daaf
LP
1022 if (isempty(ino_args.name))
1023 /* Subvolume is in the top-level
1024 * directory of the subvolume. */
801bf40c 1025 r = subvol_remove_children(subvol_fd, p, sh.objectid, flags);
d9e2daaf 1026 else {
254d1313 1027 _cleanup_close_ int child_fd = -EBADF;
d9e2daaf
LP
1028
1029 /* Subvolume is somewhere further down,
1030 * hence we need to open the
1031 * containing directory first */
1032
f7c9f4a2 1033 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
d9e2daaf
LP
1034 if (child_fd < 0)
1035 return -errno;
1036
801bf40c 1037 r = subvol_remove_children(child_fd, p, sh.objectid, flags);
d9e2daaf
LP
1038 }
1039 if (r < 0)
1040 return r;
1041 }
1042
1043 /* Increase search key by one, to read the next item, if we can. */
1044 if (!btrfs_ioctl_search_args_inc(&args))
1045 break;
1046 }
1047
1048 /* OK, the child subvolumes should all be gone now, let's try
1049 * again to remove the subvolume */
1050 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1051 return -errno;
1052
5bcd08db 1053 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
d9e2daaf
LP
1054 return 0;
1055}
1056
24dbe603 1057int btrfs_subvol_remove_at(int dir_fd, const char *path, BtrfsRemoveFlags flags) {
03469b77 1058 _cleanup_free_ char *subvolume = NULL;
254d1313 1059 _cleanup_close_ int fd = -EBADF;
d9e2daaf
LP
1060 int r;
1061
1062 assert(path);
1063
24dbe603 1064 fd = chase_and_openat(dir_fd, path, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
d9e2daaf
LP
1065 if (fd < 0)
1066 return fd;
1067
e54c79cc 1068 r = btrfs_validate_subvolume_name(subvolume);
24dbe603
DDM
1069 if (r < 0)
1070 return r;
5bcd08db 1071
5bcd08db
LP
1072 return subvol_remove_children(fd, subvolume, 0, flags);
1073}
1074
1075int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1076
1077 struct btrfs_ioctl_search_args args = {
1078 /* Tree of quota items */
1079 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1080
1081 /* The object ID is always 0 */
1082 .key.min_objectid = 0,
1083 .key.max_objectid = 0,
1084
1085 /* Look precisely for the quota items */
1086 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1087 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1088
1089 /* For our qgroup */
1090 .key.min_offset = old_qgroupid,
1091 .key.max_offset = old_qgroupid,
1092
1093 /* No restrictions on the other components */
1094 .key.min_transid = 0,
f5fbe71d 1095 .key.max_transid = UINT64_MAX,
5bcd08db
LP
1096 };
1097
1098 int r;
1099
65ddc2c5 1100 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
1101 if (r < 0)
1102 return r;
79de6eb1 1103 if (r == 0)
5bcd08db
LP
1104 return -ENOTTY;
1105
1106 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1107 struct btrfs_ioctl_search_header sh;
1108 const void *body;
5bcd08db
LP
1109
1110 args.key.nr_items = 256;
12ee6186
LP
1111 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1112 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1113 break;
1114
5bcd08db 1115 return -errno;
12ee6186 1116 }
5bcd08db
LP
1117
1118 if (args.key.nr_items <= 0)
1119 break;
1120
801bf40c 1121 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5bcd08db
LP
1122 struct btrfs_ioctl_qgroup_limit_args qargs;
1123 unsigned c;
1124
1125 /* Make sure we start the next search at least from this entry */
801bf40c 1126 btrfs_ioctl_search_args_set(&args, &sh);
5bcd08db 1127
801bf40c 1128 if (sh.objectid != 0)
5bcd08db 1129 continue;
801bf40c 1130 if (sh.type != BTRFS_QGROUP_LIMIT_KEY)
5bcd08db 1131 continue;
801bf40c 1132 if (sh.offset != old_qgroupid)
5bcd08db
LP
1133 continue;
1134
1135 /* We found the entry, now copy things over. */
1136
801bf40c 1137 const struct btrfs_qgroup_limit_item *qli = body;
5bcd08db
LP
1138 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1139 .qgroupid = new_qgroupid,
1140
1141 .lim.max_rfer = le64toh(qli->max_rfer),
1142 .lim.max_excl = le64toh(qli->max_excl),
1143 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1144 .lim.rsv_excl = le64toh(qli->rsv_excl),
1145
1146 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1147 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1148 BTRFS_QGROUP_LIMIT_RSV_RFER|
1149 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1150 };
1151
1152 for (c = 0;; c++) {
1153 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1154 if (errno == EBUSY && c < 10) {
1155 (void) btrfs_quota_scan_wait(fd);
1156 continue;
1157 }
1158 return -errno;
1159 }
1160
1161 break;
1162 }
1163
1164 return 1;
1165 }
1166
1167 /* Increase search key by one, to read the next item, if we can. */
1168 if (!btrfs_ioctl_search_args_inc(&args))
1169 break;
1170 }
1171
1172 return 0;
d9e2daaf
LP
1173}
1174
5bcd08db
LP
1175static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1176 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1177 bool copy_from_parent = false, insert_intermediary_qgroup = false;
cecaba20 1178 int n_old_qgroups, n_old_parent_qgroups, r;
5bcd08db
LP
1179 uint64_t old_parent_id;
1180
1181 assert(fd >= 0);
1182
1183 /* Copies a reduced form of quota information from the old to
1184 * the new subvolume. */
1185
1186 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1187 if (n_old_qgroups <= 0) /* Nothing to copy */
1188 return n_old_qgroups;
1189
1190 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
08c77cf3
LP
1191 if (r == -ENXIO)
1192 /* We have no parent, hence nothing to copy. */
1193 n_old_parent_qgroups = 0;
1194 else if (r < 0)
5bcd08db 1195 return r;
08c77cf3
LP
1196 else {
1197 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1198 if (n_old_parent_qgroups < 0)
1199 return n_old_parent_qgroups;
1200 }
5bcd08db 1201
cecaba20 1202 for (int i = 0; i < n_old_qgroups; i++) {
5bcd08db 1203 uint64_t id;
5bcd08db
LP
1204
1205 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1206 if (r < 0)
1207 return r;
1208
1209 if (id == old_subvol_id) {
1210 /* The old subvolume was member of a qgroup
1211 * that had the same id, but a different level
1212 * as it self. Let's set up something similar
1213 * in the destination. */
1214 insert_intermediary_qgroup = true;
1215 break;
1216 }
1217
cecaba20 1218 for (int j = 0; j < n_old_parent_qgroups; j++)
d46b79bb 1219 if (old_parent_qgroups[j] == old_qgroups[i])
5bcd08db
LP
1220 /* The old subvolume shared a common
1221 * parent qgroup with its parent
1222 * subvolume. Let's set up something
1223 * similar in the destination. */
1224 copy_from_parent = true;
5bcd08db
LP
1225 }
1226
1227 if (!insert_intermediary_qgroup && !copy_from_parent)
1228 return 0;
1229
1230 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1231}
1232
/* Copies the quota limits from the old subvolume to the new one: first those of the leaf qgroup, then,
 * if both subvolumes have a subtree qgroup, those of the subtree qgroup.
 *
 * Returns a negative errno-style value on failure, > 0 if any limits were copied and 0 otherwise. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t old_subtree_qgroup, new_subtree_qgroup;
        bool leaf_copied;
        int r;

        /* Step 1: the leaf qgroup limits */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;
        leaf_copied = r > 0;

        /* Step 2: the subtree qgroup limits, provided both sides have a subtree qgroup */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup);

        /* Propagate both errors and "something was copied"; otherwise report the result of step 1 */
        return r != 0 ? r : leaf_copied;
}
f70a17f8 1263
b3cade0c
LP
1264static int subvol_snapshot_children(
1265 int old_fd,
1266 int new_fd,
1267 const char *subvolume,
1268 uint64_t old_subvol_id,
1269 BtrfsSnapshotFlags flags) {
f70a17f8
LP
1270
1271 struct btrfs_ioctl_search_args args = {
1272 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1273
1274 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1275 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1276
1277 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1278 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1279
1280 .key.min_transid = 0,
f5fbe71d 1281 .key.max_transid = UINT64_MAX,
f70a17f8
LP
1282 };
1283
1284 struct btrfs_ioctl_vol_args_v2 vol_args = {
1285 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1286 .fd = old_fd,
1287 };
254d1313 1288 _cleanup_close_ int subvolume_fd = -EBADF;
90578cbd
LP
1289 uint64_t new_subvol_id;
1290 int r;
f70a17f8
LP
1291
1292 assert(old_fd >= 0);
1293 assert(new_fd >= 0);
1294 assert(subvolume);
1295
1296 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
f70a17f8
LP
1297
1298 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1299 return -errno;
1300
d54f60c2
DDM
1301 if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
1302 subvolume_fd = xopenat_lock(new_fd, subvolume,
1303 O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
d54f60c2
DDM
1304 LOCK_BSD,
1305 LOCK_EX);
1306 if (subvolume_fd < 0)
1307 return subvolume_fd;
1308
1309 r = btrfs_is_subvol_fd(subvolume_fd);
1310 if (r < 0)
1311 return r;
1312 if (r == 0)
1313 return -EEXIST;
1314 }
1315
5bcd08db
LP
1316 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1317 !(flags & BTRFS_SNAPSHOT_QUOTA))
d54f60c2 1318 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
f70a17f8 1319
90578cbd
LP
1320 if (old_subvol_id == 0) {
1321 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
f70a17f8
LP
1322 if (r < 0)
1323 return r;
1324 }
1325
90578cbd
LP
1326 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1327 if (r < 0)
1328 return r;
1329
5bcd08db
LP
1330 if (flags & BTRFS_SNAPSHOT_QUOTA)
1331 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1332
1333 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1334
1335 if (flags & BTRFS_SNAPSHOT_QUOTA)
1336 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1337
d54f60c2 1338 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
5bcd08db
LP
1339 }
1340
90578cbd 1341 args.key.min_offset = args.key.max_offset = old_subvol_id;
f70a17f8
LP
1342
1343 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1344 struct btrfs_ioctl_search_header sh;
1345 const void *body;
f70a17f8
LP
1346
1347 args.key.nr_items = 256;
1348 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1349 return -errno;
1350
1351 if (args.key.nr_items <= 0)
1352 break;
1353
801bf40c 1354 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
f70a17f8 1355 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
254d1313 1356 _cleanup_close_ int old_child_fd = -EBADF, new_child_fd = -EBADF;
f70a17f8 1357
801bf40c 1358 btrfs_ioctl_search_args_set(&args, &sh);
f70a17f8 1359
801bf40c 1360 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
f70a17f8 1361 continue;
90578cbd 1362
801bf40c
LP
1363 /* Avoid finding the source subvolume a second time */
1364 if (sh.offset != old_subvol_id)
f70a17f8
LP
1365 continue;
1366
801bf40c
LP
1367 /* Avoid running into loops if the new subvolume is below the old one. */
1368 if (sh.objectid == new_subvol_id)
90578cbd 1369 continue;
f70a17f8 1370
801bf40c 1371 const struct btrfs_root_ref *ref = body;
e5c41c61 1372 p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
f70a17f8
LP
1373 if (!p)
1374 return -ENOMEM;
1375
41ab8c67
LP
1376 struct btrfs_ioctl_ino_lookup_args ino_args = {
1377 .treeid = old_subvol_id,
1378 .objectid = htole64(ref->dirid),
1379 };
f70a17f8
LP
1380
1381 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1382 return -errno;
1383
b910cc72 1384 c = path_join(ino_args.name, p);
f70a17f8
LP
1385 if (!c)
1386 return -ENOMEM;
1387
f7c9f4a2 1388 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
f70a17f8
LP
1389 if (old_child_fd < 0)
1390 return -errno;
1391
657ee2d8 1392 np = path_join(subvolume, ino_args.name);
f70a17f8
LP
1393 if (!np)
1394 return -ENOMEM;
1395
f7c9f4a2 1396 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
f70a17f8
LP
1397 if (new_child_fd < 0)
1398 return -errno;
1399
ffb296b2 1400 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
801bf40c
LP
1401 /* If the snapshot is read-only we need to mark it writable temporarily, to
1402 * put the subsnapshot into place. */
ffb296b2
LP
1403
1404 if (subvolume_fd < 0) {
f7c9f4a2 1405 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
ffb296b2
LP
1406 if (subvolume_fd < 0)
1407 return -errno;
1408 }
1409
1410 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1411 if (r < 0)
1412 return r;
1413 }
1414
801bf40c
LP
1415 /* When btrfs clones the subvolumes, child subvolumes appear as empty
1416 * directories. Remove them, so that we can create a new snapshot in their place */
ffb296b2
LP
1417 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1418 int k = -errno;
1419
1420 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1421 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1422
1423 return k;
1424 }
f70a17f8 1425
801bf40c 1426 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh.objectid,
d54f60c2 1427 flags & ~(BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_LOCK_BSD));
ffb296b2
LP
1428
1429 /* Restore the readonly flag */
1430 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1431 int k;
1432
1433 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1434 if (r >= 0 && k < 0)
1435 return k;
1436 }
1437
f70a17f8
LP
1438 if (r < 0)
1439 return r;
1440 }
1441
1442 /* Increase search key by one, to read the next item, if we can. */
1443 if (!btrfs_ioctl_search_args_inc(&args))
1444 break;
1445 }
1446
5bcd08db
LP
1447 if (flags & BTRFS_SNAPSHOT_QUOTA)
1448 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1449
d54f60c2 1450 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
f70a17f8
LP
1451}
1452
fab4ef72
DDM
1453int btrfs_subvol_snapshot_at_full(
1454 int dir_fdf,
1455 const char *from,
1456 int dir_fdt,
1457 const char *to,
b3cade0c
LP
1458 BtrfsSnapshotFlags flags,
1459 copy_progress_path_t progress_path,
1460 copy_progress_bytes_t progress_bytes,
1461 void *userdata) {
1462
03469b77 1463 _cleanup_free_ char *subvolume = NULL;
d54f60c2 1464 _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF, subvolume_fd = -EBADF;
f70a17f8
LP
1465 int r;
1466
fab4ef72
DDM
1467 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1468 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1469 assert(to);
1470
e40b11be 1471 old_fd = xopenat(dir_fdf, from, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
fab4ef72
DDM
1472 if (old_fd < 0)
1473 return old_fd;
f70a17f8 1474
fab4ef72
DDM
1475 new_fd = chase_and_openat(dir_fdt, to, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
1476 if (new_fd < 0)
1477 return new_fd;
1478
e54c79cc 1479 r = btrfs_validate_subvolume_name(subvolume);
fab4ef72
DDM
1480 if (r < 0)
1481 return r;
1482
1483 r = btrfs_is_subvol_at(dir_fdf, from);
f70a17f8
LP
1484 if (r < 0)
1485 return r;
1486 if (r == 0) {
17cbb288
LP
1487 bool plain_directory = false;
1488
1489 /* If the source isn't a proper subvolume, fail unless fallback is requested */
f70a17f8
LP
1490 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1491 return -EISDIR;
1492
e54c79cc 1493 r = btrfs_subvol_make(new_fd, subvolume);
08b8e913
DL
1494 if (r < 0) {
1495 if (ERRNO_IS_NOT_SUPPORTED(r) && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1496 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1497 if (mkdirat(new_fd, subvolume, 0755) < 0)
1498 return -errno;
17cbb288 1499
08b8e913
DL
1500 plain_directory = true;
1501 } else
1502 return r;
1503 }
f70a17f8 1504
d54f60c2
DDM
1505 if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
1506 subvolume_fd = xopenat_lock(new_fd, subvolume,
1507 O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
d54f60c2
DDM
1508 LOCK_BSD,
1509 LOCK_EX);
1510 if (subvolume_fd < 0)
1511 return subvolume_fd;
1512
1513 if (!plain_directory) {
1514 r = btrfs_is_subvol_fd(subvolume_fd);
1515 if (r < 0)
1516 return r;
1517 if (r == 0)
1518 return -EEXIST;
1519 }
1520 }
1521
f9f70e06 1522 r = copy_directory_at_full(
fab4ef72
DDM
1523 dir_fdf, from,
1524 new_fd, subvolume,
28ba7e36
LP
1525 COPY_MERGE_EMPTY|
1526 COPY_REFLINK|
1527 COPY_SAME_MOUNT|
1528 COPY_HARDLINKS|
23e026de 1529 COPY_ALL_XATTRS|
28ba7e36
LP
1530 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0)|
1531 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGTERM) ? COPY_SIGTERM : 0),
1532 progress_path,
1533 progress_bytes,
1534 userdata);
17cbb288
LP
1535 if (r < 0)
1536 goto fallback_fail;
f70a17f8
LP
1537
1538 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
17cbb288
LP
1539
1540 if (plain_directory) {
1541 /* Plain directories have no recursive read-only flag, but something pretty close to
1542 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1543
1544 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
fab4ef72 1545 (void) chattr_at(new_fd, subvolume, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
17cbb288 1546 } else {
fab4ef72 1547 r = btrfs_subvol_set_read_only_at(new_fd, subvolume, true);
17cbb288
LP
1548 if (r < 0)
1549 goto fallback_fail;
f70a17f8
LP
1550 }
1551 }
1552
d54f60c2 1553 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
17cbb288
LP
1554
1555 fallback_fail:
fab4ef72 1556 (void) rm_rf_at(new_fd, subvolume, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
17cbb288 1557 return r;
f70a17f8
LP
1558 }
1559
f70a17f8
LP
1560 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1561}
1562
5bcd08db
LP
1563int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1564
1565 struct btrfs_ioctl_search_args args = {
1566 /* Tree of quota items */
1567 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1568
1569 /* Look precisely for the quota relation items */
1570 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1571 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1572
1573 /* No restrictions on the other components */
1574 .key.min_offset = 0,
f5fbe71d 1575 .key.max_offset = UINT64_MAX,
5bcd08db
LP
1576
1577 .key.min_transid = 0,
f5fbe71d 1578 .key.max_transid = UINT64_MAX,
5bcd08db
LP
1579 };
1580
1581 _cleanup_free_ uint64_t *items = NULL;
319a4f4b 1582 size_t n_items = 0;
5bcd08db
LP
1583 int r;
1584
1585 assert(fd >= 0);
1586 assert(ret);
1587
1588 if (qgroupid == 0) {
1589 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1590 if (r < 0)
1591 return r;
1592 } else {
65ddc2c5 1593 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
1594 if (r < 0)
1595 return r;
79de6eb1 1596 if (r == 0)
5bcd08db
LP
1597 return -ENOTTY;
1598 }
1599
1600 args.key.min_objectid = args.key.max_objectid = qgroupid;
1601
1602 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1603 struct btrfs_ioctl_search_header sh;
1604 _unused_ const void *body;
5bcd08db
LP
1605
1606 args.key.nr_items = 256;
12ee6186
LP
1607 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1608 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1609 break;
1610
5bcd08db 1611 return -errno;
12ee6186 1612 }
5bcd08db
LP
1613
1614 if (args.key.nr_items <= 0)
1615 break;
1616
801bf40c 1617 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5bcd08db
LP
1618
1619 /* Make sure we start the next search at least from this entry */
801bf40c 1620 btrfs_ioctl_search_args_set(&args, &sh);
5bcd08db 1621
801bf40c 1622 if (sh.type != BTRFS_QGROUP_RELATION_KEY)
5bcd08db 1623 continue;
801bf40c 1624 if (sh.offset < sh.objectid)
5bcd08db 1625 continue;
801bf40c 1626 if (sh.objectid != qgroupid)
5bcd08db
LP
1627 continue;
1628
319a4f4b 1629 if (!GREEDY_REALLOC(items, n_items+1))
5bcd08db
LP
1630 return -ENOMEM;
1631
801bf40c 1632 items[n_items++] = sh.offset;
5bcd08db
LP
1633 }
1634
1635 /* Increase search key by one, to read the next item, if we can. */
1636 if (!btrfs_ioctl_search_args_inc(&args))
1637 break;
1638 }
1639
1640 if (n_items <= 0) {
1641 *ret = NULL;
1642 return 0;
1643 }
1644
1cc6c93a 1645 *ret = TAKE_PTR(items);
5bcd08db
LP
1646
1647 return (int) n_items;
1648}
1649
1650int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1651 _cleanup_free_ uint64_t *qgroups = NULL;
254d1313 1652 _cleanup_close_ int real_fd = -EBADF;
5bcd08db
LP
1653 uint64_t parent_subvol;
1654 bool changed = false;
1655 int n = 0, r;
1656
1657 assert(fd >= 0);
1658
1659 /*
1660 * Sets up the specified subvolume's qgroup automatically in
1661 * one of two ways:
1662 *
1663 * If insert_intermediary_qgroup is false, the subvolume's
1664 * leaf qgroup will be assigned to the same parent qgroups as
1665 * the subvolume's parent subvolume.
1666 *
1667 * If insert_intermediary_qgroup is true a new intermediary
1668 * higher-level qgroup is created, with a higher level number,
1669 * but reusing the id of the subvolume. The level number is
1670 * picked as one smaller than the lowest level qgroup the
1671 * parent subvolume is a member of. If the parent subvolume's
1672 * leaf qgroup is assigned to no higher-level qgroup a new
1673 * qgroup of level 255 is created instead. Either way, the new
1674 * qgroup is then assigned to the parent's higher-level
1675 * qgroup, and the subvolume itself is assigned to it.
1676 *
1677 * If the subvolume is already assigned to a higher level
1678 * qgroup, no operation is executed.
1679 *
1680 * Effectively this means: regardless if
1681 * insert_intermediary_qgroup is true or not, after this
1682 * function is invoked the subvolume will be accounted within
1683 * the same qgroups as the parent. However, if it is true, it
1684 * will also get its own higher-level qgroup, which may in
1685 * turn be used by subvolumes created beneath this subvolume
1686 * later on.
1687 *
1688 * This hence defines a simple default qgroup setup for
1689 * subvolumes, as long as this function is invoked on each
1690 * created subvolume: each subvolume is always accounting
1691 * together with its immediate parents. Optionally, if
1692 * insert_intermediary_qgroup is true, it will also get a
1693 * qgroup that then includes all its own child subvolumes.
1694 */
1695
e6d1d4c0
LP
1696 /* Turn this into a proper fd, if it is currently O_PATH */
1697 fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC, O_PATH, &real_fd);
1698 if (fd < 0)
1699 return fd;
1700
5bcd08db 1701 if (subvol_id == 0) {
2904e949 1702 r = btrfs_is_subvol_fd(fd);
5bcd08db
LP
1703 if (r < 0)
1704 return r;
1705 if (!r)
1706 return -ENOTTY;
1707
1708 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1709 if (r < 0)
1710 return r;
1711 }
1712
1713 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1714 if (n < 0)
1715 return n;
1716 if (n > 0) /* already parent qgroups set up, let's bail */
1717 return 0;
1718
08c77cf3
LP
1719 qgroups = mfree(qgroups);
1720
5bcd08db 1721 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
08c77cf3
LP
1722 if (r == -ENXIO)
1723 /* No parent, hence no qgroup memberships */
1724 n = 0;
1725 else if (r < 0)
5bcd08db 1726 return r;
08c77cf3
LP
1727 else {
1728 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1729 if (n < 0)
1730 return n;
1731 }
5bcd08db
LP
1732
1733 if (insert_intermediary_qgroup) {
1734 uint64_t lowest = 256, new_qgroupid;
1735 bool created = false;
5bcd08db
LP
1736
1737 /* Determine the lowest qgroup that the parent
1738 * subvolume is assigned to. */
1739
cecaba20 1740 for (int i = 0; i < n; i++) {
5bcd08db
LP
1741 uint64_t level;
1742
1743 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1744 if (r < 0)
1745 return r;
1746
1747 if (level < lowest)
1748 lowest = level;
1749 }
1750
1751 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1752 return -EBUSY;
1753
1754 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1755 if (r < 0)
1756 return r;
1757
1758 /* Create the new intermediary group, unless it already exists */
1759 r = btrfs_qgroup_create(fd, new_qgroupid);
1760 if (r < 0 && r != -EEXIST)
1761 return r;
1762 if (r >= 0)
1763 changed = created = true;
1764
cecaba20 1765 for (int i = 0; i < n; i++) {
5bcd08db
LP
1766 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1767 if (r < 0 && r != -EEXIST) {
1768 if (created)
1769 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1770
1771 return r;
1772 }
1773 if (r >= 0)
1774 changed = true;
1775 }
1776
1777 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1778 if (r < 0 && r != -EEXIST) {
1779 if (created)
1780 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1781 return r;
1782 }
1783 if (r >= 0)
1784 changed = true;
1785
1786 } else {
1787 int i;
1788
1789 /* Assign our subvolume to all the same qgroups as the parent */
1790
1791 for (i = 0; i < n; i++) {
1792 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1793 if (r < 0 && r != -EEXIST)
1794 return r;
1795 if (r >= 0)
1796 changed = true;
1797 }
1798 }
1799
1800 return changed;
1801}
1802
1803int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
254d1313 1804 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
1805
1806 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1807 if (fd < 0)
1808 return -errno;
1809
1810 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
1811}
1812
3799fa80
DDM
1813int btrfs_subvol_make_default(const char *path) {
1814 _cleanup_close_ int fd = -EBADF;
1815 uint64_t id;
1816 int r;
1817
1818 assert(path);
1819
1820 fd = open(path, O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1821 if (fd < 0)
1822 return -errno;
1823
1824 r = btrfs_subvol_get_id_fd(fd, &id);
1825 if (r < 0)
1826 return r;
1827
1828 return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &id));
1829}
1830
5bcd08db
LP
1831int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
1832
1833 struct btrfs_ioctl_search_args args = {
1834 /* Tree of tree roots */
1835 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1836
1837 /* Look precisely for the subvolume items */
1838 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1839 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1840
1841 /* No restrictions on the other components */
1842 .key.min_offset = 0,
f5fbe71d 1843 .key.max_offset = UINT64_MAX,
5bcd08db
LP
1844
1845 .key.min_transid = 0,
f5fbe71d 1846 .key.max_transid = UINT64_MAX,
5bcd08db
LP
1847 };
1848 int r;
1849
1850 assert(fd >= 0);
1851 assert(ret);
1852
1853 if (subvol_id == 0) {
1854 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1855 if (r < 0)
1856 return r;
1857 } else {
65ddc2c5 1858 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
1859 if (r < 0)
1860 return r;
79de6eb1 1861 if (r == 0)
5bcd08db
LP
1862 return -ENOTTY;
1863 }
1864
1865 args.key.min_objectid = args.key.max_objectid = subvol_id;
1866
1867 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1868 struct btrfs_ioctl_search_header sh;
1869 _unused_ const void *body = NULL;
5bcd08db
LP
1870
1871 args.key.nr_items = 256;
1872 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
9c4615fb 1873 return negative_errno();
5bcd08db
LP
1874
1875 if (args.key.nr_items <= 0)
1876 break;
1877
801bf40c 1878 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5bcd08db 1879
801bf40c 1880 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
5bcd08db 1881 continue;
801bf40c 1882 if (sh.objectid != subvol_id)
5bcd08db
LP
1883 continue;
1884
801bf40c 1885 *ret = sh.offset;
5bcd08db
LP
1886 return 0;
1887 }
1888 }
1889
1890 return -ENXIO;
1891}
5228c58e
DDM
1892
1893int btrfs_forget_device(const char *path) {
1894 _cleanup_close_ int control_fd = -EBADF;
1895 struct btrfs_ioctl_vol_args args = {};
1896
1897 assert(path);
1898
1899 if (strlen(path) > BTRFS_PATH_NAME_MAX)
1900 return -E2BIG;
1901
1902 strcpy(args.name, path);
1903
1904 control_fd = open("/dev/btrfs-control", O_RDWR|O_CLOEXEC);
1905 if (control_fd < 0)
1906 return -errno;
1907
1908 return RET_NERRNO(ioctl(control_fd, BTRFS_IOC_FORGET_DEV, &args));
1909}
efb6a76a
MY
1910
/* One stripe of a chunk, with all fields converted to host byte order */
typedef struct BtrfsStripe {
        uint64_t devid;         /* taken from struct btrfs_stripe's devid field */
        uint64_t offset;        /* taken from struct btrfs_stripe's offset field */
} BtrfsStripe;

/* One item of the chunk tree, with all fields converted to host byte order */
typedef struct BtrfsChunk {
        uint64_t offset;        /* key offset of the chunk item; compared against logical addresses */
        uint64_t length;
        uint64_t type;

        BtrfsStripe *stripes;   /* array of n_stripes entries, owned by the chunk */
        uint16_t n_stripes;
        uint64_t stripe_len;
} BtrfsChunk;

/* All chunk items read from the chunk tree */
typedef struct BtrfsChunkTree {
        BtrfsChunk **chunks;    /* array of n_chunks entries, each entry owned by the tree */
        size_t n_chunks;
} BtrfsChunkTree;
1930
1931static BtrfsChunk* btrfs_chunk_free(BtrfsChunk *chunk) {
1932 if (!chunk)
1933 return NULL;
1934
1935 free(chunk->stripes);
1936
1937 return mfree(chunk);
1938}
1939
1940DEFINE_TRIVIAL_CLEANUP_FUNC(BtrfsChunk*, btrfs_chunk_free);
1941
1942static void btrfs_chunk_tree_done(BtrfsChunkTree *tree) {
1943 assert(tree);
1944
1945 FOREACH_ARRAY(i, tree->chunks, tree->n_chunks)
1946 btrfs_chunk_free(*i);
e504e465
MY
1947
1948 free(tree->chunks);
efb6a76a
MY
1949}
1950
1951static int btrfs_read_chunk_tree_fd(int fd, BtrfsChunkTree *ret) {
1952
1953 struct btrfs_ioctl_search_args search_args = {
1954 .key.tree_id = BTRFS_CHUNK_TREE_OBJECTID,
1955
1956 .key.min_type = BTRFS_CHUNK_ITEM_KEY,
1957 .key.max_type = BTRFS_CHUNK_ITEM_KEY,
1958
1959 .key.min_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1960 .key.max_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1961
1962 .key.min_offset = 0,
1963 .key.max_offset = UINT64_MAX,
1964
1965 .key.min_transid = 0,
1966 .key.max_transid = UINT64_MAX,
1967 };
1968
1969 _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
1970
1971 assert(fd >= 0);
1972 assert(ret);
1973
1974 while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
801bf40c
LP
1975 struct btrfs_ioctl_search_header sh;
1976 const void *body;
efb6a76a
MY
1977
1978 search_args.key.nr_items = 256;
1979
1980 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
1981 return -errno;
1982
1983 if (search_args.key.nr_items == 0)
1984 break;
1985
801bf40c 1986 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
efb6a76a 1987 _cleanup_(btrfs_chunk_freep) BtrfsChunk *chunk = NULL;
efb6a76a 1988
801bf40c 1989 btrfs_ioctl_search_args_set(&search_args, &sh);
efb6a76a 1990
801bf40c 1991 if (sh.objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)
efb6a76a 1992 continue;
801bf40c 1993 if (sh.type != BTRFS_CHUNK_ITEM_KEY)
efb6a76a
MY
1994 continue;
1995
1996 chunk = new(BtrfsChunk, 1);
1997 if (!chunk)
1998 return -ENOMEM;
1999
801bf40c 2000 const struct btrfs_chunk *item = body;
efb6a76a 2001 *chunk = (BtrfsChunk) {
801bf40c 2002 .offset = sh.offset,
efb6a76a
MY
2003 .length = le64toh(item->length),
2004 .type = le64toh(item->type),
2005 .n_stripes = le16toh(item->num_stripes),
2006 .stripe_len = le64toh(item->stripe_len),
2007 };
2008
2009 chunk->stripes = new(BtrfsStripe, chunk->n_stripes);
2010 if (!chunk->stripes)
2011 return -ENOMEM;
2012
2013 for (size_t j = 0; j < chunk->n_stripes; j++) {
2014 const struct btrfs_stripe *stripe = &item->stripe + j;
2015
2016 chunk->stripes[j] = (BtrfsStripe) {
2017 .devid = le64toh(stripe->devid),
2018 .offset = le64toh(stripe->offset),
2019 };
2020 }
2021
2022 if (!GREEDY_REALLOC(tree.chunks, tree.n_chunks + 1))
2023 return -ENOMEM;
2024
2025 tree.chunks[tree.n_chunks++] = TAKE_PTR(chunk);
2026 }
2027
2028 if (!btrfs_ioctl_search_args_inc(&search_args))
2029 break;
2030 }
2031
2032 *ret = TAKE_STRUCT(tree);
2033 return 0;
2034}
2035
2036static BtrfsChunk* btrfs_find_chunk_from_logical_address(const BtrfsChunkTree *tree, uint64_t logical) {
2037 size_t min_index, max_index;
2038
2039 assert(tree);
2040 assert(tree->chunks || tree->n_chunks == 0);
2041
2042 if (tree->n_chunks == 0)
2043 return NULL;
2044
2045 /* bisection */
2046 min_index = 0;
2047 max_index = tree->n_chunks - 1;
2048
2049 while (min_index <= max_index) {
2050 size_t mid = (min_index + max_index) / 2;
2051
2052 if (logical < tree->chunks[mid]->offset) {
2053 if (mid < 1)
2054 return NULL;
2055
2056 max_index = mid - 1;
2057 } else if (logical >= tree->chunks[mid]->offset + tree->chunks[mid]->length)
2058 min_index = mid + 1;
2059 else
2060 return tree->chunks[mid];
2061 }
2062
2063 return NULL;
2064}
2065
2066static int btrfs_is_nocow_fd(int fd) {
efb6a76a 2067 unsigned flags;
05f38c89 2068 int r;
efb6a76a
MY
2069
2070 assert(fd >= 0);
2071
05f38c89
LP
2072 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
2073 if (r < 0)
2074 return r;
2075 if (r == 0)
efb6a76a
MY
2076 return -ENOTTY;
2077
05f38c89
LP
2078 r = read_attr_fd(fd, &flags);
2079 if (r < 0)
2080 return r;
efb6a76a
MY
2081
2082 return FLAGS_SET(flags, FS_NOCOW_FL) && !FLAGS_SET(flags, FS_COMPR_FL);
2083}
2084
2085int btrfs_get_file_physical_offset_fd(int fd, uint64_t *ret) {
2086
2087 struct btrfs_ioctl_search_args search_args = {
2088 .key.min_type = BTRFS_EXTENT_DATA_KEY,
2089 .key.max_type = BTRFS_EXTENT_DATA_KEY,
2090
2091 .key.min_offset = 0,
2092 .key.max_offset = UINT64_MAX,
2093
2094 .key.min_transid = 0,
2095 .key.max_transid = UINT64_MAX,
2096 };
2097
2098 _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
2099 uint64_t subvol_id;
2100 struct stat st;
2101 int r;
2102
2103 assert(fd >= 0);
2104 assert(ret);
2105
2106 if (fstat(fd, &st) < 0)
2107 return -errno;
2108
2109 r = stat_verify_regular(&st);
2110 if (r < 0)
2111 return r;
2112
2113 r = btrfs_is_nocow_fd(fd);
2114 if (r < 0)
2115 return r;
2116 if (r == 0)
2117 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2118 "Cannot get physical address for btrfs extent: CoW enabled");
2119
2120 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2121 if (r < 0)
2122 return r;
2123
2124 r = btrfs_read_chunk_tree_fd(fd, &tree);
2125 if (r < 0)
2126 return r;
2127
2128 search_args.key.tree_id = subvol_id;
2129 search_args.key.min_objectid = search_args.key.max_objectid = st.st_ino;
2130
2131 while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
801bf40c
LP
2132 struct btrfs_ioctl_search_header sh;
2133 const void *body;
efb6a76a
MY
2134
2135 search_args.key.nr_items = 256;
2136
2137 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
2138 return -errno;
2139
2140 if (search_args.key.nr_items == 0)
2141 break;
2142
801bf40c 2143 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
efb6a76a
MY
2144 uint64_t logical_offset;
2145 BtrfsChunk *chunk;
2146
801bf40c 2147 btrfs_ioctl_search_args_set(&search_args, &sh);
efb6a76a 2148
801bf40c 2149 if (sh.type != BTRFS_EXTENT_DATA_KEY)
efb6a76a
MY
2150 continue;
2151
801bf40c 2152 if (sh.objectid != st.st_ino)
efb6a76a
MY
2153 continue;
2154
801bf40c 2155 const struct btrfs_file_extent_item *item = body;
efb6a76a
MY
2156 if (!IN_SET(item->type, BTRFS_FILE_EXTENT_REG, BTRFS_FILE_EXTENT_PREALLOC))
2157 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2158 "Cannot get physical address for btrfs extent: invalid type %" PRIu8,
2159 item->type);
2160
2161 if (item->compression != 0 || item->encryption != 0 || item->other_encoding != 0)
2162 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2163 "Cannot get physical address for btrfs extent: has incompatible property");
2164
2165 logical_offset = le64toh(item->disk_bytenr);
2166 if (logical_offset == 0)
2167 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2168 "Cannot get physical address for btrfs extent: failed to get logical offset");
2169
2170 chunk = btrfs_find_chunk_from_logical_address(&tree, logical_offset);
2171 if (!chunk)
2172 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2173 "Cannot get physical address for btrfs extent: no matching chunk found");
2174
2175 if ((chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)
2176 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2177 "Cannot get physical address for btrfs extent: unsupported profile");
2178
2179 uint64_t relative_chunk, relative_stripe, stripe_nr;
2180 uint16_t stripe_index;
2181
2182 assert(logical_offset >= chunk->offset);
2183 assert(chunk->n_stripes > 0);
2184 assert(chunk->stripe_len > 0);
2185
2186 relative_chunk = logical_offset - chunk->offset;
2187 stripe_nr = relative_chunk / chunk->stripe_len;
2188 relative_stripe = relative_chunk - stripe_nr * chunk->stripe_len;
2189 stripe_index = stripe_nr % chunk->n_stripes;
2190
2191 *ret = chunk->stripes[stripe_index].offset +
2192 stripe_nr / chunk->n_stripes * chunk->stripe_len +
2193 relative_stripe;
2194
2195 return 0;
2196 }
2197
2198 if (!btrfs_ioctl_search_args_inc(&search_args))
2199 break;
2200 }
2201
2202 return -ENODATA;
2203}