1 // SPDX-License-Identifier: GPL-2.0
5 #include "btree_key_cache.h"
6 #include "btree_update.h"
13 #include <linux/random.h>
18 * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
19 * exist to provide a stable identifier for the whole lifetime of a snapshot
23 void bch2_snapshot_tree_to_text(struct printbuf
*out
, struct bch_fs
*c
,
26 struct bkey_s_c_snapshot_tree t
= bkey_s_c_to_snapshot_tree(k
);
28 prt_printf(out
, "subvol %u root snapshot %u",
29 le32_to_cpu(t
.v
->master_subvol
),
30 le32_to_cpu(t
.v
->root_snapshot
));
33 int bch2_snapshot_tree_invalid(struct bch_fs
*c
, struct bkey_s_c k
,
34 enum bkey_invalid_flags flags
,
39 bkey_fsck_err_on(bkey_gt(k
.k
->p
, POS(0, U32_MAX
)) ||
40 bkey_lt(k
.k
->p
, POS(0, 1)), c
, err
,
41 snapshot_tree_pos_bad
,
47 int bch2_snapshot_tree_lookup(struct btree_trans
*trans
, u32 id
,
48 struct bch_snapshot_tree
*s
)
50 int ret
= bch2_bkey_get_val_typed(trans
, BTREE_ID_snapshot_trees
, POS(0, id
),
51 BTREE_ITER_WITH_UPDATES
, snapshot_tree
, s
);
53 if (bch2_err_matches(ret
, ENOENT
))
54 ret
= -BCH_ERR_ENOENT_snapshot_tree
;
58 struct bkey_i_snapshot_tree
*
59 __bch2_snapshot_tree_create(struct btree_trans
*trans
)
61 struct btree_iter iter
;
62 int ret
= bch2_bkey_get_empty_slot(trans
, &iter
,
63 BTREE_ID_snapshot_trees
, POS(0, U32_MAX
));
64 struct bkey_i_snapshot_tree
*s_t
;
66 if (ret
== -BCH_ERR_ENOSPC_btree_slot
)
67 ret
= -BCH_ERR_ENOSPC_snapshot_tree
;
71 s_t
= bch2_bkey_alloc(trans
, &iter
, 0, snapshot_tree
);
72 ret
= PTR_ERR_OR_ZERO(s_t
);
73 bch2_trans_iter_exit(trans
, &iter
);
74 return ret
? ERR_PTR(ret
) : s_t
;
77 static int bch2_snapshot_tree_create(struct btree_trans
*trans
,
78 u32 root_id
, u32 subvol_id
, u32
*tree_id
)
80 struct bkey_i_snapshot_tree
*n_tree
=
81 __bch2_snapshot_tree_create(trans
);
84 return PTR_ERR(n_tree
);
86 n_tree
->v
.master_subvol
= cpu_to_le32(subvol_id
);
87 n_tree
->v
.root_snapshot
= cpu_to_le32(root_id
);
88 *tree_id
= n_tree
->k
.p
.offset
;
94 static bool bch2_snapshot_is_ancestor_early(struct bch_fs
*c
, u32 id
, u32 ancestor
)
96 struct snapshot_table
*t
;
99 t
= rcu_dereference(c
->snapshots
);
101 while (id
&& id
< ancestor
)
102 id
= __snapshot_t(t
, id
)->parent
;
105 return id
== ancestor
;
108 static inline u32
get_ancestor_below(struct snapshot_table
*t
, u32 id
, u32 ancestor
)
110 const struct snapshot_t
*s
= __snapshot_t(t
, id
);
112 if (s
->skip
[2] <= ancestor
)
114 if (s
->skip
[1] <= ancestor
)
116 if (s
->skip
[0] <= ancestor
)
121 bool __bch2_snapshot_is_ancestor(struct bch_fs
*c
, u32 id
, u32 ancestor
)
123 struct snapshot_table
*t
;
126 EBUG_ON(c
->recovery_pass_done
<= BCH_RECOVERY_PASS_check_snapshots
);
129 t
= rcu_dereference(c
->snapshots
);
131 while (id
&& id
< ancestor
- IS_ANCESTOR_BITMAP
)
132 id
= get_ancestor_below(t
, id
, ancestor
);
134 if (id
&& id
< ancestor
) {
135 ret
= test_bit(ancestor
- id
- 1, __snapshot_t(t
, id
)->is_ancestor
);
137 EBUG_ON(ret
!= bch2_snapshot_is_ancestor_early(c
, id
, ancestor
));
139 ret
= id
== ancestor
;
147 static noinline
struct snapshot_t
*__snapshot_t_mut(struct bch_fs
*c
, u32 id
)
149 size_t idx
= U32_MAX
- id
;
151 struct snapshot_table
*new, *old
;
153 new_size
= max(16UL, roundup_pow_of_two(idx
+ 1));
155 new = kvzalloc(struct_size(new, s
, new_size
), GFP_KERNEL
);
159 old
= rcu_dereference_protected(c
->snapshots
, true);
162 rcu_dereference_protected(c
->snapshots
, true)->s
,
163 sizeof(new->s
[0]) * c
->snapshot_table_size
);
165 rcu_assign_pointer(c
->snapshots
, new);
166 c
->snapshot_table_size
= new_size
;
167 kvfree_rcu_mightsleep(old
);
169 return &rcu_dereference_protected(c
->snapshots
, true)->s
[idx
];
172 static inline struct snapshot_t
*snapshot_t_mut(struct bch_fs
*c
, u32 id
)
174 size_t idx
= U32_MAX
- id
;
176 lockdep_assert_held(&c
->snapshot_table_lock
);
178 if (likely(idx
< c
->snapshot_table_size
))
179 return &rcu_dereference_protected(c
->snapshots
, true)->s
[idx
];
181 return __snapshot_t_mut(c
, id
);
184 void bch2_snapshot_to_text(struct printbuf
*out
, struct bch_fs
*c
,
187 struct bkey_s_c_snapshot s
= bkey_s_c_to_snapshot(k
);
189 prt_printf(out
, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
190 BCH_SNAPSHOT_SUBVOL(s
.v
),
191 BCH_SNAPSHOT_DELETED(s
.v
),
192 le32_to_cpu(s
.v
->parent
),
193 le32_to_cpu(s
.v
->children
[0]),
194 le32_to_cpu(s
.v
->children
[1]),
195 le32_to_cpu(s
.v
->subvol
),
196 le32_to_cpu(s
.v
->tree
));
198 if (bkey_val_bytes(k
.k
) > offsetof(struct bch_snapshot
, depth
))
199 prt_printf(out
, " depth %u skiplist %u %u %u",
200 le32_to_cpu(s
.v
->depth
),
201 le32_to_cpu(s
.v
->skip
[0]),
202 le32_to_cpu(s
.v
->skip
[1]),
203 le32_to_cpu(s
.v
->skip
[2]));
206 int bch2_snapshot_invalid(struct bch_fs
*c
, struct bkey_s_c k
,
207 enum bkey_invalid_flags flags
,
208 struct printbuf
*err
)
210 struct bkey_s_c_snapshot s
;
214 bkey_fsck_err_on(bkey_gt(k
.k
->p
, POS(0, U32_MAX
)) ||
215 bkey_lt(k
.k
->p
, POS(0, 1)), c
, err
,
219 s
= bkey_s_c_to_snapshot(k
);
221 id
= le32_to_cpu(s
.v
->parent
);
222 bkey_fsck_err_on(id
&& id
<= k
.k
->p
.offset
, c
, err
,
224 "bad parent node (%u <= %llu)",
227 bkey_fsck_err_on(le32_to_cpu(s
.v
->children
[0]) < le32_to_cpu(s
.v
->children
[1]), c
, err
,
228 snapshot_children_not_normalized
,
229 "children not normalized");
231 bkey_fsck_err_on(s
.v
->children
[0] && s
.v
->children
[0] == s
.v
->children
[1], c
, err
,
232 snapshot_child_duplicate
,
233 "duplicate child nodes");
235 for (i
= 0; i
< 2; i
++) {
236 id
= le32_to_cpu(s
.v
->children
[i
]);
238 bkey_fsck_err_on(id
>= k
.k
->p
.offset
, c
, err
,
240 "bad child node (%u >= %llu)",
244 if (bkey_val_bytes(k
.k
) > offsetof(struct bch_snapshot
, skip
)) {
245 bkey_fsck_err_on(le32_to_cpu(s
.v
->skip
[0]) > le32_to_cpu(s
.v
->skip
[1]) ||
246 le32_to_cpu(s
.v
->skip
[1]) > le32_to_cpu(s
.v
->skip
[2]), c
, err
,
247 snapshot_skiplist_not_normalized
,
248 "skiplist not normalized");
250 for (i
= 0; i
< ARRAY_SIZE(s
.v
->skip
); i
++) {
251 id
= le32_to_cpu(s
.v
->skip
[i
]);
253 bkey_fsck_err_on(id
&& id
< le32_to_cpu(s
.v
->parent
), c
, err
,
254 snapshot_skiplist_bad
,
255 "bad skiplist node %u", id
);
262 static void __set_is_ancestor_bitmap(struct bch_fs
*c
, u32 id
)
264 struct snapshot_t
*t
= snapshot_t_mut(c
, id
);
267 while ((parent
= bch2_snapshot_parent_early(c
, parent
)) &&
268 parent
- id
- 1 < IS_ANCESTOR_BITMAP
)
269 __set_bit(parent
- id
- 1, t
->is_ancestor
);
272 static void set_is_ancestor_bitmap(struct bch_fs
*c
, u32 id
)
274 mutex_lock(&c
->snapshot_table_lock
);
275 __set_is_ancestor_bitmap(c
, id
);
276 mutex_unlock(&c
->snapshot_table_lock
);
279 static int __bch2_mark_snapshot(struct btree_trans
*trans
,
280 enum btree_id btree
, unsigned level
,
281 struct bkey_s_c old
, struct bkey_s_c
new,
284 struct bch_fs
*c
= trans
->c
;
285 struct snapshot_t
*t
;
286 u32 id
= new.k
->p
.offset
;
289 mutex_lock(&c
->snapshot_table_lock
);
291 t
= snapshot_t_mut(c
, id
);
293 ret
= -BCH_ERR_ENOMEM_mark_snapshot
;
297 if (new.k
->type
== KEY_TYPE_snapshot
) {
298 struct bkey_s_c_snapshot s
= bkey_s_c_to_snapshot(new);
300 t
->parent
= le32_to_cpu(s
.v
->parent
);
301 t
->children
[0] = le32_to_cpu(s
.v
->children
[0]);
302 t
->children
[1] = le32_to_cpu(s
.v
->children
[1]);
303 t
->subvol
= BCH_SNAPSHOT_SUBVOL(s
.v
) ? le32_to_cpu(s
.v
->subvol
) : 0;
304 t
->tree
= le32_to_cpu(s
.v
->tree
);
306 if (bkey_val_bytes(s
.k
) > offsetof(struct bch_snapshot
, depth
)) {
307 t
->depth
= le32_to_cpu(s
.v
->depth
);
308 t
->skip
[0] = le32_to_cpu(s
.v
->skip
[0]);
309 t
->skip
[1] = le32_to_cpu(s
.v
->skip
[1]);
310 t
->skip
[2] = le32_to_cpu(s
.v
->skip
[2]);
318 __set_is_ancestor_bitmap(c
, id
);
320 if (BCH_SNAPSHOT_DELETED(s
.v
)) {
321 set_bit(BCH_FS_need_delete_dead_snapshots
, &c
->flags
);
322 if (c
->curr_recovery_pass
> BCH_RECOVERY_PASS_delete_dead_snapshots
)
323 bch2_delete_dead_snapshots_async(c
);
326 memset(t
, 0, sizeof(*t
));
329 mutex_unlock(&c
->snapshot_table_lock
);
333 int bch2_mark_snapshot(struct btree_trans
*trans
,
334 enum btree_id btree
, unsigned level
,
335 struct bkey_s_c old
, struct bkey_s
new,
338 return __bch2_mark_snapshot(trans
, btree
, level
, old
, new.s_c
, flags
);
341 int bch2_snapshot_lookup(struct btree_trans
*trans
, u32 id
,
342 struct bch_snapshot
*s
)
344 return bch2_bkey_get_val_typed(trans
, BTREE_ID_snapshots
, POS(0, id
),
345 BTREE_ITER_WITH_UPDATES
, snapshot
, s
);
348 static int bch2_snapshot_live(struct btree_trans
*trans
, u32 id
)
350 struct bch_snapshot v
;
356 ret
= bch2_snapshot_lookup(trans
, id
, &v
);
357 if (bch2_err_matches(ret
, ENOENT
))
358 bch_err(trans
->c
, "snapshot node %u not found", id
);
362 return !BCH_SNAPSHOT_DELETED(&v
);
366 * If @k is a snapshot with just one live child, it's part of a linear chain,
367 * which we consider to be an equivalence class: and then after snapshot
368 * deletion cleanup, there should only be a single key at a given position in
369 * this equivalence class.
371 * This sets the equivalence class of @k to be the child's equivalence class, if
372 * it's part of such a linear chain: this correctly sets equivalence classes on
373 * startup if we run leaf to root (i.e. in natural key order).
375 static int bch2_snapshot_set_equiv(struct btree_trans
*trans
, struct bkey_s_c k
)
377 struct bch_fs
*c
= trans
->c
;
378 unsigned i
, nr_live
= 0, live_idx
= 0;
379 struct bkey_s_c_snapshot snap
;
380 u32 id
= k
.k
->p
.offset
, child
[2];
382 if (k
.k
->type
!= KEY_TYPE_snapshot
)
385 snap
= bkey_s_c_to_snapshot(k
);
387 child
[0] = le32_to_cpu(snap
.v
->children
[0]);
388 child
[1] = le32_to_cpu(snap
.v
->children
[1]);
390 for (i
= 0; i
< 2; i
++) {
391 int ret
= bch2_snapshot_live(trans
, child
[i
]);
401 mutex_lock(&c
->snapshot_table_lock
);
403 snapshot_t_mut(c
, id
)->equiv
= nr_live
== 1
404 ? snapshot_t_mut(c
, child
[live_idx
])->equiv
407 mutex_unlock(&c
->snapshot_table_lock
);
414 static u32
bch2_snapshot_child(struct bch_fs
*c
, u32 id
, unsigned child
)
416 return snapshot_t(c
, id
)->children
[child
];
419 static u32
bch2_snapshot_left_child(struct bch_fs
*c
, u32 id
)
421 return bch2_snapshot_child(c
, id
, 0);
424 static u32
bch2_snapshot_right_child(struct bch_fs
*c
, u32 id
)
426 return bch2_snapshot_child(c
, id
, 1);
429 static u32
bch2_snapshot_tree_next(struct bch_fs
*c
, u32 id
)
433 n
= bch2_snapshot_left_child(c
, id
);
437 while ((parent
= bch2_snapshot_parent(c
, id
))) {
438 n
= bch2_snapshot_right_child(c
, parent
);
447 static u32
bch2_snapshot_tree_oldest_subvol(struct bch_fs
*c
, u32 snapshot_root
)
449 u32 id
= snapshot_root
;
453 s
= snapshot_t(c
, id
)->subvol
;
455 if (s
&& (!subvol
|| s
< subvol
))
458 id
= bch2_snapshot_tree_next(c
, id
);
464 static int bch2_snapshot_tree_master_subvol(struct btree_trans
*trans
,
465 u32 snapshot_root
, u32
*subvol_id
)
467 struct bch_fs
*c
= trans
->c
;
468 struct btree_iter iter
;
473 for_each_btree_key_norestart(trans
, iter
, BTREE_ID_subvolumes
, POS_MIN
,
475 if (k
.k
->type
!= KEY_TYPE_subvolume
)
478 struct bkey_s_c_subvolume s
= bkey_s_c_to_subvolume(k
);
479 if (!bch2_snapshot_is_ancestor(c
, le32_to_cpu(s
.v
->snapshot
), snapshot_root
))
481 if (!BCH_SUBVOLUME_SNAP(s
.v
)) {
482 *subvol_id
= s
.k
->p
.offset
;
488 bch2_trans_iter_exit(trans
, &iter
);
490 if (!ret
&& !found
) {
491 struct bkey_i_subvolume
*u
;
493 *subvol_id
= bch2_snapshot_tree_oldest_subvol(c
, snapshot_root
);
495 u
= bch2_bkey_get_mut_typed(trans
, &iter
,
496 BTREE_ID_subvolumes
, POS(0, *subvol_id
),
498 ret
= PTR_ERR_OR_ZERO(u
);
502 SET_BCH_SUBVOLUME_SNAP(&u
->v
, false);
508 static int check_snapshot_tree(struct btree_trans
*trans
,
509 struct btree_iter
*iter
,
512 struct bch_fs
*c
= trans
->c
;
513 struct bkey_s_c_snapshot_tree st
;
514 struct bch_snapshot s
;
515 struct bch_subvolume subvol
;
516 struct printbuf buf
= PRINTBUF
;
520 if (k
.k
->type
!= KEY_TYPE_snapshot_tree
)
523 st
= bkey_s_c_to_snapshot_tree(k
);
524 root_id
= le32_to_cpu(st
.v
->root_snapshot
);
526 ret
= bch2_snapshot_lookup(trans
, root_id
, &s
);
527 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
530 if (fsck_err_on(ret
||
531 root_id
!= bch2_snapshot_root(c
, root_id
) ||
532 st
.k
->p
.offset
!= le32_to_cpu(s
.tree
),
533 c
, snapshot_tree_to_missing_snapshot
,
534 "snapshot tree points to missing/incorrect snapshot:\n %s",
535 (bch2_bkey_val_to_text(&buf
, c
, st
.s_c
), buf
.buf
))) {
536 ret
= bch2_btree_delete_at(trans
, iter
, 0);
540 ret
= bch2_subvolume_get(trans
, le32_to_cpu(st
.v
->master_subvol
),
542 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
546 c
, snapshot_tree_to_missing_subvol
,
547 "snapshot tree points to missing subvolume:\n %s",
548 (printbuf_reset(&buf
),
549 bch2_bkey_val_to_text(&buf
, c
, st
.s_c
), buf
.buf
)) ||
550 fsck_err_on(!bch2_snapshot_is_ancestor_early(c
,
551 le32_to_cpu(subvol
.snapshot
),
553 c
, snapshot_tree_to_wrong_subvol
,
554 "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
555 (printbuf_reset(&buf
),
556 bch2_bkey_val_to_text(&buf
, c
, st
.s_c
), buf
.buf
)) ||
557 fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol
),
558 c
, snapshot_tree_to_snapshot_subvol
,
559 "snapshot tree points to snapshot subvolume:\n %s",
560 (printbuf_reset(&buf
),
561 bch2_bkey_val_to_text(&buf
, c
, st
.s_c
), buf
.buf
))) {
562 struct bkey_i_snapshot_tree
*u
;
565 ret
= bch2_snapshot_tree_master_subvol(trans
, root_id
, &subvol_id
);
569 u
= bch2_bkey_make_mut_typed(trans
, iter
, &k
, 0, snapshot_tree
);
570 ret
= PTR_ERR_OR_ZERO(u
);
574 u
->v
.master_subvol
= cpu_to_le32(subvol_id
);
575 st
= snapshot_tree_i_to_s_c(u
);
584 * For each snapshot_tree, make sure it points to the root of a snapshot tree
585 * and that snapshot entry points back to it, or delete it.
587 * And, make sure it points to a subvolume within that snapshot tree, or correct
588 * it to point to the oldest subvolume within that snapshot tree.
590 int bch2_check_snapshot_trees(struct bch_fs
*c
)
592 int ret
= bch2_trans_run(c
,
593 for_each_btree_key_commit(trans
, iter
,
594 BTREE_ID_snapshot_trees
, POS_MIN
,
595 BTREE_ITER_PREFETCH
, k
,
596 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
597 check_snapshot_tree(trans
, &iter
, k
)));
603 * Look up snapshot tree for @tree_id and find root,
604 * make sure @snap_id is a descendent:
606 static int snapshot_tree_ptr_good(struct btree_trans
*trans
,
607 u32 snap_id
, u32 tree_id
)
609 struct bch_snapshot_tree s_t
;
610 int ret
= bch2_snapshot_tree_lookup(trans
, tree_id
, &s_t
);
612 if (bch2_err_matches(ret
, ENOENT
))
617 return bch2_snapshot_is_ancestor_early(trans
->c
, snap_id
, le32_to_cpu(s_t
.root_snapshot
));
620 u32
bch2_snapshot_skiplist_get(struct bch_fs
*c
, u32 id
)
622 const struct snapshot_t
*s
;
628 s
= snapshot_t(c
, id
);
630 id
= bch2_snapshot_nth_parent(c
, id
, get_random_u32_below(s
->depth
));
636 static int snapshot_skiplist_good(struct btree_trans
*trans
, u32 id
, struct bch_snapshot s
)
640 for (i
= 0; i
< 3; i
++)
645 if (!bch2_snapshot_is_ancestor_early(trans
->c
, id
, le32_to_cpu(s
.skip
[i
])))
653 * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
654 * its snapshot_tree pointer is correct (allocate new one if necessary), then
655 * update this node's pointer to root node's pointer:
657 static int snapshot_tree_ptr_repair(struct btree_trans
*trans
,
658 struct btree_iter
*iter
,
660 struct bch_snapshot
*s
)
662 struct bch_fs
*c
= trans
->c
;
663 struct btree_iter root_iter
;
664 struct bch_snapshot_tree s_t
;
665 struct bkey_s_c_snapshot root
;
666 struct bkey_i_snapshot
*u
;
667 u32 root_id
= bch2_snapshot_root(c
, k
.k
->p
.offset
), tree_id
;
670 root
= bch2_bkey_get_iter_typed(trans
, &root_iter
,
671 BTREE_ID_snapshots
, POS(0, root_id
),
672 BTREE_ITER_WITH_UPDATES
, snapshot
);
673 ret
= bkey_err(root
);
677 tree_id
= le32_to_cpu(root
.v
->tree
);
679 ret
= bch2_snapshot_tree_lookup(trans
, tree_id
, &s_t
);
680 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
683 if (ret
|| le32_to_cpu(s_t
.root_snapshot
) != root_id
) {
684 u
= bch2_bkey_make_mut_typed(trans
, &root_iter
, &root
.s_c
, 0, snapshot
);
685 ret
= PTR_ERR_OR_ZERO(u
) ?:
686 bch2_snapshot_tree_create(trans
, root_id
,
687 bch2_snapshot_tree_oldest_subvol(c
, root_id
),
692 u
->v
.tree
= cpu_to_le32(tree_id
);
693 if (k
.k
->p
.offset
== root_id
)
697 if (k
.k
->p
.offset
!= root_id
) {
698 u
= bch2_bkey_make_mut_typed(trans
, iter
, &k
, 0, snapshot
);
699 ret
= PTR_ERR_OR_ZERO(u
);
703 u
->v
.tree
= cpu_to_le32(tree_id
);
707 bch2_trans_iter_exit(trans
, &root_iter
);
711 static int check_snapshot(struct btree_trans
*trans
,
712 struct btree_iter
*iter
,
715 struct bch_fs
*c
= trans
->c
;
716 struct bch_snapshot s
;
717 struct bch_subvolume subvol
;
718 struct bch_snapshot v
;
719 struct bkey_i_snapshot
*u
;
720 u32 parent_id
= bch2_snapshot_parent_early(c
, k
.k
->p
.offset
);
722 struct printbuf buf
= PRINTBUF
;
723 bool should_have_subvol
;
727 if (k
.k
->type
!= KEY_TYPE_snapshot
)
730 memset(&s
, 0, sizeof(s
));
731 memcpy(&s
, k
.v
, min(sizeof(s
), bkey_val_bytes(k
.k
)));
733 id
= le32_to_cpu(s
.parent
);
735 ret
= bch2_snapshot_lookup(trans
, id
, &v
);
736 if (bch2_err_matches(ret
, ENOENT
))
737 bch_err(c
, "snapshot with nonexistent parent:\n %s",
738 (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
));
742 if (le32_to_cpu(v
.children
[0]) != k
.k
->p
.offset
&&
743 le32_to_cpu(v
.children
[1]) != k
.k
->p
.offset
) {
744 bch_err(c
, "snapshot parent %u missing pointer to child %llu",
751 for (i
= 0; i
< 2 && s
.children
[i
]; i
++) {
752 id
= le32_to_cpu(s
.children
[i
]);
754 ret
= bch2_snapshot_lookup(trans
, id
, &v
);
755 if (bch2_err_matches(ret
, ENOENT
))
756 bch_err(c
, "snapshot node %llu has nonexistent child %u",
761 if (le32_to_cpu(v
.parent
) != k
.k
->p
.offset
) {
762 bch_err(c
, "snapshot child %u has wrong parent (got %u should be %llu)",
763 id
, le32_to_cpu(v
.parent
), k
.k
->p
.offset
);
769 should_have_subvol
= BCH_SNAPSHOT_SUBVOL(&s
) &&
770 !BCH_SNAPSHOT_DELETED(&s
);
772 if (should_have_subvol
) {
773 id
= le32_to_cpu(s
.subvol
);
774 ret
= bch2_subvolume_get(trans
, id
, 0, false, &subvol
);
775 if (bch2_err_matches(ret
, ENOENT
))
776 bch_err(c
, "snapshot points to nonexistent subvolume:\n %s",
777 (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
));
781 if (BCH_SNAPSHOT_SUBVOL(&s
) != (le32_to_cpu(subvol
.snapshot
) == k
.k
->p
.offset
)) {
782 bch_err(c
, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
788 if (fsck_err_on(s
.subvol
,
789 c
, snapshot_should_not_have_subvol
,
790 "snapshot should not point to subvol:\n %s",
791 (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
))) {
792 u
= bch2_bkey_make_mut_typed(trans
, iter
, &k
, 0, snapshot
);
793 ret
= PTR_ERR_OR_ZERO(u
);
802 ret
= snapshot_tree_ptr_good(trans
, k
.k
->p
.offset
, le32_to_cpu(s
.tree
));
806 if (fsck_err_on(!ret
, c
, snapshot_to_bad_snapshot_tree
,
807 "snapshot points to missing/incorrect tree:\n %s",
808 (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
))) {
809 ret
= snapshot_tree_ptr_repair(trans
, iter
, k
, &s
);
815 real_depth
= bch2_snapshot_depth(c
, parent_id
);
817 if (fsck_err_on(le32_to_cpu(s
.depth
) != real_depth
,
818 c
, snapshot_bad_depth
,
819 "snapshot with incorrect depth field, should be %u:\n %s",
820 real_depth
, (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
))) {
821 u
= bch2_bkey_make_mut_typed(trans
, iter
, &k
, 0, snapshot
);
822 ret
= PTR_ERR_OR_ZERO(u
);
826 u
->v
.depth
= cpu_to_le32(real_depth
);
830 ret
= snapshot_skiplist_good(trans
, k
.k
->p
.offset
, s
);
834 if (fsck_err_on(!ret
, c
, snapshot_bad_skiplist
,
835 "snapshot with bad skiplist field:\n %s",
836 (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
))) {
837 u
= bch2_bkey_make_mut_typed(trans
, iter
, &k
, 0, snapshot
);
838 ret
= PTR_ERR_OR_ZERO(u
);
842 for (i
= 0; i
< ARRAY_SIZE(u
->v
.skip
); i
++)
843 u
->v
.skip
[i
] = cpu_to_le32(bch2_snapshot_skiplist_get(c
, parent_id
));
845 bubble_sort(u
->v
.skip
, ARRAY_SIZE(u
->v
.skip
), cmp_le32
);
855 int bch2_check_snapshots(struct bch_fs
*c
)
858 * We iterate backwards as checking/fixing the depth field requires that
859 * the parent's depth already be correct:
861 int ret
= bch2_trans_run(c
,
862 for_each_btree_key_reverse_commit(trans
, iter
,
863 BTREE_ID_snapshots
, POS_MAX
,
864 BTREE_ITER_PREFETCH
, k
,
865 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
866 check_snapshot(trans
, &iter
, k
)));
872 * Mark a snapshot as deleted, for future cleanup:
874 int bch2_snapshot_node_set_deleted(struct btree_trans
*trans
, u32 id
)
876 struct btree_iter iter
;
877 struct bkey_i_snapshot
*s
;
880 s
= bch2_bkey_get_mut_typed(trans
, &iter
,
881 BTREE_ID_snapshots
, POS(0, id
),
883 ret
= PTR_ERR_OR_ZERO(s
);
885 bch2_fs_inconsistent_on(bch2_err_matches(ret
, ENOENT
),
886 trans
->c
, "missing snapshot %u", id
);
890 /* already deleted? */
891 if (BCH_SNAPSHOT_DELETED(&s
->v
))
894 SET_BCH_SNAPSHOT_DELETED(&s
->v
, true);
895 SET_BCH_SNAPSHOT_SUBVOL(&s
->v
, false);
898 bch2_trans_iter_exit(trans
, &iter
);
902 static inline void normalize_snapshot_child_pointers(struct bch_snapshot
*s
)
904 if (le32_to_cpu(s
->children
[0]) < le32_to_cpu(s
->children
[1]))
905 swap(s
->children
[0], s
->children
[1]);
908 static int bch2_snapshot_node_delete(struct btree_trans
*trans
, u32 id
)
910 struct bch_fs
*c
= trans
->c
;
911 struct btree_iter iter
, p_iter
= (struct btree_iter
) { NULL
};
912 struct btree_iter c_iter
= (struct btree_iter
) { NULL
};
913 struct btree_iter tree_iter
= (struct btree_iter
) { NULL
};
914 struct bkey_s_c_snapshot s
;
915 u32 parent_id
, child_id
;
919 s
= bch2_bkey_get_iter_typed(trans
, &iter
, BTREE_ID_snapshots
, POS(0, id
),
920 BTREE_ITER_INTENT
, snapshot
);
922 bch2_fs_inconsistent_on(bch2_err_matches(ret
, ENOENT
), c
,
923 "missing snapshot %u", id
);
928 BUG_ON(s
.v
->children
[1]);
930 parent_id
= le32_to_cpu(s
.v
->parent
);
931 child_id
= le32_to_cpu(s
.v
->children
[0]);
934 struct bkey_i_snapshot
*parent
;
936 parent
= bch2_bkey_get_mut_typed(trans
, &p_iter
,
937 BTREE_ID_snapshots
, POS(0, parent_id
),
939 ret
= PTR_ERR_OR_ZERO(parent
);
940 bch2_fs_inconsistent_on(bch2_err_matches(ret
, ENOENT
), c
,
941 "missing snapshot %u", parent_id
);
945 /* find entry in parent->children for node being deleted */
946 for (i
= 0; i
< 2; i
++)
947 if (le32_to_cpu(parent
->v
.children
[i
]) == id
)
950 if (bch2_fs_inconsistent_on(i
== 2, c
,
951 "snapshot %u missing child pointer to %u",
955 parent
->v
.children
[i
] = cpu_to_le32(child_id
);
957 normalize_snapshot_child_pointers(&parent
->v
);
961 struct bkey_i_snapshot
*child
;
963 child
= bch2_bkey_get_mut_typed(trans
, &c_iter
,
964 BTREE_ID_snapshots
, POS(0, child_id
),
966 ret
= PTR_ERR_OR_ZERO(child
);
967 bch2_fs_inconsistent_on(bch2_err_matches(ret
, ENOENT
), c
,
968 "missing snapshot %u", child_id
);
972 child
->v
.parent
= cpu_to_le32(parent_id
);
974 if (!child
->v
.parent
) {
975 child
->v
.skip
[0] = 0;
976 child
->v
.skip
[1] = 0;
977 child
->v
.skip
[2] = 0;
983 * We're deleting the root of a snapshot tree: update the
984 * snapshot_tree entry to point to the new root, or delete it if
985 * this is the last snapshot ID in this tree:
987 struct bkey_i_snapshot_tree
*s_t
;
989 BUG_ON(s
.v
->children
[1]);
991 s_t
= bch2_bkey_get_mut_typed(trans
, &tree_iter
,
992 BTREE_ID_snapshot_trees
, POS(0, le32_to_cpu(s
.v
->tree
)),
994 ret
= PTR_ERR_OR_ZERO(s_t
);
998 if (s
.v
->children
[0]) {
999 s_t
->v
.root_snapshot
= s
.v
->children
[0];
1001 s_t
->k
.type
= KEY_TYPE_deleted
;
1002 set_bkey_val_u64s(&s_t
->k
, 0);
1006 ret
= bch2_btree_delete_at(trans
, &iter
, 0);
1008 bch2_trans_iter_exit(trans
, &tree_iter
);
1009 bch2_trans_iter_exit(trans
, &p_iter
);
1010 bch2_trans_iter_exit(trans
, &c_iter
);
1011 bch2_trans_iter_exit(trans
, &iter
);
1015 static int create_snapids(struct btree_trans
*trans
, u32 parent
, u32 tree
,
1017 u32
*snapshot_subvols
,
1018 unsigned nr_snapids
)
1020 struct bch_fs
*c
= trans
->c
;
1021 struct btree_iter iter
;
1022 struct bkey_i_snapshot
*n
;
1025 u32 depth
= bch2_snapshot_depth(c
, parent
);
1028 bch2_trans_iter_init(trans
, &iter
, BTREE_ID_snapshots
,
1029 POS_MIN
, BTREE_ITER_INTENT
);
1030 k
= bch2_btree_iter_peek(&iter
);
1035 for (i
= 0; i
< nr_snapids
; i
++) {
1036 k
= bch2_btree_iter_prev_slot(&iter
);
1041 if (!k
.k
|| !k
.k
->p
.offset
) {
1042 ret
= -BCH_ERR_ENOSPC_snapshot_create
;
1046 n
= bch2_bkey_alloc(trans
, &iter
, 0, snapshot
);
1047 ret
= PTR_ERR_OR_ZERO(n
);
1052 n
->v
.parent
= cpu_to_le32(parent
);
1053 n
->v
.subvol
= cpu_to_le32(snapshot_subvols
[i
]);
1054 n
->v
.tree
= cpu_to_le32(tree
);
1055 n
->v
.depth
= cpu_to_le32(depth
);
1056 n
->v
.btime
.lo
= cpu_to_le64(bch2_current_time(c
));
1059 for (j
= 0; j
< ARRAY_SIZE(n
->v
.skip
); j
++)
1060 n
->v
.skip
[j
] = cpu_to_le32(bch2_snapshot_skiplist_get(c
, parent
));
1062 bubble_sort(n
->v
.skip
, ARRAY_SIZE(n
->v
.skip
), cmp_le32
);
1063 SET_BCH_SNAPSHOT_SUBVOL(&n
->v
, true);
1065 ret
= __bch2_mark_snapshot(trans
, BTREE_ID_snapshots
, 0,
1066 bkey_s_c_null
, bkey_i_to_s_c(&n
->k_i
), 0);
1070 new_snapids
[i
] = iter
.pos
.offset
;
1072 mutex_lock(&c
->snapshot_table_lock
);
1073 snapshot_t_mut(c
, new_snapids
[i
])->equiv
= new_snapids
[i
];
1074 mutex_unlock(&c
->snapshot_table_lock
);
1077 bch2_trans_iter_exit(trans
, &iter
);
1082 * Create new snapshot IDs as children of an existing snapshot ID:
1084 static int bch2_snapshot_node_create_children(struct btree_trans
*trans
, u32 parent
,
1086 u32
*snapshot_subvols
,
1087 unsigned nr_snapids
)
1089 struct btree_iter iter
;
1090 struct bkey_i_snapshot
*n_parent
;
1093 n_parent
= bch2_bkey_get_mut_typed(trans
, &iter
,
1094 BTREE_ID_snapshots
, POS(0, parent
),
1096 ret
= PTR_ERR_OR_ZERO(n_parent
);
1097 if (unlikely(ret
)) {
1098 if (bch2_err_matches(ret
, ENOENT
))
1099 bch_err(trans
->c
, "snapshot %u not found", parent
);
1103 if (n_parent
->v
.children
[0] || n_parent
->v
.children
[1]) {
1104 bch_err(trans
->c
, "Trying to add child snapshot nodes to parent that already has children");
1109 ret
= create_snapids(trans
, parent
, le32_to_cpu(n_parent
->v
.tree
),
1110 new_snapids
, snapshot_subvols
, nr_snapids
);
1114 n_parent
->v
.children
[0] = cpu_to_le32(new_snapids
[0]);
1115 n_parent
->v
.children
[1] = cpu_to_le32(new_snapids
[1]);
1116 n_parent
->v
.subvol
= 0;
1117 SET_BCH_SNAPSHOT_SUBVOL(&n_parent
->v
, false);
1119 bch2_trans_iter_exit(trans
, &iter
);
1124 * Create a snapshot node that is the root of a new tree:
1126 static int bch2_snapshot_node_create_tree(struct btree_trans
*trans
,
1128 u32
*snapshot_subvols
,
1129 unsigned nr_snapids
)
1131 struct bkey_i_snapshot_tree
*n_tree
;
1134 n_tree
= __bch2_snapshot_tree_create(trans
);
1135 ret
= PTR_ERR_OR_ZERO(n_tree
) ?:
1136 create_snapids(trans
, 0, n_tree
->k
.p
.offset
,
1137 new_snapids
, snapshot_subvols
, nr_snapids
);
1141 n_tree
->v
.master_subvol
= cpu_to_le32(snapshot_subvols
[0]);
1142 n_tree
->v
.root_snapshot
= cpu_to_le32(new_snapids
[0]);
1146 int bch2_snapshot_node_create(struct btree_trans
*trans
, u32 parent
,
1148 u32
*snapshot_subvols
,
1149 unsigned nr_snapids
)
1151 BUG_ON((parent
== 0) != (nr_snapids
== 1));
1152 BUG_ON((parent
!= 0) != (nr_snapids
== 2));
1155 ? bch2_snapshot_node_create_children(trans
, parent
,
1156 new_snapids
, snapshot_subvols
, nr_snapids
)
1157 : bch2_snapshot_node_create_tree(trans
,
1158 new_snapids
, snapshot_subvols
, nr_snapids
);
1163 * If we have an unlinked inode in an internal snapshot node, and the inode
1164 * really has been deleted in all child snapshots, how does this get cleaned up?
1166 * first there is the problem of how keys that have been overwritten in all
1167 * child snapshots get deleted (unimplemented?), but inodes may perhaps be
1170 * also: unlinked inode in internal snapshot appears to not be getting deleted
1171 * correctly if inode doesn't exist in leaf snapshots
1175 * for a key in an interior snapshot node that needs work to be done that
1176 * requires it to be mutated: iterate over all descendent leaf nodes and copy
1177 * that key to snapshot leaf nodes, where we can mutate it
1180 static int snapshot_delete_key(struct btree_trans
*trans
,
1181 struct btree_iter
*iter
,
1183 snapshot_id_list
*deleted
,
1184 snapshot_id_list
*equiv_seen
,
1185 struct bpos
*last_pos
)
1187 struct bch_fs
*c
= trans
->c
;
1188 u32 equiv
= bch2_snapshot_equiv(c
, k
.k
->p
.snapshot
);
1190 if (!bkey_eq(k
.k
->p
, *last_pos
))
1194 if (snapshot_list_has_id(deleted
, k
.k
->p
.snapshot
) ||
1195 snapshot_list_has_id(equiv_seen
, equiv
)) {
1196 return bch2_btree_delete_at(trans
, iter
,
1197 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE
);
1199 return snapshot_list_add(c
, equiv_seen
, equiv
);
1203 static int move_key_to_correct_snapshot(struct btree_trans
*trans
,
1204 struct btree_iter
*iter
,
1207 struct bch_fs
*c
= trans
->c
;
1208 u32 equiv
= bch2_snapshot_equiv(c
, k
.k
->p
.snapshot
);
1211 * When we have a linear chain of snapshot nodes, we consider
1212 * those to form an equivalence class: we're going to collapse
1213 * them all down to a single node, and keep the leaf-most node -
1214 * which has the same id as the equivalence class id.
1216 * If there are multiple keys in different snapshots at the same
1217 * position, we're only going to keep the one in the newest
1218 * snapshot - the rest have been overwritten and are redundant,
1219 * and for the key we're going to keep we need to move it to the
1220 * equivalance class ID if it's not there already.
1222 if (equiv
!= k
.k
->p
.snapshot
) {
1223 struct bkey_i
*new = bch2_bkey_make_mut_noupdate(trans
, k
);
1224 struct btree_iter new_iter
;
1227 ret
= PTR_ERR_OR_ZERO(new);
1231 new->k
.p
.snapshot
= equiv
;
1233 bch2_trans_iter_init(trans
, &new_iter
, iter
->btree_id
, new->k
.p
,
1234 BTREE_ITER_ALL_SNAPSHOTS
|
1238 ret
= bch2_btree_iter_traverse(&new_iter
) ?:
1239 bch2_trans_update(trans
, &new_iter
, new,
1240 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE
) ?:
1241 bch2_btree_delete_at(trans
, iter
,
1242 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE
);
1243 bch2_trans_iter_exit(trans
, &new_iter
);
1251 static int bch2_snapshot_needs_delete(struct btree_trans
*trans
, struct bkey_s_c k
)
1253 struct bkey_s_c_snapshot snap
;
1257 if (k
.k
->type
!= KEY_TYPE_snapshot
)
1260 snap
= bkey_s_c_to_snapshot(k
);
1261 if (BCH_SNAPSHOT_DELETED(snap
.v
) ||
1262 BCH_SNAPSHOT_SUBVOL(snap
.v
))
1265 children
[0] = le32_to_cpu(snap
.v
->children
[0]);
1266 children
[1] = le32_to_cpu(snap
.v
->children
[1]);
1268 ret
= bch2_snapshot_live(trans
, children
[0]) ?:
1269 bch2_snapshot_live(trans
, children
[1]);
1276 * For a given snapshot, if it doesn't have a subvolume that points to it, and
1277 * it doesn't have child snapshot nodes - it's now redundant and we can mark it
1280 static int bch2_delete_redundant_snapshot(struct btree_trans
*trans
, struct bkey_s_c k
)
1282 int ret
= bch2_snapshot_needs_delete(trans
, k
);
1286 : bch2_snapshot_node_set_deleted(trans
, k
.k
->p
.offset
);
1289 static inline u32
bch2_snapshot_nth_parent_skip(struct bch_fs
*c
, u32 id
, u32 n
,
1290 snapshot_id_list
*skip
)
1293 while (snapshot_list_has_id(skip
, id
))
1294 id
= __bch2_snapshot_parent(c
, id
);
1298 id
= __bch2_snapshot_parent(c
, id
);
1299 } while (snapshot_list_has_id(skip
, id
));
1306 static int bch2_fix_child_of_deleted_snapshot(struct btree_trans
*trans
,
1307 struct btree_iter
*iter
, struct bkey_s_c k
,
1308 snapshot_id_list
*deleted
)
1310 struct bch_fs
*c
= trans
->c
;
1311 u32 nr_deleted_ancestors
= 0;
1312 struct bkey_i_snapshot
*s
;
1315 if (k
.k
->type
!= KEY_TYPE_snapshot
)
1318 if (snapshot_list_has_id(deleted
, k
.k
->p
.offset
))
1321 s
= bch2_bkey_make_mut_noupdate_typed(trans
, k
, snapshot
);
1322 ret
= PTR_ERR_OR_ZERO(s
);
1326 darray_for_each(*deleted
, i
)
1327 nr_deleted_ancestors
+= bch2_snapshot_is_ancestor(c
, s
->k
.p
.offset
, *i
);
1329 if (!nr_deleted_ancestors
)
1332 le32_add_cpu(&s
->v
.depth
, -nr_deleted_ancestors
);
1339 u32 depth
= le32_to_cpu(s
->v
.depth
);
1340 u32 parent
= bch2_snapshot_parent(c
, s
->k
.p
.offset
);
1342 for (unsigned j
= 0; j
< ARRAY_SIZE(s
->v
.skip
); j
++) {
1343 u32 id
= le32_to_cpu(s
->v
.skip
[j
]);
1345 if (snapshot_list_has_id(deleted
, id
)) {
1346 id
= bch2_snapshot_nth_parent_skip(c
,
1349 ? get_random_u32_below(depth
- 1)
1352 s
->v
.skip
[j
] = cpu_to_le32(id
);
1356 bubble_sort(s
->v
.skip
, ARRAY_SIZE(s
->v
.skip
), cmp_le32
);
1359 return bch2_trans_update(trans
, iter
, &s
->k_i
, 0);
1362 int bch2_delete_dead_snapshots(struct bch_fs
*c
)
1364 struct btree_trans
*trans
;
1365 snapshot_id_list deleted
= { 0 };
1366 snapshot_id_list deleted_interior
= { 0 };
1370 if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots
, &c
->flags
))
1373 if (!test_bit(BCH_FS_started
, &c
->flags
)) {
1374 ret
= bch2_fs_read_write_early(c
);
1375 bch_err_msg(c
, ret
, "deleting dead snapshots: error going rw");
1380 trans
= bch2_trans_get(c
);
1383 * For every snapshot node: If we have no live children and it's not
1384 * pointed to by a subvolume, delete it:
1386 ret
= for_each_btree_key_commit(trans
, iter
, BTREE_ID_snapshots
,
1389 bch2_delete_redundant_snapshot(trans
, k
));
1390 bch_err_msg(c
, ret
, "deleting redundant snapshots");
1394 ret
= for_each_btree_key(trans
, iter
, BTREE_ID_snapshots
,
1396 bch2_snapshot_set_equiv(trans
, k
));
1397 bch_err_msg(c
, ret
, "in bch2_snapshots_set_equiv");
1401 ret
= for_each_btree_key(trans
, iter
, BTREE_ID_snapshots
,
1403 if (k
.k
->type
!= KEY_TYPE_snapshot
)
1406 BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k
).v
)
1407 ? snapshot_list_add(c
, &deleted
, k
.k
->p
.offset
)
1410 bch_err_msg(c
, ret
, "walking snapshots");
1414 for (id
= 0; id
< BTREE_ID_NR
; id
++) {
1415 struct bpos last_pos
= POS_MIN
;
1416 snapshot_id_list equiv_seen
= { 0 };
1417 struct disk_reservation res
= { 0 };
1419 if (!btree_type_has_snapshots(id
))
1423 * deleted inodes btree is maintained by a trigger on the inodes
1424 * btree - no work for us to do here, and it's not safe to scan
1425 * it because we'll see out of date keys due to the btree write
1428 if (id
== BTREE_ID_deleted_inodes
)
1431 ret
= for_each_btree_key_commit(trans
, iter
,
1433 BTREE_ITER_PREFETCH
|BTREE_ITER_ALL_SNAPSHOTS
, k
,
1434 &res
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
1435 snapshot_delete_key(trans
, &iter
, k
, &deleted
, &equiv_seen
, &last_pos
)) ?:
1436 for_each_btree_key_commit(trans
, iter
,
1438 BTREE_ITER_PREFETCH
|BTREE_ITER_ALL_SNAPSHOTS
, k
,
1439 &res
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
1440 move_key_to_correct_snapshot(trans
, &iter
, k
));
1442 bch2_disk_reservation_put(c
, &res
);
1443 darray_exit(&equiv_seen
);
1445 bch_err_msg(c
, ret
, "deleting keys from dying snapshots");
1450 bch2_trans_unlock(trans
);
1451 down_write(&c
->snapshot_create_lock
);
1453 ret
= for_each_btree_key(trans
, iter
, BTREE_ID_snapshots
,
1455 u32 snapshot
= k
.k
->p
.offset
;
1456 u32 equiv
= bch2_snapshot_equiv(c
, snapshot
);
1459 ? snapshot_list_add(c
, &deleted_interior
, snapshot
)
1463 bch_err_msg(c
, ret
, "walking snapshots");
1465 goto err_create_lock
;
1468 * Fixing children of deleted snapshots can't be done completely
1469 * atomically, if we crash between here and when we delete the interior
1470 * nodes some depth fields will be off:
1472 ret
= for_each_btree_key_commit(trans
, iter
, BTREE_ID_snapshots
, POS_MIN
,
1473 BTREE_ITER_INTENT
, k
,
1474 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
1475 bch2_fix_child_of_deleted_snapshot(trans
, &iter
, k
, &deleted_interior
));
1477 goto err_create_lock
;
1479 darray_for_each(deleted
, i
) {
1480 ret
= commit_do(trans
, NULL
, NULL
, 0,
1481 bch2_snapshot_node_delete(trans
, *i
));
1482 bch_err_msg(c
, ret
, "deleting snapshot %u", *i
);
1484 goto err_create_lock
;
1487 darray_for_each(deleted_interior
, i
) {
1488 ret
= commit_do(trans
, NULL
, NULL
, 0,
1489 bch2_snapshot_node_delete(trans
, *i
));
1490 bch_err_msg(c
, ret
, "deleting snapshot %u", *i
);
1492 goto err_create_lock
;
1495 up_write(&c
->snapshot_create_lock
);
1497 darray_exit(&deleted_interior
);
1498 darray_exit(&deleted
);
1499 bch2_trans_put(trans
);
1504 void bch2_delete_dead_snapshots_work(struct work_struct
*work
)
1506 struct bch_fs
*c
= container_of(work
, struct bch_fs
, snapshot_delete_work
);
1508 bch2_delete_dead_snapshots(c
);
1509 bch2_write_ref_put(c
, BCH_WRITE_REF_delete_dead_snapshots
);
1512 void bch2_delete_dead_snapshots_async(struct bch_fs
*c
)
1514 if (bch2_write_ref_tryget(c
, BCH_WRITE_REF_delete_dead_snapshots
) &&
1515 !queue_work(c
->write_ref_wq
, &c
->snapshot_delete_work
))
1516 bch2_write_ref_put(c
, BCH_WRITE_REF_delete_dead_snapshots
);
1519 int __bch2_key_has_snapshot_overwrites(struct btree_trans
*trans
,
1523 struct bch_fs
*c
= trans
->c
;
1524 struct btree_iter iter
;
1528 bch2_trans_iter_init(trans
, &iter
, id
, pos
,
1529 BTREE_ITER_NOT_EXTENTS
|
1530 BTREE_ITER_ALL_SNAPSHOTS
);
1532 k
= bch2_btree_iter_prev(&iter
);
1540 if (!bkey_eq(pos
, k
.k
->p
))
1543 if (bch2_snapshot_is_ancestor(c
, k
.k
->p
.snapshot
, pos
.snapshot
)) {
1548 bch2_trans_iter_exit(trans
, &iter
);
1553 static u32
bch2_snapshot_smallest_child(struct bch_fs
*c
, u32 id
)
1555 const struct snapshot_t
*s
= snapshot_t(c
, id
);
1557 return s
->children
[1] ?: s
->children
[0];
1560 static u32
bch2_snapshot_smallest_descendent(struct bch_fs
*c
, u32 id
)
1564 while ((child
= bch2_snapshot_smallest_child(c
, id
)))
1569 static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans
*trans
,
1570 enum btree_id btree
,
1571 struct bkey_s_c interior_k
,
1572 u32 leaf_id
, struct bpos
*new_min_pos
)
1574 struct btree_iter iter
;
1575 struct bpos pos
= interior_k
.k
->p
;
1580 pos
.snapshot
= leaf_id
;
1582 bch2_trans_iter_init(trans
, &iter
, btree
, pos
, BTREE_ITER_INTENT
);
1583 k
= bch2_btree_iter_peek_slot(&iter
);
1588 /* key already overwritten in this snapshot? */
1589 if (k
.k
->p
.snapshot
!= interior_k
.k
->p
.snapshot
)
1592 if (bpos_eq(*new_min_pos
, POS_MIN
)) {
1593 *new_min_pos
= k
.k
->p
;
1594 new_min_pos
->snapshot
= leaf_id
;
1597 new = bch2_bkey_make_mut_noupdate(trans
, interior_k
);
1598 ret
= PTR_ERR_OR_ZERO(new);
1602 new->k
.p
.snapshot
= leaf_id
;
1603 ret
= bch2_trans_update(trans
, &iter
, new, 0);
1605 bch2_trans_iter_exit(trans
, &iter
);
1609 int bch2_propagate_key_to_snapshot_leaves(struct btree_trans
*trans
,
1610 enum btree_id btree
,
1612 struct bpos
*new_min_pos
)
1614 struct bch_fs
*c
= trans
->c
;
1616 u32 restart_count
= trans
->restart_count
;
1619 bch2_bkey_buf_init(&sk
);
1620 bch2_bkey_buf_reassemble(&sk
, c
, k
);
1621 k
= bkey_i_to_s_c(sk
.k
);
1623 *new_min_pos
= POS_MIN
;
1625 for (u32 id
= bch2_snapshot_smallest_descendent(c
, k
.k
->p
.snapshot
);
1626 id
< k
.k
->p
.snapshot
;
1628 if (!bch2_snapshot_is_ancestor(c
, id
, k
.k
->p
.snapshot
) ||
1629 !bch2_snapshot_is_leaf(c
, id
))
1632 ret
= btree_trans_too_many_iters(trans
) ?:
1633 bch2_propagate_key_to_snapshot_leaf(trans
, btree
, k
, id
, new_min_pos
) ?:
1634 bch2_trans_commit(trans
, NULL
, NULL
, 0);
1635 if (ret
&& bch2_err_matches(ret
, BCH_ERR_transaction_restart
)) {
1636 bch2_trans_begin(trans
);
1644 bch2_bkey_buf_exit(&sk
, c
);
1646 return ret
?: trans_was_restarted(trans
, restart_count
);
1649 static int bch2_check_snapshot_needs_deletion(struct btree_trans
*trans
, struct bkey_s_c k
)
1651 struct bch_fs
*c
= trans
->c
;
1652 struct bkey_s_c_snapshot snap
;
1655 if (k
.k
->type
!= KEY_TYPE_snapshot
)
1658 snap
= bkey_s_c_to_snapshot(k
);
1659 if (BCH_SNAPSHOT_DELETED(snap
.v
) ||
1660 bch2_snapshot_equiv(c
, k
.k
->p
.offset
) != k
.k
->p
.offset
||
1661 (ret
= bch2_snapshot_needs_delete(trans
, k
)) > 0) {
1662 set_bit(BCH_FS_need_delete_dead_snapshots
, &c
->flags
);
1669 int bch2_snapshots_read(struct bch_fs
*c
)
1671 int ret
= bch2_trans_run(c
,
1672 for_each_btree_key(trans
, iter
, BTREE_ID_snapshots
,
1674 __bch2_mark_snapshot(trans
, BTREE_ID_snapshots
, 0, bkey_s_c_null
, k
, 0) ?:
1675 bch2_snapshot_set_equiv(trans
, k
) ?:
1676 bch2_check_snapshot_needs_deletion(trans
, k
)) ?:
1677 for_each_btree_key(trans
, iter
, BTREE_ID_snapshots
,
1679 (set_is_ancestor_bitmap(c
, k
.k
->p
.offset
), 0)));
1684 void bch2_fs_snapshots_exit(struct bch_fs
*c
)
1686 kvfree(rcu_dereference_protected(c
->snapshots
, true));