1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_update.h"
11 #include "rebalance.h"
13 #include "subvolume.h"
16 #include <linux/sched/signal.h>
/*
 * Map a direct extent key type to its indirect (reflink) counterpart:
 * KEY_TYPE_inline_data -> KEY_TYPE_indirect_inline_data, and (per the
 * visible return) some other case -> KEY_TYPE_reflink_v.
 * NOTE(review): embedded line numbers jump 18 -> 22, so the switch
 * header, its first case label, the default branch and the closing
 * brace were lost in extraction — restore from upstream before build.
 */
18 static inline unsigned bkey_type_to_indirect(const struct bkey
*k
)
22 return KEY_TYPE_reflink_v
;
23 case KEY_TYPE_inline_data
:
24 return KEY_TYPE_indirect_inline_data
;
30 /* reflink pointers */
/*
 * Validity check for a reflink_p key: flags an fsck error when
 * idx < front_pad, i.e. the front padding would extend below offset 0
 * of the indirect extent the pointer references.
 * NOTE(review): lines 34-35 and 37-38 plus the fsck_err label /
 * return and closing brace are missing from this extraction —
 * the 'err' identifier used below is declared in a lost line.
 */
32 int bch2_reflink_p_invalid(struct bch_fs
*c
, struct bkey_s_c k
,
33 enum bkey_invalid_flags flags
,
36 struct bkey_s_c_reflink_p p
= bkey_s_c_to_reflink_p(k
);
/* reject keys whose start offset sits inside the front padding */
39 bkey_fsck_err_on(le64_to_cpu(p
.v
->idx
) < le32_to_cpu(p
.v
->front_pad
),
40 c
, err
, reflink_p_front_pad_bad
,
41 "idx < front_pad (%llu < %u)",
42 le64_to_cpu(p
.v
->idx
), le32_to_cpu(p
.v
->front_pad
));
/*
 * Pretty-print a reflink_p key: its index into the reflink btree and
 * the front/back padding (little-endian on disk, converted here).
 * NOTE(review): the bkey_s_c k parameter line (orig. 48-49) and the
 * function's braces were dropped by the extraction.
 */
47 void bch2_reflink_p_to_text(struct printbuf
*out
, struct bch_fs
*c
,
50 struct bkey_s_c_reflink_p p
= bkey_s_c_to_reflink_p(k
);
52 prt_printf(out
, "idx %llu front_pad %u back_pad %u",
53 le64_to_cpu(p
.v
->idx
),
54 le32_to_cpu(p
.v
->front_pad
),
55 le32_to_cpu(p
.v
->back_pad
));
/*
 * Attempt to merge two adjacent reflink_p keys: only possible when the
 * left key's indirect range (idx + size) ends exactly where the right
 * key's idx begins; on success the left key is resized to cover both.
 * NOTE(review): the interior comment (orig. 64-65) says merging is
 * disabled pending trigger-code rework; the early-return body and the
 * final 'return true'/closing brace are missing from this extraction.
 */
58 bool bch2_reflink_p_merge(struct bch_fs
*c
, struct bkey_s _l
, struct bkey_s_c _r
)
60 struct bkey_s_reflink_p l
= bkey_s_to_reflink_p(_l
);
61 struct bkey_s_c_reflink_p r
= bkey_s_c_to_reflink_p(_r
);
64 * Disabled for now, the triggers code needs to be reworked for merging
65 * of reflink pointers to work:
/* non-contiguous indirect ranges cannot merge */
69 if (le64_to_cpu(l
.v
->idx
) + l
.k
->size
!= le64_to_cpu(r
.v
->idx
))
72 bch2_key_resize(l
.k
, l
.k
->size
+ r
.k
->size
);
/*
 * Transactional trigger for one segment of the indirect extent range a
 * reflink_p points at: looks up the indirect extent at *idx in the
 * reflink btree, bumps its refcount (+1 on insert, -1 on overwrite),
 * and advances *idx past the segment for the caller's loop.
 * On an INSERT it also grows the pointer's front_pad/back_pad so the
 * reflink_p's padded range exactly covers the indirect extent found.
 * NOTE(review): many interior lines (declarations of k/ret/refcount/
 * pad, error paths, the 'err' label and closing brace) were dropped by
 * the extraction — compare the jumps in the embedded line numbers.
 */
76 static int trans_trigger_reflink_p_segment(struct btree_trans
*trans
,
77 struct bkey_s_c_reflink_p p
,
78 u64
*idx
, unsigned flags
)
80 struct bch_fs
*c
= trans
->c
;
81 struct btree_iter iter
;
/* direction of the refcount change is decided by the trigger flags */
84 int add
= !(flags
& BTREE_TRIGGER_OVERWRITE
) ? 1 : -1;
85 struct printbuf buf
= PRINTBUF
;
88 k
= bch2_bkey_get_mut_noupdate(trans
, &iter
,
89 BTREE_ID_reflink
, POS(0, *idx
),
90 BTREE_ITER_WITH_UPDATES
);
91 ret
= PTR_ERR_OR_ZERO(k
);
95 refcount
= bkey_refcount(bkey_i_to_s(k
));
/* no refcount field at this position -> the indirect extent is gone */
97 bch2_bkey_val_to_text(&buf
, c
, p
.s_c
);
98 bch2_trans_inconsistent(trans
,
99 "nonexistent indirect extent at %llu while marking\n %s",
/* dropping a reference that is already zero would underflow */
105 if (!*refcount
&& (flags
& BTREE_TRIGGER_OVERWRITE
)) {
106 bch2_bkey_val_to_text(&buf
, c
, p
.s_c
);
107 bch2_trans_inconsistent(trans
,
108 "indirect extent refcount underflow at %llu while marking\n %s",
114 if (flags
& BTREE_TRIGGER_INSERT
) {
/* cast away const: padding fields are adjusted in place on insert */
115 struct bch_reflink_p
*v
= (struct bch_reflink_p
*) p
.v
;
118 pad
= max_t(s64
, le32_to_cpu(v
->front_pad
),
119 le64_to_cpu(v
->idx
) - bkey_start_offset(&k
->k
));
120 BUG_ON(pad
> U32_MAX
);
121 v
->front_pad
= cpu_to_le32(pad
);
123 pad
= max_t(s64
, le32_to_cpu(v
->back_pad
),
124 k
->k
.p
.offset
- p
.k
->size
- le64_to_cpu(v
->idx
));
125 BUG_ON(pad
> U32_MAX
);
126 v
->back_pad
= cpu_to_le32(pad
);
129 le64_add_cpu(refcount
, add
);
131 bch2_btree_iter_set_pos_to_extent_start(&iter
);
132 ret
= bch2_trans_update(trans
, &iter
, k
, 0);
/* advance caller's cursor to the end of this indirect extent */
136 *idx
= k
->k
.p
.offset
;
138 bch2_trans_iter_exit(trans
, &iter
);
/*
 * GC-time counterpart of trans_trigger_reflink_p_segment: accounts one
 * segment of a reflink_p's (padded) range against the in-memory
 * reflink_gc_table instead of the btree. start/end are the unpadded
 * range; next_idx additionally covers back_pad. If the pointed-to
 * indirect extent is missing, fsck repair rewrites the reflink_p —
 * either growing front_pad/back_pad to swallow the hole, or splitting
 * out an error key over the missing middle range.
 * NOTE(review): interior lines (the 'not_found' path's setup, ret/
 * fsck_err labels, loop/brace structure) are missing from this
 * extraction, per the jumps in embedded line numbers.
 */
143 static s64
gc_trigger_reflink_p_segment(struct btree_trans
*trans
,
144 struct bkey_s_c_reflink_p p
,
145 u64
*idx
, unsigned flags
, size_t r_idx
)
147 struct bch_fs
*c
= trans
->c
;
148 struct reflink_gc
*r
;
149 int add
= !(flags
& BTREE_TRIGGER_OVERWRITE
) ? 1 : -1;
150 u64 start
= le64_to_cpu(p
.v
->idx
);
151 u64 end
= le64_to_cpu(p
.v
->idx
) + p
.k
->size
;
152 u64 next_idx
= end
+ le32_to_cpu(p
.v
->back_pad
);
154 struct printbuf buf
= PRINTBUF
;
/* ran off the end of the gc table -> remainder of range is missing */
156 if (r_idx
>= c
->reflink_gc_nr
)
159 r
= genradix_ptr(&c
->reflink_gc_table
, r_idx
);
/* clamp this segment to where the gc entry's extent begins */
160 next_idx
= min(next_idx
, r
->offset
- r
->size
);
164 BUG_ON((s64
) r
->refcount
+ add
< 0);
170 if (fsck_err(c
, reflink_p_to_missing_reflink_v
,
171 "pointer to missing indirect extent\n"
173 " missing range %llu-%llu",
174 (bch2_bkey_val_to_text(&buf
, c
, p
.s_c
), buf
.buf
),
176 struct bkey_i
*update
= bch2_bkey_make_mut_noupdate(trans
, p
.s_c
);
177 ret
= PTR_ERR_OR_ZERO(update
);
/* hole entirely before the live range: absorb it into front_pad */
181 if (next_idx
<= start
) {
182 bkey_i_to_reflink_p(update
)->v
.front_pad
= cpu_to_le32(start
- next_idx
);
/* hole entirely after the live range: absorb it into back_pad */
183 } else if (*idx
>= end
) {
184 bkey_i_to_reflink_p(update
)->v
.back_pad
= cpu_to_le32(*idx
- end
);
/* hole in the middle: emit an error key spanning [*idx, next_idx) */
186 bkey_error_init(update
);
187 update
->k
.p
= p
.k
->p
;
188 update
->k
.p
.offset
= next_idx
;
189 update
->k
.size
= next_idx
- *idx
;
190 set_bkey_val_u64s(&update
->k
, 0);
193 ret
= bch2_btree_insert_trans(trans
, BTREE_ID_extents
, update
, BTREE_TRIGGER_NORUN
);
/*
 * Common reflink_p trigger: walks the pointer's full padded range
 * [idx - front_pad, idx + size + back_pad) segment by segment.
 * TRANSACTIONAL path updates refcounts in the reflink btree;
 * GC path first binary-searches reflink_gc_table for the starting
 * entry, then accounts each segment against successive entries.
 * NOTE(review): the 'int ret' declaration, the binary-search loop
 * header/else branch and the function's return/braces were dropped
 * by the extraction.
 */
203 static int __trigger_reflink_p(struct btree_trans
*trans
,
204 enum btree_id btree_id
, unsigned level
,
205 struct bkey_s_c k
, unsigned flags
)
207 struct bch_fs
*c
= trans
->c
;
208 struct bkey_s_c_reflink_p p
= bkey_s_c_to_reflink_p(k
);
/* idx/end span the padded range, not just the live extent */
211 u64 idx
= le64_to_cpu(p
.v
->idx
) - le32_to_cpu(p
.v
->front_pad
);
212 u64 end
= le64_to_cpu(p
.v
->idx
) + p
.k
->size
+ le32_to_cpu(p
.v
->back_pad
);
214 if (flags
& BTREE_TRIGGER_TRANSACTIONAL
) {
215 while (idx
< end
&& !ret
)
216 ret
= trans_trigger_reflink_p_segment(trans
, p
, &idx
, flags
);
219 if (flags
& BTREE_TRIGGER_GC
) {
/* binary search for the first gc-table entry covering idx */
220 size_t l
= 0, r
= c
->reflink_gc_nr
;
223 size_t m
= l
+ (r
- l
) / 2;
224 struct reflink_gc
*ref
= genradix_ptr(&c
->reflink_gc_table
, m
);
225 if (ref
->offset
<= idx
)
231 while (idx
< end
&& !ret
)
232 ret
= gc_trigger_reflink_p_segment(trans
, p
, &idx
, flags
, l
++);
/*
 * Top-level reflink_p trigger entry point: on a transactional insert
 * it zeroes the new key's padding (trans_trigger_reflink_p_segment will
 * recompute it), then runs __trigger_reflink_p for overwrite + insert.
 * NOTE(review): the old/new/flags parameter lines (orig. 240-243) and
 * braces are missing from this extraction.
 */
238 int bch2_trigger_reflink_p(struct btree_trans
*trans
,
239 enum btree_id btree_id
, unsigned level
,
244 if ((flags
& BTREE_TRIGGER_TRANSACTIONAL
) &&
245 (flags
& BTREE_TRIGGER_INSERT
)) {
246 struct bch_reflink_p
*v
= bkey_s_to_reflink_p(new).v
;
/* padding is rebuilt by the per-segment trigger */
248 v
->front_pad
= v
->back_pad
= 0;
251 return trigger_run_overwrite_then_insert(__trigger_reflink_p
, trans
, btree_id
, level
, old
, new, flags
);
254 /* indirect extents */
/*
 * Validity check for an indirect extent (reflink_v): delegates to the
 * generic extent-pointer validation.
 * NOTE(review): opening/closing braces were dropped by the extraction.
 */
256 int bch2_reflink_v_invalid(struct bch_fs
*c
, struct bkey_s_c k
,
257 enum bkey_invalid_flags flags
,
258 struct printbuf
*err
)
260 return bch2_bkey_ptrs_invalid(c
, k
, flags
, err
);
/*
 * Pretty-print an indirect extent: its refcount followed by the
 * standard extent-pointer listing.
 * NOTE(review): the bkey_s_c k parameter line (orig. 264-265) and the
 * braces are missing from this extraction.
 */
263 void bch2_reflink_v_to_text(struct printbuf
*out
, struct bch_fs
*c
,
266 struct bkey_s_c_reflink_v r
= bkey_s_c_to_reflink_v(k
);
268 prt_printf(out
, "refcount: %llu ", le64_to_cpu(r
.v
->refcount
));
270 bch2_bkey_ptrs_to_text(out
, c
, k
);
/*
 * NOTE(review): per the "Currently disabled, needs to be debugged"
 * text, this merge function was commented out in the original file;
 * the surrounding comment delimiters were lost in extraction, so this
 * span must NOT be compiled as live code. It would merge two adjacent
 * reflink_v keys only when their refcounts match and the underlying
 * extents are mergeable.
 */
274 Currently disabled
, needs to be debugged
:
276 bool bch2_reflink_v_merge(struct bch_fs
*c
, struct bkey_s _l
, struct bkey_s_c _r
)
278 struct bkey_s_reflink_v l
= bkey_s_to_reflink_v(_l
);
279 struct bkey_s_c_reflink_v r
= bkey_s_c_to_reflink_v(_r
);
281 return l
.v
->refcount
== r
.v
->refcount
&& bch2_extent_merge(c
, _l
, _r
);
/*
 * If an indirect extent is being inserted with a refcount of zero,
 * convert it to a deleted key (type + zero-length val) and strip the
 * INSERT trigger flag so no reference accounting runs for it.
 * NOTE(review): braces and any intervening lines (orig. 289) were
 * dropped by the extraction.
 */
285 static inline void check_indirect_extent_deleting(struct bkey_s
new, unsigned *flags
)
287 if ((*flags
& BTREE_TRIGGER_INSERT
) && !*bkey_refcount(new)) {
288 new.k
->type
= KEY_TYPE_deleted
;
290 set_bkey_val_u64s(new.k
, 0);
291 *flags
&= ~BTREE_TRIGGER_INSERT
;
/*
 * Trigger for indirect extents: on a transactional insert, first let
 * check_indirect_extent_deleting() turn a zero-refcount insert into a
 * deletion, then run the normal extent trigger.
 * NOTE(review): the flags parameter line (orig. 298-299) and braces
 * are missing from this extraction.
 */
295 int bch2_trigger_reflink_v(struct btree_trans
*trans
,
296 enum btree_id btree_id
, unsigned level
,
297 struct bkey_s_c old
, struct bkey_s
new,
300 if ((flags
& BTREE_TRIGGER_TRANSACTIONAL
) &&
301 (flags
& BTREE_TRIGGER_INSERT
))
302 check_indirect_extent_deleting(new, &flags
);
304 return bch2_trigger_extent(trans
, btree_id
, level
, old
, new, flags
);
307 /* indirect inline data */
/*
 * Validity check for indirect inline data keys.
 * NOTE(review): the entire body (orig. 312-314) was dropped by the
 * extraction — only the signature remains; restore from upstream.
 */
309 int bch2_indirect_inline_data_invalid(struct bch_fs
*c
, struct bkey_s_c k
,
310 enum bkey_invalid_flags flags
,
311 struct printbuf
*err
)
/*
 * Pretty-print an indirect inline data key: refcount, payload length,
 * and a hex dump of at most the first 32 bytes of the data.
 * NOTE(review): braces were dropped by the extraction.
 */
316 void bch2_indirect_inline_data_to_text(struct printbuf
*out
,
317 struct bch_fs
*c
, struct bkey_s_c k
)
319 struct bkey_s_c_indirect_inline_data d
= bkey_s_c_to_indirect_inline_data(k
);
320 unsigned datalen
= bkey_inline_data_bytes(k
.k
);
322 prt_printf(out
, "refcount %llu datalen %u: %*phN",
323 le64_to_cpu(d
.v
->refcount
), datalen
,
/* cap hex dump at 32 bytes */
324 min(datalen
, 32U), d
.v
->data
);
/*
 * Trigger for indirect inline data: converts a zero-refcount insert
 * into a deletion via check_indirect_extent_deleting().
 * NOTE(review): the flags parameter line (orig. 330-331), the return
 * statement and braces were dropped by the extraction.
 */
327 int bch2_trigger_indirect_inline_data(struct btree_trans
*trans
,
328 enum btree_id btree_id
, unsigned level
,
329 struct bkey_s_c old
, struct bkey_s
new,
332 check_indirect_extent_deleting(new, &flags
);
/*
 * Convert a direct extent (*orig) into an indirect one: allocate a new
 * reflink_v key at the end of the reflink btree (position found via
 * peek_prev from POS_MAX), copy orig's value after a leading __le64
 * refcount, insert it, then rewrite orig in place as a reflink_p
 * pointing at the new indirect extent's start offset.
 * NOTE(review): several interior lines (orig parameter, k/ret/r_v/
 * refcount declarations, error paths, #else/#endif around the memset,
 * final return and braces) are missing from this extraction.
 */
337 static int bch2_make_extent_indirect(struct btree_trans
*trans
,
338 struct btree_iter
*extent_iter
,
341 struct bch_fs
*c
= trans
->c
;
342 struct btree_iter reflink_iter
= { NULL
};
345 struct bkey_i_reflink_p
*r_p
;
349 if (orig
->k
.type
== KEY_TYPE_inline_data
)
350 bch2_check_set_feature(c
, BCH_FEATURE_reflink_inline_data
);
/* find the current end of the reflink btree to append after */
352 bch2_trans_iter_init(trans
, &reflink_iter
, BTREE_ID_reflink
, POS_MAX
,
354 k
= bch2_btree_iter_peek_prev(&reflink_iter
);
/* new indirect extent: orig's value prefixed by a __le64 refcount */
359 r_v
= bch2_trans_kmalloc(trans
, sizeof(__le64
) + bkey_bytes(&orig
->k
));
360 ret
= PTR_ERR_OR_ZERO(r_v
);
365 r_v
->k
.type
= bkey_type_to_indirect(&orig
->k
);
366 r_v
->k
.p
= reflink_iter
.pos
;
367 bch2_key_resize(&r_v
->k
, orig
->k
.size
);
368 r_v
->k
.version
= orig
->k
.version
;
370 set_bkey_val_bytes(&r_v
->k
, sizeof(__le64
) + bkey_val_bytes(&orig
->k
));
372 refcount
= bkey_refcount(bkey_i_to_s(r_v
));
/* payload goes immediately after the refcount word */
374 memcpy(refcount
+ 1, &orig
->v
, bkey_val_bytes(&orig
->k
));
376 ret
= bch2_trans_update(trans
, &reflink_iter
, r_v
, 0);
381 * orig is in a bkey_buf which statically allocates 5 64s for the val,
382 * so we know it will be big enough:
384 orig
->k
.type
= KEY_TYPE_reflink_p
;
385 r_p
= bkey_i_to_reflink_p(orig
);
386 set_bkey_val_bytes(&r_p
->k
, sizeof(r_p
->v
));
388 /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
389 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
390 __underlying_memset(&r_p
->v
, 0, sizeof(r_p
->v
));
392 memset(&r_p
->v
, 0, sizeof(r_p
->v
));
395 r_p
->v
.idx
= cpu_to_le64(bkey_start_offset(&r_v
->k
));
397 ret
= bch2_trans_update(trans
, extent_iter
, &r_p
->k_i
,
398 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE
);
400 bch2_trans_iter_exit(trans
, &reflink_iter
);
/*
 * Advance *iter to the next source extent before 'end' that is worth
 * remapping, skipping unwritten extents; returns the key found,
 * bkey_s_c_err on iterator error, or bkey_s_c_null when the range is
 * exhausted (iterator clamped to 'end').
 * NOTE(review): the k/ret declarations, the loop's break/continue
 * bodies and braces were dropped by the extraction.
 */
405 static struct bkey_s_c
get_next_src(struct btree_iter
*iter
, struct bpos end
)
410 for_each_btree_key_upto_continue_norestart(*iter
, end
, 0, k
, ret
) {
411 if (bkey_extent_is_unwritten(k
))
414 if (bkey_extent_is_data(k
.k
))
418 if (bkey_ge(iter
->pos
, end
))
419 bch2_btree_iter_set_pos(iter
, end
);
420 return ret
? bkey_s_c_err(ret
) : bkey_s_c_null
;
/*
 * Remap (reflink/clone) remap_sectors of src_inum at src_offset onto
 * dst_inum at dst_offset. Per iteration of the main loop it: re-reads
 * both subvolumes' snapshot IDs, finds the next source extent
 * (get_next_src), punches any hole in the destination where the source
 * has none, converts a direct source extent to indirect
 * (bch2_make_extent_indirect) if needed, then inserts a reflink_p into
 * the destination via bch2_extent_update. Afterwards it bumps the
 * destination inode's i_size (in its own transaction-restart loop).
 * Returns sectors remapped (dst_done) or the first error.
 * NOTE(review): this function runs past the end of the visible chunk
 * and many interior lines (remap_sectors parameter, dst_done decl,
 * loop header, error-label structure, closing brace) were dropped by
 * the extraction — restore from upstream before build.
 */
423 s64
bch2_remap_range(struct bch_fs
*c
,
424 subvol_inum dst_inum
, u64 dst_offset
,
425 subvol_inum src_inum
, u64 src_offset
,
427 u64 new_i_size
, s64
*i_sectors_delta
)
429 struct btree_trans
*trans
;
430 struct btree_iter dst_iter
, src_iter
;
431 struct bkey_s_c src_k
;
432 struct bkey_buf new_dst
, new_src
;
433 struct bpos dst_start
= POS(dst_inum
.inum
, dst_offset
);
434 struct bpos src_start
= POS(src_inum
.inum
, src_offset
);
435 struct bpos dst_end
= dst_start
, src_end
= src_start
;
436 struct bch_io_opts opts
;
437 struct bpos src_want
;
439 u32 dst_snapshot
, src_snapshot
;
440 int ret
= 0, ret2
= 0;
/* refuse to start if the fs no longer accepts writes */
442 if (!bch2_write_ref_tryget(c
, BCH_WRITE_REF_reflink
))
443 return -BCH_ERR_erofs_no_writes
;
445 bch2_check_set_feature(c
, BCH_FEATURE_reflink
);
447 dst_end
.offset
+= remap_sectors
;
448 src_end
.offset
+= remap_sectors
;
450 bch2_bkey_buf_init(&new_dst
);
451 bch2_bkey_buf_init(&new_src
);
452 trans
= bch2_trans_get(c
);
454 ret
= bch2_inum_opts_get(trans
, src_inum
, &opts
);
458 bch2_trans_iter_init(trans
, &src_iter
, BTREE_ID_extents
, src_start
,
460 bch2_trans_iter_init(trans
, &dst_iter
, BTREE_ID_extents
, dst_start
,
/* main loop: continue while no hard error and dst range remains */
464 bch2_err_matches(ret
, BCH_ERR_transaction_restart
)) &&
465 bkey_lt(dst_iter
.pos
, dst_end
)) {
466 struct disk_reservation disk_res
= { 0 };
468 bch2_trans_begin(trans
);
470 if (fatal_signal_pending(current
)) {
/* snapshot IDs must be re-fetched after every transaction restart */
475 ret
= bch2_subvolume_get_snapshot(trans
, src_inum
.subvol
,
480 bch2_btree_iter_set_snapshot(&src_iter
, src_snapshot
);
482 ret
= bch2_subvolume_get_snapshot(trans
, dst_inum
.subvol
,
487 bch2_btree_iter_set_snapshot(&dst_iter
, dst_snapshot
);
489 if (dst_inum
.inum
< src_inum
.inum
) {
490 /* Avoid some lock cycle transaction restarts */
491 ret
= bch2_btree_iter_traverse(&dst_iter
);
/* keep the source cursor in lockstep with destination progress */
496 dst_done
= dst_iter
.pos
.offset
- dst_start
.offset
;
497 src_want
= POS(src_start
.inode
, src_start
.offset
+ dst_done
);
498 bch2_btree_iter_set_pos(&src_iter
, src_want
);
500 src_k
= get_next_src(&src_iter
, src_end
);
501 ret
= bkey_err(src_k
);
/* a gap in the source maps to a hole punched in the destination */
505 if (bkey_lt(src_want
, src_iter
.pos
)) {
506 ret
= bch2_fpunch_at(trans
, &dst_iter
, dst_inum
,
508 dst_iter
.pos
.offset
+
509 src_iter
.pos
.offset
- src_want
.offset
),
/* direct source extent: must be made indirect before sharing */
514 if (src_k
.k
->type
!= KEY_TYPE_reflink_p
) {
515 bch2_btree_iter_set_pos_to_extent_start(&src_iter
);
517 bch2_bkey_buf_reassemble(&new_src
, c
, src_k
);
518 src_k
= bkey_i_to_s_c(new_src
.k
);
520 ret
= bch2_make_extent_indirect(trans
, &src_iter
,
525 BUG_ON(src_k
.k
->type
!= KEY_TYPE_reflink_p
);
528 if (src_k
.k
->type
== KEY_TYPE_reflink_p
) {
529 struct bkey_s_c_reflink_p src_p
=
530 bkey_s_c_to_reflink_p(src_k
);
531 struct bkey_i_reflink_p
*dst_p
=
532 bkey_reflink_p_init(new_dst
.k
);
534 u64 offset
= le64_to_cpu(src_p
.v
->idx
) +
536 bkey_start_offset(src_k
.k
));
538 dst_p
->v
.idx
= cpu_to_le64(offset
);
543 new_dst
.k
->k
.p
= dst_iter
.pos
;
544 bch2_key_resize(&new_dst
.k
->k
,
545 min(src_k
.k
->p
.offset
- src_want
.offset
,
546 dst_end
.offset
- dst_iter
.pos
.offset
));
548 ret
= bch2_bkey_set_needs_rebalance(c
, new_dst
.k
,
549 opts
.background_target
,
550 opts
.background_compression
) ?:
551 bch2_extent_update(trans
, dst_inum
, &dst_iter
,
552 new_dst
.k
, &disk_res
,
553 new_i_size
, i_sectors_delta
,
555 bch2_disk_reservation_put(c
, &disk_res
);
557 bch2_trans_iter_exit(trans
, &dst_iter
);
558 bch2_trans_iter_exit(trans
, &src_iter
);
560 BUG_ON(!ret
&& !bkey_eq(dst_iter
.pos
, dst_end
));
561 BUG_ON(bkey_gt(dst_iter
.pos
, dst_end
));
563 dst_done
= dst_iter
.pos
.offset
- dst_start
.offset
;
/* i_size is in bytes; iterator offsets are in 512-byte sectors */
564 new_i_size
= min(dst_iter
.pos
.offset
<< 9, new_i_size
);
/* separate restart loop: extend the destination inode's i_size */
567 struct bch_inode_unpacked inode_u
;
568 struct btree_iter inode_iter
= { NULL
};
570 bch2_trans_begin(trans
);
572 ret2
= bch2_inode_peek(trans
, &inode_iter
, &inode_u
,
573 dst_inum
, BTREE_ITER_INTENT
);
576 inode_u
.bi_size
< new_i_size
) {
577 inode_u
.bi_size
= new_i_size
;
578 ret2
= bch2_inode_write(trans
, &inode_iter
, &inode_u
) ?:
579 bch2_trans_commit(trans
, NULL
, NULL
,
580 BCH_TRANS_COMMIT_no_enospc
);
583 bch2_trans_iter_exit(trans
, &inode_iter
);
584 } while (bch2_err_matches(ret2
, BCH_ERR_transaction_restart
));
586 bch2_trans_put(trans
);
587 bch2_bkey_buf_exit(&new_src
, c
);
588 bch2_bkey_buf_exit(&new_dst
, c
);
590 bch2_write_ref_put(c
, BCH_WRITE_REF_reflink
);
/* sectors remapped, else first loop error, else i_size-update error */
592 return dst_done
?: ret
?: ret2
;