]>
Commit | Line | Data |
---|---|---|
9888c340 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
56bec294 CM |
2 | /* |
3 | * Copyright (C) 2008 Oracle. All rights reserved. | |
56bec294 | 4 | */ |
9888c340 DS |
5 | |
6 | #ifndef BTRFS_DELAYED_REF_H | |
7 | #define BTRFS_DELAYED_REF_H | |
56bec294 | 8 | |
6df8cdf5 ER |
9 | #include <linux/refcount.h> |
10 | ||
44a075bd | 11 | /* these are the possible values of struct btrfs_delayed_ref_node->action */ |
321f4992 DS |
12 | enum btrfs_delayed_ref_action { |
13 | /* Add one backref to the tree */ | |
14 | BTRFS_ADD_DELAYED_REF = 1, | |
15 | /* Delete one backref from the tree */ | |
16 | BTRFS_DROP_DELAYED_REF, | |
17 | /* Record a full extent allocation */ | |
18 | BTRFS_ADD_DELAYED_EXTENT, | |
19 | /* Not changing ref count on head ref */ | |
20 | BTRFS_UPDATE_DELAYED_HEAD, | |
21 | } __packed; | |
56bec294 CM |
22 | |
23 | struct btrfs_delayed_ref_node { | |
0e0adbcf | 24 | struct rb_node ref_node; |
1d57ee94 WX |
25 | /* |
26 | * If action is BTRFS_ADD_DELAYED_REF, also link this node to | |
27 | * ref_head->ref_add_list, then we do not need to iterate the | |
28 | * whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes. | |
29 | */ | |
30 | struct list_head add_list; | |
c6fc2454 | 31 | |
56bec294 CM |
32 | /* the starting bytenr of the extent */ |
33 | u64 bytenr; | |
34 | ||
56bec294 CM |
35 | /* the size of the extent */ |
36 | u64 num_bytes; | |
37 | ||
00f04b88 AJ |
38 | /* seq number to keep track of insertion order */ |
39 | u64 seq; | |
40 | ||
56bec294 | 41 | /* ref count on this data structure */ |
6df8cdf5 | 42 | refcount_t refs; |
56bec294 CM |
43 | |
44 | /* | |
45 | * how many refs is this entry adding or deleting. For | |
46 | * head refs, this may be a negative number because it is keeping | |
47 | * track of the total mods done to the reference count. | |
48 | * For individual refs, this will always be a positive number | |
49 | * | |
50 | * It may be more than one, since it is possible for a single | |
51 | * parent to have more than one ref on an extent | |
52 | */ | |
53 | int ref_mod; | |
54 | ||
5d4f98a2 YZ |
55 | unsigned int action:8; |
56 | unsigned int type:8; | |
56bec294 CM |
57 | }; |
58 | ||
5d4f98a2 YZ |
59 | struct btrfs_delayed_extent_op { |
60 | struct btrfs_disk_key key; | |
35b3ad50 DS |
61 | u8 level; |
62 | bool update_key; | |
63 | bool update_flags; | |
5d4f98a2 | 64 | u64 flags_to_set; |
5d4f98a2 YZ |
65 | }; |
66 | ||
56bec294 CM |
67 | /* |
68 | * the head refs are used to hold a lock on a given extent, which allows us | |
69 | * to make sure that only one process is running the delayed refs | |
70 | * at a time for a single extent. They also store the sum of all the | |
71 | * reference count modifications we've queued up. | |
72 | */ | |
73 | struct btrfs_delayed_ref_head { | |
d278850e JB |
74 | u64 bytenr; |
75 | u64 num_bytes; | |
315dd5cc FM |
76 | /* |
77 | * For insertion into struct btrfs_delayed_ref_root::href_root. | |
78 | * Keep it in the same cache line as 'bytenr' for more efficient | |
79 | * searches in the rbtree. | |
80 | */ | |
81 | struct rb_node href_node; | |
56bec294 CM |
82 | /* |
83 | * the mutex is held while running the refs, and it is also | |
84 | * held when checking the sum of reference modifications. | |
85 | */ | |
86 | struct mutex mutex; | |
87 | ||
315dd5cc FM |
88 | refcount_t refs; |
89 | ||
90 | /* Protects 'ref_tree' and 'ref_add_list'. */ | |
d7df2c79 | 91 | spinlock_t lock; |
e3d03965 | 92 | struct rb_root_cached ref_tree; |
1d57ee94 WX |
93 | /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ |
94 | struct list_head ref_add_list; | |
c3e69d58 | 95 | |
5d4f98a2 | 96 | struct btrfs_delayed_extent_op *extent_op; |
1262133b JB |
97 | |
98 | /* | |
99 | * This is used to track the final ref_mod from all the refs associated | |
100 | * with this head ref, this is not adjusted as delayed refs are run, | |
101 | * this is meant to track if we need to do the csum accounting or not. | |
102 | */ | |
103 | int total_ref_mod; | |
104 | ||
d278850e JB |
105 | /* |
106 | * This is the current outstanding mod references for this bytenr. This | |
107 | * is used with lookup_extent_info to get an accurate reference count | |
108 | * for a bytenr, so it is adjusted as delayed refs are run so that any | |
109 | * on disk reference count + ref_mod is accurate. | |
110 | */ | |
111 | int ref_mod; | |
112 | ||
cf79ac47 BB |
113 | /* |
114 | * The root that triggered the allocation when must_insert_reserved is | |
115 | * set to true. | |
116 | */ | |
117 | u64 owning_root; | |
118 | ||
cecbb533 BB |
119 | /* |
120 | * Track reserved bytes when setting must_insert_reserved. On success | |
121 | * or cleanup, we will need to free the reservation. | |
122 | */ | |
123 | u64 reserved_bytes; | |
124 | ||
56bec294 CM |
125 | /* |
126 | * when a new extent is allocated, it is just reserved in memory | |
127 | * The actual extent isn't inserted into the extent allocation tree | |
128 | * until the delayed ref is processed. must_insert_reserved is | |
129 | * used to flag a delayed ref so the accounting can be updated | |
130 | * when a full insert is done. | |
131 | * | |
132 | * It is possible the extent will be freed before it is ever | |
133 | * inserted into the extent allocation tree. In this case | |
134 | * we need to update the in ram accounting to properly reflect | |
135 | * the free has happened. | |
136 | */ | |
61c681fe | 137 | bool must_insert_reserved; |
cf79ac47 | 138 | |
61c681fe FM |
139 | bool is_data; |
140 | bool is_system; | |
141 | bool processing; | |
56bec294 CM |
142 | }; |
143 | ||
5d4f98a2 | 144 | struct btrfs_delayed_tree_ref { |
56bec294 | 145 | struct btrfs_delayed_ref_node node; |
eebe063b AJ |
146 | u64 root; |
147 | u64 parent; | |
5d4f98a2 YZ |
148 | int level; |
149 | }; | |
56bec294 | 150 | |
5d4f98a2 YZ |
151 | struct btrfs_delayed_data_ref { |
152 | struct btrfs_delayed_ref_node node; | |
eebe063b AJ |
153 | u64 root; |
154 | u64 parent; | |
5d4f98a2 YZ |
155 | u64 objectid; |
156 | u64 offset; | |
56bec294 CM |
157 | }; |
158 | ||
e19eb11f JB |
159 | enum btrfs_delayed_ref_flags { |
160 | /* Indicate that we are flushing delayed refs for the commit */ | |
161 | BTRFS_DELAYED_REFS_FLUSHING, | |
162 | }; | |
163 | ||
56bec294 | 164 | struct btrfs_delayed_ref_root { |
c46effa6 | 165 | /* head ref rbtree */ |
5c9d028b | 166 | struct rb_root_cached href_root; |
c46effa6 | 167 | |
3368d001 QW |
168 | /* dirty extent records */ |
169 | struct rb_root dirty_extent_root; | |
170 | ||
56bec294 CM |
171 | /* this spin lock protects the rbtree and the entries inside */ |
172 | spinlock_t lock; | |
173 | ||
174 | /* how many delayed ref updates we've queued, used by the | |
175 | * throttling code | |
176 | */ | |
d7df2c79 | 177 | atomic_t num_entries; |
56bec294 | 178 | |
c3e69d58 CM |
179 | /* total number of head nodes in tree */ |
180 | unsigned long num_heads; | |
181 | ||
182 | /* total number of head nodes ready for processing */ | |
183 | unsigned long num_heads_ready; | |
184 | ||
1262133b JB |
185 | u64 pending_csums; |
186 | ||
e19eb11f | 187 | unsigned long flags; |
c3e69d58 CM |
188 | |
189 | u64 run_delayed_start; | |
9086db86 QW |
190 | |
191 | /* | |
192 | * To make qgroup to skip given root. | |
01327610 | 193 | * This is for snapshot, as btrfs_qgroup_inherit() will manually |
9086db86 QW |
194 | * modify counters for snapshot and its source, so we should skip |
195 | * the snapshot in new_root/old_roots or it will get calculated twice | |
196 | */ | |
197 | u64 qgroup_to_skip; | |
56bec294 CM |
198 | }; |
199 | ||
b28b1f0c QW |
200 | enum btrfs_ref_type { |
201 | BTRFS_REF_NOT_SET, | |
202 | BTRFS_REF_DATA, | |
203 | BTRFS_REF_METADATA, | |
204 | BTRFS_REF_LAST, | |
321f4992 | 205 | } __packed; |
b28b1f0c QW |
206 | |
207 | struct btrfs_data_ref { | |
208 | /* For EXTENT_DATA_REF */ | |
209 | ||
610647d7 BB |
210 | /* Root which owns this data reference. */ |
211 | u64 ref_root; | |
b28b1f0c QW |
212 | |
213 | /* Inode which refers to this data extent */ | |
214 | u64 ino; | |
215 | ||
216 | /* | |
217 | * file_offset - extent_offset | |
218 | * | |
219 | * file_offset is the key.offset of the EXTENT_DATA key. | |
220 | * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data. | |
221 | */ | |
222 | u64 offset; | |
223 | }; | |
224 | ||
225 | struct btrfs_tree_ref { | |
226 | /* | |
227 | * Level of this tree block | |
228 | * | |
229 | * Shared for skinny (TREE_BLOCK_REF) and normal tree ref. | |
230 | */ | |
231 | int level; | |
232 | ||
233 | /* | |
610647d7 | 234 | * Root which owns this tree block reference. |
b28b1f0c QW |
235 | * |
236 | * For TREE_BLOCK_REF (skinny metadata, either inline or keyed) | |
237 | */ | |
610647d7 | 238 | u64 ref_root; |
b28b1f0c QW |
239 | |
240 | /* For non-skinny metadata, no special member needed */ | |
241 | }; | |
242 | ||
243 | struct btrfs_ref { | |
244 | enum btrfs_ref_type type; | |
321f4992 | 245 | enum btrfs_delayed_ref_action action; |
b28b1f0c QW |
246 | |
247 | /* | |
248 | * Whether this extent should go through qgroup record. | |
249 | * | |
250 | * Normally false, but for certain cases like delayed subtree scan, | |
251 | * setting this flag can hugely reduce qgroup overhead. | |
252 | */ | |
253 | bool skip_qgroup; | |
254 | ||
eed2037f NB |
255 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY |
256 | /* Through which root is this modification. */ | |
b28b1f0c | 257 | u64 real_root; |
eed2037f | 258 | #endif |
b28b1f0c QW |
259 | u64 bytenr; |
260 | u64 len; | |
457cb1dd | 261 | u64 owning_root; |
b28b1f0c QW |
262 | |
263 | /* Bytenr of the parent tree block */ | |
264 | u64 parent; | |
265 | union { | |
266 | struct btrfs_data_ref data_ref; | |
267 | struct btrfs_tree_ref tree_ref; | |
268 | }; | |
269 | }; | |
270 | ||
78a6184a MX |
271 | extern struct kmem_cache *btrfs_delayed_ref_head_cachep; |
272 | extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; | |
273 | extern struct kmem_cache *btrfs_delayed_data_ref_cachep; | |
274 | extern struct kmem_cache *btrfs_delayed_extent_op_cachep; | |
275 | ||
f5c29bd9 | 276 | int __init btrfs_delayed_ref_init(void); |
e67c718b | 277 | void __cold btrfs_delayed_ref_exit(void); |
78a6184a | 278 | |
0e55a545 FM |
279 | static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info, |
280 | int num_delayed_refs) | |
281 | { | |
282 | u64 num_bytes; | |
283 | ||
284 | num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs); | |
285 | ||
286 | /* | |
287 | * We have to check the mount option here because we could be enabling | |
288 | * the free space tree for the first time and don't have the compat_ro | |
289 | * option set yet. | |
290 | * | |
291 | * We need extra reservations if we have the free space tree because | |
292 | * we'll have to modify that tree as well. | |
293 | */ | |
294 | if (btrfs_test_opt(fs_info, FREE_SPACE_TREE)) | |
295 | num_bytes *= 2; | |
296 | ||
297 | return num_bytes; | |
298 | } | |
299 | ||
adb86dbe FM |
300 | static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info, |
301 | int num_csum_items) | |
302 | { | |
303 | /* | |
304 | * Deleting csum items does not result in new nodes/leaves and does not | |
305 | * require changing the free space tree, only the csum tree, so this is | |
306 | * all we need. | |
307 | */ | |
308 | return btrfs_calc_metadata_size(fs_info, num_csum_items); | |
309 | } | |
310 | ||
b28b1f0c | 311 | static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, |
457cb1dd BB |
312 | int action, u64 bytenr, u64 len, |
313 | u64 parent, u64 owning_root) | |
b28b1f0c QW |
314 | { |
315 | generic_ref->action = action; | |
316 | generic_ref->bytenr = bytenr; | |
317 | generic_ref->len = len; | |
318 | generic_ref->parent = parent; | |
457cb1dd | 319 | generic_ref->owning_root = owning_root; |
b28b1f0c QW |
320 | } |
321 | ||
457cb1dd BB |
322 | static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, |
323 | u64 root, u64 mod_root, bool skip_qgroup) | |
b28b1f0c | 324 | { |
eed2037f | 325 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY |
b28b1f0c | 326 | /* If @real_root not set, use @root as fallback */ |
eed2037f NB |
327 | generic_ref->real_root = mod_root ?: root; |
328 | #endif | |
b28b1f0c | 329 | generic_ref->tree_ref.level = level; |
610647d7 | 330 | generic_ref->tree_ref.ref_root = root; |
b28b1f0c | 331 | generic_ref->type = BTRFS_REF_METADATA; |
681145d4 NB |
332 | if (skip_qgroup || !(is_fstree(root) && |
333 | (!mod_root || is_fstree(mod_root)))) | |
334 | generic_ref->skip_qgroup = true; | |
335 | else | |
336 | generic_ref->skip_qgroup = false; | |
337 | ||
b28b1f0c QW |
338 | } |
339 | ||
340 | static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, | |
f42c5da6 NB |
341 | u64 ref_root, u64 ino, u64 offset, u64 mod_root, |
342 | bool skip_qgroup) | |
b28b1f0c | 343 | { |
eed2037f | 344 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY |
b28b1f0c | 345 | /* If @real_root not set, use @root as fallback */ |
eed2037f NB |
346 | generic_ref->real_root = mod_root ?: ref_root; |
347 | #endif | |
610647d7 | 348 | generic_ref->data_ref.ref_root = ref_root; |
b28b1f0c QW |
349 | generic_ref->data_ref.ino = ino; |
350 | generic_ref->data_ref.offset = offset; | |
351 | generic_ref->type = BTRFS_REF_DATA; | |
681145d4 NB |
352 | if (skip_qgroup || !(is_fstree(ref_root) && |
353 | (!mod_root || is_fstree(mod_root)))) | |
354 | generic_ref->skip_qgroup = true; | |
355 | else | |
356 | generic_ref->skip_qgroup = false; | |
b28b1f0c QW |
357 | } |
358 | ||
78a6184a MX |
359 | static inline struct btrfs_delayed_extent_op * |
360 | btrfs_alloc_delayed_extent_op(void) | |
361 | { | |
362 | return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS); | |
363 | } | |
364 | ||
365 | static inline void | |
366 | btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) | |
367 | { | |
368 | if (op) | |
369 | kmem_cache_free(btrfs_delayed_extent_op_cachep, op); | |
370 | } | |
371 | ||
56bec294 CM |
372 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) |
373 | { | |
6df8cdf5 | 374 | if (refcount_dec_and_test(&ref->refs)) { |
4d34ad34 | 375 | WARN_ON(!RB_EMPTY_NODE(&ref->ref_node)); |
78a6184a MX |
376 | switch (ref->type) { |
377 | case BTRFS_TREE_BLOCK_REF_KEY: | |
378 | case BTRFS_SHARED_BLOCK_REF_KEY: | |
379 | kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); | |
380 | break; | |
381 | case BTRFS_EXTENT_DATA_REF_KEY: | |
382 | case BTRFS_SHARED_DATA_REF_KEY: | |
383 | kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); | |
384 | break; | |
78a6184a MX |
385 | default: |
386 | BUG(); | |
387 | } | |
56bec294 CM |
388 | } |
389 | } | |
390 | ||
2187374f JB |
391 | static inline u64 btrfs_ref_head_to_space_flags( |
392 | struct btrfs_delayed_ref_head *head_ref) | |
393 | { | |
394 | if (head_ref->is_data) | |
395 | return BTRFS_BLOCK_GROUP_DATA; | |
396 | else if (head_ref->is_system) | |
397 | return BTRFS_BLOCK_GROUP_SYSTEM; | |
398 | return BTRFS_BLOCK_GROUP_METADATA; | |
399 | } | |
400 | ||
d278850e JB |
401 | static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) |
402 | { | |
403 | if (refcount_dec_and_test(&head->refs)) | |
404 | kmem_cache_free(btrfs_delayed_ref_head_cachep, head); | |
405 | } | |
406 | ||
44e1c47d | 407 | int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, |
ed4f255b | 408 | struct btrfs_ref *generic_ref, |
2187374f | 409 | struct btrfs_delayed_extent_op *extent_op); |
88a979c6 | 410 | int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, |
76675593 | 411 | struct btrfs_ref *generic_ref, |
2187374f | 412 | u64 reserved); |
c6e340bc | 413 | int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, |
5d4f98a2 YZ |
414 | u64 bytenr, u64 num_bytes, |
415 | struct btrfs_delayed_extent_op *extent_op); | |
0c555c97 | 416 | void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, |
ae1e206b JB |
417 | struct btrfs_delayed_ref_root *delayed_refs, |
418 | struct btrfs_delayed_ref_head *head); | |
56bec294 | 419 | |
1887be66 | 420 | struct btrfs_delayed_ref_head * |
f72ad18e LB |
421 | btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, |
422 | u64 bytenr); | |
9e920a6f | 423 | int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs, |
c3e69d58 | 424 | struct btrfs_delayed_ref_head *head); |
093486c4 MX |
425 | static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) |
426 | { | |
427 | mutex_unlock(&head->mutex); | |
428 | } | |
d7baffda JB |
429 | void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, |
430 | struct btrfs_delayed_ref_head *head); | |
d7df2c79 | 431 | |
5637c74b LF |
432 | struct btrfs_delayed_ref_head *btrfs_select_ref_head( |
433 | struct btrfs_delayed_ref_root *delayed_refs); | |
00f04b88 | 434 | |
41d0bd3b | 435 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); |
00f04b88 | 436 | |
adb86dbe | 437 | void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums); |
6ef03deb | 438 | void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); |
9ef17228 FM |
439 | void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); |
440 | void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); | |
f66e0209 FM |
441 | void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); |
442 | void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); | |
6ef03deb JB |
443 | int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, |
444 | enum btrfs_reserve_flush_enum flush); | |
445 | void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, | |
6ef03deb | 446 | u64 num_bytes); |
6ef03deb JB |
447 | bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); |
448 | ||
56bec294 CM |
449 | /* |
450 | * helper functions to cast a node into its container | |
451 | */ | |
5d4f98a2 YZ |
452 | static inline struct btrfs_delayed_tree_ref * |
453 | btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) | |
56bec294 | 454 | { |
5d4f98a2 YZ |
455 | return container_of(node, struct btrfs_delayed_tree_ref, node); |
456 | } | |
56bec294 | 457 | |
5d4f98a2 YZ |
458 | static inline struct btrfs_delayed_data_ref * |
459 | btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) | |
460 | { | |
5d4f98a2 | 461 | return container_of(node, struct btrfs_delayed_data_ref, node); |
56bec294 | 462 | } |
9888c340 | 463 | |
56bec294 | 464 | #endif |