/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_TYPES_H
#define _BCACHEFS_BTREE_TYPES_H

#include <linux/list.h>
#include <linux/rhashtable.h>

#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
#include "errcode.h"
#include "journal_types.h"
#include "replicas_types.h"
#include "six.h"

struct open_bucket;
struct btree_update;
struct btree_trans;

#define MAX_BSETS 3U

struct btree_nr_keys {

        /*
         * Amount of live metadata (i.e. size of node after a compaction) in
         * units of u64s
         */
        u16 live_u64s;
        u16 bset_u64s[MAX_BSETS];

        /* live keys only: */
        u16 packed_keys;
        u16 unpacked_keys;
};

struct bset_tree {
        /*
         * We construct a binary tree in an array as if the array started at 1,
         * so that things line up on the same cachelines better: see comments
         * in bset.c at cacheline_to_bkey() for details
         */

        /* size of the binary tree and prev array */
        u16 size;

        /* function of size - precalculated for to_inorder() */
        u16 extra;

        u16 data_offset;
        u16 aux_data_offset;
        u16 end_offset;
};

struct btree_write {
        struct journal_entry_pin journal;
};

struct btree_alloc {
        struct open_buckets ob;
        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

struct btree_bkey_cached_common {
        struct six_lock lock;
        u8 level;
        u8 btree_id;
        bool cached;
};
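
/*
 * struct btree_bkey_cached_common is the common prefix of struct btree and
 * struct bkey_cached below: both embed it as member 'c', so code holding a
 * struct btree_bkey_cached_common pointer can recover either containing type
 * with container_of() - see btree_node_pos() further down.
 */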

struct btree {
        struct btree_bkey_cached_common c;

        struct rhash_head hash;
        u64 hash_val;

        unsigned long flags;
        u16 written;
        u8 nsets;
        u8 nr_key_bits;
        u16 version_ondisk;

        struct bkey_format format;

        struct btree_node *data;
        void *aux_data;

        /*
         * Sets of sorted keys - the real btree node - plus a binary search tree
         *
         * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
         * to the memory we have allocated for this btree node. Additionally,
         * set[0]->data points to the entire btree node as it exists on disk.
         */
        struct bset_tree set[MAX_BSETS];

        struct btree_nr_keys nr;
        u16 sib_u64s[2];
        u16 whiteout_u64s;
        u8 byte_order;
        u8 unpack_fn_len;

        struct btree_write writes[2];

        /* Key/pointer for this btree node */
        __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);

        /*
         * XXX: add a delete sequence number, so when bch2_btree_node_relock()
         * fails because the lock sequence number has changed - i.e. the
         * contents were modified - we can still relock the node if it's still
         * the one we want, without redoing the traversal
         */

        /*
         * For asynchronous splits/interior node updates:
         * When we do a split, we allocate new child nodes and update the parent
         * node to point to them: we update the parent in memory immediately,
         * but then we must wait until the children have been written out before
         * the update to the parent can be written - this is a list of the
         * btree_updates that are blocking this node from being written:
         */
        struct list_head write_blocked;

        /*
         * Also for asynchronous splits/interior node updates:
         * If a btree node isn't reachable yet, we don't want to kick off
         * another write - because that write also won't yet be reachable and
         * marking it as completed before it's reachable would be incorrect:
         */
        unsigned long will_make_reachable;

        struct open_buckets ob;

        /* lru list */
        struct list_head list;
};

struct btree_cache {
        struct rhashtable table;
        bool table_init_done;
        /*
         * We never free a struct btree, except on shutdown - we just put it on
         * the btree_cache_freed list and reuse it later. This simplifies the
         * code, and it doesn't cost us much memory as the memory usage is
         * dominated by buffers that hold the actual btree node data and those
         * can be freed - and the number of struct btrees allocated is
         * effectively bounded.
         *
         * btree_cache_freeable effectively is a small cache - we use it because
         * high order page allocations can be rather expensive, and it's quite
         * common to delete and allocate btree nodes in quick succession. It
         * should never grow past ~2-3 nodes in practice.
         */
        struct mutex lock;
        struct list_head live;
        struct list_head freeable;
        struct list_head freed_pcpu;
        struct list_head freed_nonpcpu;

        /* Number of elements in live + freeable lists */
        unsigned used;
        unsigned reserve;
        atomic_t dirty;
        struct shrinker *shrink;

        /*
         * If we need to allocate memory for a new btree node and that
         * allocation fails, we can cannibalize another node in the btree cache
         * to satisfy the allocation - lock to guarantee only one thread does
         * this at a time:
         */
        struct task_struct *alloc_lock;
        struct closure_waitlist alloc_wait;
};

struct btree_node_iter {
        struct btree_node_iter_set {
                u16 k, end;
        } data[MAX_BSETS];
};

/*
 * Iterate over all possible positions, synthesizing deleted keys for holes:
 */
static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
/*
 * Indicates that intent locks should be taken on leaf nodes, because we expect
 * to be doing updates:
 */
static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2;
/*
 * Causes the btree iterator code to prefetch additional btree nodes from disk:
 */
static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3;
/*
 * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
 * @pos or the first key strictly greater than @pos
 */
static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6;
static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13;
static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
#define __BTREE_ITER_FLAGS_END 16
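
/*
 * The constants above are bit flags, ORed together into the u16 flags field
 * of struct btree_iter (below). Illustrative combination only - e.g. an
 * iterator intended for updates with prefetching could carry
 * BTREE_ITER_INTENT|BTREE_ITER_PREFETCH; which flags callers actually pass is
 * determined outside this header.
 */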

enum btree_path_uptodate {
        BTREE_ITER_UPTODATE = 0,
        BTREE_ITER_NEED_RELOCK = 1,
        BTREE_ITER_NEED_TRAVERSE = 2,
};

#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
#define TRACK_PATH_ALLOCATED
#endif

struct btree_path {
        u8 idx;
        u8 sorted_idx;
        u8 ref;
        u8 intent_ref;
        u32 alloc_seq;
        u32 downgrade_seq;

        /* btree_iter_copy starts here: */
        struct bpos pos;

        enum btree_id btree_id:5;
        bool cached:1;
        bool preserve:1;
        enum btree_path_uptodate uptodate:2;
        /*
         * When true, failing to relock this path will cause the transaction to
         * restart:
         */
        bool should_be_locked:1;
        unsigned level:3,
                 locks_want:3;
        u8 nodes_locked;

        struct btree_path_level {
                struct btree *b;
                struct btree_node_iter iter;
                u32 lock_seq;
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
                u64 lock_taken_time;
#endif
        } l[BTREE_MAX_DEPTH];
#ifdef TRACK_PATH_ALLOCATED
        unsigned long ip_allocated;
#endif
};

static inline struct btree_path_level *path_l(struct btree_path *path)
{
        return path->l + path->level;
}
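
/*
 * Illustrative usage: path_l(path)->b is the btree node the path currently
 * points at, and path_l(path)->iter is the node iterator within it.
 */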

static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
{
#ifdef TRACK_PATH_ALLOCATED
        return path->ip_allocated;
#else
        return _THIS_IP_;
#endif
}

/*
 * @pos - iterator's current position
 * @level - current btree depth
 * @locks_want - btree level below which we start taking intent locks
 * @nodes_locked - bitmask indicating which nodes in @nodes are locked
 * @nodes_intent_locked - bitmask indicating which locks are intent locks
 */
struct btree_iter {
        struct btree_trans *trans;
        struct btree_path *path;
        struct btree_path *update_path;
        struct btree_path *key_cache_path;

        enum btree_id btree_id:8;
        unsigned min_depth:3;
        unsigned advanced:1;

        /* btree_iter_copy starts here: */
        u16 flags;

        /* When we're filtering by snapshot, the snapshot ID we're looking for: */
        unsigned snapshot;

        struct bpos pos;
        /*
         * Current unpacked key - so that bch2_btree_iter_next()/
         * bch2_btree_iter_next_slot() can correctly advance pos.
         */
        struct bkey k;

        /* BTREE_ITER_WITH_JOURNAL: */
        size_t journal_idx;
        struct bpos journal_pos;
#ifdef TRACK_PATH_ALLOCATED
        unsigned long ip_allocated;
#endif
};

#define BKEY_CACHED_ACCESSED 0
#define BKEY_CACHED_DIRTY 1

struct bkey_cached {
        struct btree_bkey_cached_common c;

        unsigned long flags;
        u16 u64s;
        bool valid;
        u32 btree_trans_barrier_seq;
        struct bkey_cached_key key;

        struct rhash_head hash;
        struct list_head list;

        struct journal_entry_pin journal;
        u64 seq;

        struct bkey_i *k;
};

static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
{
        return !b->cached
                ? container_of(b, struct btree, c)->key.k.p
                : container_of(b, struct bkey_cached, c)->key.pos;
}
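
/*
 * btree_node_pos() may be handed the embedded 'c' member of either a struct
 * btree or a struct bkey_cached; the 'cached' flag in the common header says
 * which containing type to recover with container_of().
 */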

struct btree_insert_entry {
        unsigned flags;
        u8 bkey_type;
        enum btree_id btree_id:8;
        u8 level:4;
        bool cached:1;
        bool insert_trigger_run:1;
        bool overwrite_trigger_run:1;
        bool key_cache_already_flushed:1;
        /*
         * @old_k may be a key from the journal; @old_btree_u64s always refers
         * to the size of the key being overwritten in the btree:
         */
        u8 old_btree_u64s;
        struct bkey_i *k;
        struct btree_path *path;
        u64 seq;
        /* key being overwritten: */
        struct bkey old_k;
        const struct bch_val *old_v;
        unsigned long ip_allocated;
};

#define BTREE_ITER_MAX 64

struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);

struct btree_trans_commit_hook {
        btree_trans_commit_hook_fn *fn;
        struct btree_trans_commit_hook *next;
};

#define BTREE_TRANS_MEM_MAX (1U << 16)

#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000

struct btree_trans {
        struct bch_fs *c;
        const char *fn;
        struct closure ref;
        struct list_head list;
        u64 last_begin_time;

        u8 lock_may_not_fail;
        u8 lock_must_abort;
        struct btree_bkey_cached_common *locking;
        struct six_lock_waiter locking_wait;

        int srcu_idx;

        u8 fn_idx;
        u8 nr_sorted;
        u8 nr_updates;
        u8 nr_wb_updates;
        u8 wb_updates_size;
        bool srcu_held:1;
        bool used_mempool:1;
        bool in_traverse_all:1;
        bool paths_sorted:1;
        bool memory_allocation_failure:1;
        bool journal_transaction_names:1;
        bool journal_replay_not_finished:1;
        bool notrace_relock_fail:1;
        bool write_locked:1;
        enum bch_errcode restarted:16;
        u32 restart_count;
        unsigned long last_begin_ip;
        unsigned long last_restarted_ip;
        unsigned long srcu_lock_time;

        /*
         * For when bch2_trans_update notices we'll be splitting a compressed
         * extent:
         */
        unsigned extra_journal_res;
        unsigned nr_max_paths;

        u64 paths_allocated;

        unsigned mem_top;
        unsigned mem_max;
        unsigned mem_bytes;
        void *mem;

        u8 sorted[BTREE_ITER_MAX + 8];
        struct btree_path paths[BTREE_ITER_MAX];
        struct btree_insert_entry updates[BTREE_ITER_MAX];
        struct btree_write_buffered_key *wb_updates;

        /* update path: */
        struct btree_trans_commit_hook *hooks;
        darray_u64 extra_journal_entries;
        struct journal_entry_pin *journal_pin;

        struct journal_res journal_res;
        u64 *journal_seq;
        struct disk_reservation *disk_res;
        unsigned journal_u64s;
        struct replicas_delta_list *fs_usage_deltas;
};

#define BCH_BTREE_WRITE_TYPES() \
        x(initial, 0) \
        x(init_next_bset, 1) \
        x(cache_reclaim, 2) \
        x(journal_reclaim, 3) \
        x(interior, 4)

enum btree_write_type {
#define x(t, n) BTREE_WRITE_##t,
        BCH_BTREE_WRITE_TYPES()
#undef x
        BTREE_WRITE_TYPE_NR,
};

#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))

#define BTREE_FLAGS() \
        x(read_in_flight) \
        x(read_error) \
        x(dirty) \
        x(need_write) \
        x(write_blocked) \
        x(will_make_reachable) \
        x(noevict) \
        x(write_idx) \
        x(accessed) \
        x(write_in_flight) \
        x(write_in_flight_inner) \
        x(just_written) \
        x(dying) \
        x(fake) \
        x(need_rewrite) \
        x(never_write)

enum btree_flags {
        /* First bits for btree node write type */
        BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
#define x(flag) BTREE_NODE_##flag,
        BTREE_FLAGS()
#undef x
};

#define x(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
\
static inline void set_btree_node_ ## flag(struct btree *b) \
{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \
\
static inline void clear_btree_node_ ## flag(struct btree *b) \
{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }

BTREE_FLAGS()
#undef x
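
/*
 * The x-macro expansion above generates one accessor triplet per flag, e.g.
 * for x(dirty):
 *
 *      btree_node_dirty(b)       - test BTREE_NODE_dirty in b->flags
 *      set_btree_node_dirty(b)   - set it
 *      clear_btree_node_dirty(b) - clear it
 */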

static inline struct btree_write *btree_current_write(struct btree *b)
{
        return b->writes + btree_node_write_idx(b);
}

static inline struct btree_write *btree_prev_write(struct btree *b)
{
        return b->writes + (btree_node_write_idx(b) ^ 1);
}

static inline struct bset_tree *bset_tree_last(struct btree *b)
{
        EBUG_ON(!b->nsets);
        return b->set + b->nsets - 1;
}

static inline void *
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
{
        return (void *) ((u64 *) b->data + 1 + offset);
}

static inline u16
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
{
        u16 ret = (u64 *) p - 1 - (u64 *) b->data;

        EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
        return ret;
}
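
/*
 * Node-relative offsets (bset_tree data_offset/end_offset, and the key
 * offsets used by __btree_node_offset_to_key() below) are in units of u64s,
 * counted from one u64 past the start of b->data - hence the +1/-1 in the two
 * helpers above.
 */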

static inline struct bset *bset(const struct btree *b,
                                const struct bset_tree *t)
{
        return __btree_node_offset_to_ptr(b, t->data_offset);
}

static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
{
        t->end_offset =
                __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
}

static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
                                  const struct bset *i)
{
        t->data_offset = __btree_node_ptr_to_offset(b, i);
        set_btree_bset_end(b, t);
}

static inline struct bset *btree_bset_first(struct btree *b)
{
        return bset(b, b->set);
}

static inline struct bset *btree_bset_last(struct btree *b)
{
        return bset(b, bset_tree_last(b));
}

static inline u16
__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
{
        return __btree_node_ptr_to_offset(b, k);
}

static inline struct bkey_packed *
__btree_node_offset_to_key(const struct btree *b, u16 k)
{
        return __btree_node_offset_to_ptr(b, k);
}

static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
{
        return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
}

#define btree_bkey_first(_b, _t) \
({ \
        EBUG_ON(bset(_b, _t)->start != \
                __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\
\
        bset(_b, _t)->start; \
})

#define btree_bkey_last(_b, _t) \
({ \
        EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \
                vstruct_last(bset(_b, _t))); \
\
        __btree_node_offset_to_key(_b, (_t)->end_offset); \
})

static inline unsigned bset_u64s(struct bset_tree *t)
{
        return t->end_offset - t->data_offset -
                sizeof(struct bset) / sizeof(u64);
}

static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
{
        return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
}
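
/*
 * bset_u64s() is the total size of a bset's keys in u64s (end_offset minus
 * data_offset, minus the bset header); bset_dead_u64s() subtracts the live
 * u64s tracked in btree_nr_keys, i.e. the space a compaction of that bset
 * would reclaim.
 */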

static inline unsigned bset_byte_offset(struct btree *b, void *i)
{
        return i - (void *) b->data;
}

enum btree_node_type {
        BKEY_TYPE_btree,
#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
        BCH_BTREE_IDS()
#undef x
        BKEY_TYPE_NR
};

/* Type of a key in btree @id at level @level: */
static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
{
        return level ? BKEY_TYPE_btree : (unsigned) id + 1;
}
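
/*
 * I.e. interior nodes (level > 0) always hold btree pointer keys
 * (BKEY_TYPE_btree), while a leaf in btree @id holds keys of type
 * BKEY_TYPE_<id>, which by construction of the enum above equals id + 1.
 */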

/* Type of keys @b contains: */
static inline enum btree_node_type btree_node_type(struct btree *b)
{
        return __btree_node_type(b->c.level, b->c.btree_id);
}

const char *bch2_btree_node_type_str(enum btree_node_type);

#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
        (BIT_ULL(BKEY_TYPE_extents)| \
         BIT_ULL(BKEY_TYPE_alloc)| \
         BIT_ULL(BKEY_TYPE_inodes)| \
         BIT_ULL(BKEY_TYPE_stripes)| \
         BIT_ULL(BKEY_TYPE_reflink)| \
         BIT_ULL(BKEY_TYPE_btree))

#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
        (BIT_ULL(BKEY_TYPE_alloc)| \
         BIT_ULL(BKEY_TYPE_inodes)| \
         BIT_ULL(BKEY_TYPE_stripes)| \
         BIT_ULL(BKEY_TYPE_snapshots))

#define BTREE_NODE_TYPE_HAS_TRIGGERS \
        (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
         BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)

static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{
        return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
}

static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
        const unsigned mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
        BCH_BTREE_IDS()
#undef x
        ;

        return (1U << type) & mask;
}
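
/*
 * Note the (nr + 1) shift: this mask is indexed by btree_node_type, which is
 * offset by one from btree_id (see __btree_node_type() above), so bit 0
 * (BKEY_TYPE_btree) stays clear.
 */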

static inline bool btree_id_is_extents(enum btree_id btree)
{
        return btree_node_type_is_extents(__btree_node_type(0, btree));
}

static inline bool btree_type_has_snapshots(enum btree_id id)
{
        const unsigned mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
        BCH_BTREE_IDS()
#undef x
        ;

        return (1U << id) & mask;
}

static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
        const unsigned mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
        BCH_BTREE_IDS()
#undef x
        ;

        return (1U << id) & mask;
}

static inline bool btree_type_has_ptrs(enum btree_id id)
{
        const unsigned mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
        BCH_BTREE_IDS()
#undef x
        ;

        return (1U << id) & mask;
}

struct btree_root {
        struct btree *b;

        /* On disk root - see async splits: */
        __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
        u8 level;
        u8 alive;
        s8 error;
};

enum btree_gc_coalesce_fail_reason {
        BTREE_GC_COALESCE_FAIL_RESERVE_GET,
        BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
        BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
};

enum btree_node_sibling {
        btree_prev_sib,
        btree_next_sib,
};

#endif /* _BCACHEFS_BTREE_TYPES_H */