]>
Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
36e916e1 | 4 | #include "btree_gc.h" |
1c6fdbd8 KO |
5 | #include "btree_io.h" |
6 | #include "btree_iter.h" | |
401585fe | 7 | #include "btree_journal_iter.h" |
2ca88e5a | 8 | #include "btree_key_cache.h" |
8079aab0 | 9 | #include "btree_update_interior.h" |
920e69bc | 10 | #include "btree_write_buffer.h" |
b35b1925 | 11 | #include "buckets.h" |
549d173c | 12 | #include "errcode.h" |
3636ed48 | 13 | #include "error.h" |
1c6fdbd8 KO |
14 | #include "journal.h" |
15 | #include "journal_reclaim.h" | |
1d25849c | 16 | #include "replicas.h" |
8e877caa | 17 | #include "snapshot.h" |
1c6fdbd8 | 18 | |
b7ba66c8 | 19 | #include <linux/prefetch.h> |
a52a4da4 | 20 | |
08f78031 KO |
21 | static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i) |
22 | { | |
23 | #ifdef CONFIG_BCACHEFS_DEBUG | |
24 | struct bch_fs *c = trans->c; | |
25 | struct bkey u; | |
26 | struct bkey_s_c k = bch2_btree_path_peek_slot_exact(i->path, &u); | |
27 | ||
28 | if (unlikely(trans->journal_replay_not_finished)) { | |
29 | struct bkey_i *j_k = | |
30 | bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p); | |
31 | ||
32 | if (j_k) | |
33 | k = bkey_i_to_s_c(j_k); | |
34 | } | |
35 | ||
70f0b0fd KO |
36 | u = *k.k; |
37 | u.needs_whiteout = i->old_k.needs_whiteout; | |
08f78031 | 38 | |
70f0b0fd | 39 | BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey))); |
08f78031 KO |
40 | BUG_ON(i->old_v != k.v); |
41 | #endif | |
42 | } | |
43 | ||
67e0dd8f | 44 | static inline struct btree_path_level *insert_l(struct btree_insert_entry *i) |
6fba6b83 | 45 | { |
67e0dd8f | 46 | return i->path->l + i->level; |
6fba6b83 KO |
47 | } |
48 | ||
36e9d698 | 49 | static inline bool same_leaf_as_prev(struct btree_trans *trans, |
24326cd1 | 50 | struct btree_insert_entry *i) |
36e9d698 | 51 | { |
cd8319fd | 52 | return i != trans->updates && |
6fba6b83 | 53 | insert_l(&i[0])->b == insert_l(&i[-1])->b; |
36e9d698 KO |
54 | } |
55 | ||
05046a96 KO |
56 | static inline bool same_leaf_as_next(struct btree_trans *trans, |
57 | struct btree_insert_entry *i) | |
58 | { | |
59 | return i + 1 < trans->updates + trans->nr_updates && | |
6fba6b83 | 60 | insert_l(&i[0])->b == insert_l(&i[1])->b; |
05046a96 KO |
61 | } |
62 | ||
1ff7849f KO |
63 | inline void bch2_btree_node_prep_for_write(struct btree_trans *trans, |
64 | struct btree_path *path, | |
65 | struct btree *b) | |
9623ab27 | 66 | { |
e3a67bdb KO |
67 | struct bch_fs *c = trans->c; |
68 | ||
fdfab313 | 69 | if (unlikely(btree_node_just_written(b)) && |
9623ab27 | 70 | bch2_btree_post_write_cleanup(c, b)) |
f7a966a3 | 71 | bch2_trans_node_reinit_iter(trans, b); |
9623ab27 KO |
72 | |
73 | /* | |
74 | * If the last bset has been written, or if it's gotten too big - start | |
75 | * a new bset to insert into: | |
76 | */ | |
77 | if (want_new_bset(c, b)) | |
f7a966a3 | 78 | bch2_btree_init_next(trans, b); |
9623ab27 KO |
79 | } |
80 | ||
3b8c4507 KO |
81 | static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i) |
82 | { | |
83 | while (--i >= trans->updates) { | |
84 | if (same_leaf_as_prev(trans, i)) | |
85 | continue; | |
86 | ||
87 | bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b); | |
88 | } | |
89 | ||
90 | trace_and_count(trans->c, trans_restart_would_deadlock_write, trans); | |
91 | return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); | |
92 | } | |
93 | ||
94 | static inline int bch2_trans_lock_write(struct btree_trans *trans) | |
95 | { | |
96 | struct btree_insert_entry *i; | |
97 | ||
98 | EBUG_ON(trans->write_locked); | |
99 | ||
100 | trans_for_each_update(trans, i) { | |
101 | if (same_leaf_as_prev(trans, i)) | |
102 | continue; | |
103 | ||
104 | if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c)) | |
105 | return trans_lock_write_fail(trans, i); | |
106 | ||
107 | if (!i->cached) | |
108 | bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); | |
109 | } | |
110 | ||
111 | trans->write_locked = true; | |
112 | return 0; | |
113 | } | |
114 | ||
115 | static inline void bch2_trans_unlock_write(struct btree_trans *trans) | |
116 | { | |
117 | if (likely(trans->write_locked)) { | |
118 | struct btree_insert_entry *i; | |
119 | ||
120 | trans_for_each_update(trans, i) | |
121 | if (!same_leaf_as_prev(trans, i)) | |
122 | bch2_btree_node_unlock_write_inlined(trans, i->path, | |
123 | insert_l(i)->b); | |
124 | trans->write_locked = false; | |
125 | } | |
126 | } | |
127 | ||
1c6fdbd8 KO |
128 | /* Inserting into a given leaf node (last stage of insert): */ |
129 | ||
130 | /* Handle overwrites and do insert, for non extents: */ | |
9f6bd307 | 131 | bool bch2_btree_bset_insert_key(struct btree_trans *trans, |
67e0dd8f | 132 | struct btree_path *path, |
1c6fdbd8 KO |
133 | struct btree *b, |
134 | struct btree_node_iter *node_iter, | |
135 | struct bkey_i *insert) | |
136 | { | |
1c6fdbd8 | 137 | struct bkey_packed *k; |
fdf22400 | 138 | unsigned clobber_u64s = 0, new_u64s = 0; |
1c6fdbd8 KO |
139 | |
140 | EBUG_ON(btree_node_just_written(b)); | |
141 | EBUG_ON(bset_written(b, btree_bset_last(b))); | |
142 | EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); | |
e88a75eb KO |
143 | EBUG_ON(bpos_lt(insert->k.p, b->data->min_key)); |
144 | EBUG_ON(bpos_gt(insert->k.p, b->data->max_key)); | |
e3e464ac | 145 | EBUG_ON(insert->k.u64s > |
9f6bd307 | 146 | bch_btree_keys_u64s_remaining(trans->c, b)); |
da525760 | 147 | EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos)); |
1c6fdbd8 KO |
148 | |
149 | k = bch2_btree_node_iter_peek_all(node_iter, b); | |
811d2bcd | 150 | if (k && bkey_cmp_left_packed(b, k, &insert->k.p)) |
ae54c453 | 151 | k = NULL; |
1c6fdbd8 | 152 | |
ae54c453 | 153 | /* @k is the key being overwritten/deleted, if any: */ |
c052cf82 | 154 | EBUG_ON(k && bkey_deleted(k)); |
1c6fdbd8 | 155 | |
fdf22400 | 156 | /* Deleting, but not found? nothing to do: */ |
c052cf82 | 157 | if (bkey_deleted(&insert->k) && !k) |
fdf22400 KO |
158 | return false; |
159 | ||
c052cf82 | 160 | if (bkey_deleted(&insert->k)) { |
ae54c453 | 161 | /* Deleting: */ |
ae54c453 KO |
162 | btree_account_key_drop(b, k); |
163 | k->type = KEY_TYPE_deleted; | |
c9bebae6 | 164 | |
fdf22400 | 165 | if (k->needs_whiteout) |
9f6bd307 | 166 | push_whiteout(trans->c, b, insert->k.p); |
fdf22400 | 167 | k->needs_whiteout = false; |
1c6fdbd8 | 168 | |
ae54c453 KO |
169 | if (k >= btree_bset_last(b)->start) { |
170 | clobber_u64s = k->u64s; | |
ae54c453 | 171 | bch2_bset_delete(b, k, clobber_u64s); |
fdf22400 | 172 | goto fix_iter; |
ae54c453 | 173 | } else { |
67e0dd8f | 174 | bch2_btree_path_fix_key_modified(trans, b, k); |
ae54c453 KO |
175 | } |
176 | ||
177 | return true; | |
178 | } | |
c9bebae6 | 179 | |
ae54c453 KO |
180 | if (k) { |
181 | /* Overwriting: */ | |
ae54c453 KO |
182 | btree_account_key_drop(b, k); |
183 | k->type = KEY_TYPE_deleted; | |
184 | ||
f2e8c69f KO |
185 | insert->k.needs_whiteout = k->needs_whiteout; |
186 | k->needs_whiteout = false; | |
187 | ||
c9bebae6 KO |
188 | if (k >= btree_bset_last(b)->start) { |
189 | clobber_u64s = k->u64s; | |
1c6fdbd8 | 190 | goto overwrite; |
ae54c453 | 191 | } else { |
67e0dd8f | 192 | bch2_btree_path_fix_key_modified(trans, b, k); |
1c6fdbd8 | 193 | } |
1c6fdbd8 KO |
194 | } |
195 | ||
216c9fac | 196 | k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b)); |
1c6fdbd8 KO |
197 | overwrite: |
198 | bch2_bset_insert(b, node_iter, k, insert, clobber_u64s); | |
fdf22400 KO |
199 | new_u64s = k->u64s; |
200 | fix_iter: | |
201 | if (clobber_u64s != new_u64s) | |
67e0dd8f | 202 | bch2_btree_node_iter_fix(trans, path, b, node_iter, k, |
fdf22400 | 203 | clobber_u64s, new_u64s); |
1c6fdbd8 KO |
204 | return true; |
205 | } | |
206 | ||
2940295c | 207 | static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, |
1c6fdbd8 KO |
208 | unsigned i, u64 seq) |
209 | { | |
210 | struct bch_fs *c = container_of(j, struct bch_fs, journal); | |
211 | struct btree_write *w = container_of(pin, struct btree_write, journal); | |
212 | struct btree *b = container_of(w, struct btree, writes[i]); | |
6bd68ec2 | 213 | struct btree_trans *trans = bch2_trans_get(c); |
6f5f747c KO |
214 | unsigned long old, new, v; |
215 | unsigned idx = w - b->writes; | |
1c6fdbd8 | 216 | |
6bd68ec2 | 217 | btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); |
6f5f747c KO |
218 | v = READ_ONCE(b->flags); |
219 | ||
220 | do { | |
221 | old = new = v; | |
222 | ||
223 | if (!(old & (1 << BTREE_NODE_dirty)) || | |
224 | !!(old & (1 << BTREE_NODE_write_idx)) != idx || | |
225 | w->journal.seq != seq) | |
226 | break; | |
227 | ||
42af0ad5 KO |
228 | new &= ~BTREE_WRITE_TYPE_MASK; |
229 | new |= BTREE_WRITE_journal_reclaim; | |
6f5f747c KO |
230 | new |= 1 << BTREE_NODE_need_write; |
231 | } while ((v = cmpxchg(&b->flags, old, new)) != old); | |
232 | ||
233 | btree_node_write_if_need(c, b, SIX_LOCK_read); | |
c43a6ef9 | 234 | six_unlock_read(&b->c.lock); |
ca7d8fca | 235 | |
6bd68ec2 | 236 | bch2_trans_put(trans); |
2940295c | 237 | return 0; |
1c6fdbd8 KO |
238 | } |
239 | ||
83ec519a | 240 | int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) |
1c6fdbd8 KO |
241 | { |
242 | return __btree_node_flush(j, pin, 0, seq); | |
243 | } | |
244 | ||
83ec519a | 245 | int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq) |
1c6fdbd8 KO |
246 | { |
247 | return __btree_node_flush(j, pin, 1, seq); | |
248 | } | |
249 | ||
6357d607 KO |
250 | inline void bch2_btree_add_journal_pin(struct bch_fs *c, |
251 | struct btree *b, u64 seq) | |
252 | { | |
253 | struct btree_write *w = btree_current_write(b); | |
254 | ||
255 | bch2_journal_pin_add(&c->journal, seq, &w->journal, | |
256 | btree_node_write_idx(b) == 0 | |
83ec519a KO |
257 | ? bch2_btree_node_flush0 |
258 | : bch2_btree_node_flush1); | |
6357d607 KO |
259 | } |
260 | ||
1c6fdbd8 | 261 | /** |
96dea3d5 KO |
262 | * bch2_btree_insert_key_leaf() - insert a key one key into a leaf node |
263 | * @trans: btree transaction object | |
264 | * @path: path pointing to @insert's pos | |
265 | * @insert: key to insert | |
266 | * @journal_seq: sequence number of journal reservation | |
1c6fdbd8 | 267 | */ |
920e69bc KO |
268 | inline void bch2_btree_insert_key_leaf(struct btree_trans *trans, |
269 | struct btree_path *path, | |
270 | struct bkey_i *insert, | |
271 | u64 journal_seq) | |
1c6fdbd8 KO |
272 | { |
273 | struct bch_fs *c = trans->c; | |
920e69bc | 274 | struct btree *b = path_l(path)->b; |
2a9101a9 | 275 | struct bset_tree *t = bset_tree_last(b); |
4e8224ed | 276 | struct bset *i = bset(b, t); |
2a9101a9 | 277 | int old_u64s = bset_u64s(t); |
1c6fdbd8 KO |
278 | int old_live_u64s = b->nr.live_u64s; |
279 | int live_u64s_added, u64s_added; | |
280 | ||
920e69bc KO |
281 | if (unlikely(!bch2_btree_bset_insert_key(trans, path, b, |
282 | &path_l(path)->iter, insert))) | |
0576ba9a | 283 | return; |
4e8224ed | 284 | |
920e69bc | 285 | i->journal_seq = cpu_to_le64(max(journal_seq, le64_to_cpu(i->journal_seq))); |
bcd6f3e0 | 286 | |
920e69bc | 287 | bch2_btree_add_journal_pin(c, b, journal_seq); |
4e8224ed | 288 | |
30a8278a KO |
289 | if (unlikely(!btree_node_dirty(b))) { |
290 | EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); | |
de517c95 | 291 | set_btree_node_dirty_acct(c, b); |
30a8278a | 292 | } |
1c6fdbd8 KO |
293 | |
294 | live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; | |
2a9101a9 | 295 | u64s_added = (int) bset_u64s(t) - old_u64s; |
1c6fdbd8 KO |
296 | |
297 | if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) | |
298 | b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); | |
299 | if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) | |
300 | b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); | |
301 | ||
302 | if (u64s_added > live_u64s_added && | |
303 | bch2_maybe_compact_whiteouts(c, b)) | |
f7a966a3 | 304 | bch2_trans_node_reinit_iter(trans, b); |
1c6fdbd8 KO |
305 | } |
306 | ||
2ca88e5a KO |
307 | /* Cached btree updates: */ |
308 | ||
9623ab27 | 309 | /* Normal update interface: */ |
1c6fdbd8 | 310 | |
1a470560 | 311 | static inline void btree_insert_entry_checks(struct btree_trans *trans, |
6333bd2f | 312 | struct btree_insert_entry *i) |
1c6fdbd8 | 313 | { |
e88a75eb | 314 | BUG_ON(!bpos_eq(i->k->k.p, i->path->pos)); |
67e0dd8f KO |
315 | BUG_ON(i->cached != i->path->cached); |
316 | BUG_ON(i->level != i->path->level); | |
317 | BUG_ON(i->btree_id != i->path->btree_id); | |
14b393ee | 318 | EBUG_ON(!i->level && |
d3c7727b | 319 | btree_type_has_snapshots(i->btree_id) && |
14b393ee KO |
320 | !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && |
321 | test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && | |
322 | i->k->k.p.snapshot && | |
8479938d | 323 | bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); |
9623ab27 | 324 | } |
1c6fdbd8 | 325 | |
0cc455b3 | 326 | static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, |
30ca6ece | 327 | unsigned flags) |
c8cc5b3e | 328 | { |
87ced107 | 329 | return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, |
30ca6ece | 330 | trans->journal_u64s, flags); |
9623ab27 | 331 | } |
1c6fdbd8 | 332 | |
fb64f3fd KO |
333 | #define JSET_ENTRY_LOG_U64s 4 |
334 | ||
335 | static noinline void journal_transaction_name(struct btree_trans *trans) | |
336 | { | |
337 | struct bch_fs *c = trans->c; | |
43ddf448 KO |
338 | struct journal *j = &c->journal; |
339 | struct jset_entry *entry = | |
340 | bch2_journal_add_entry(j, &trans->journal_res, | |
341 | BCH_JSET_ENTRY_log, 0, 0, | |
342 | JSET_ENTRY_LOG_U64s); | |
343 | struct jset_entry_log *l = | |
344 | container_of(entry, struct jset_entry_log, entry); | |
345 | ||
346 | strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64)); | |
fb64f3fd KO |
347 | } |
348 | ||
ac9fa4bd KO |
349 | static inline int btree_key_can_insert(struct btree_trans *trans, |
350 | struct btree *b, unsigned u64s) | |
b0004d8d KO |
351 | { |
352 | struct bch_fs *c = trans->c; | |
b0004d8d | 353 | |
f8058242 | 354 | if (!bch2_btree_node_insert_fits(c, b, u64s)) |
ac9fa4bd | 355 | return -BCH_ERR_btree_insert_btree_node_full; |
b0004d8d | 356 | |
ac9fa4bd | 357 | return 0; |
b0004d8d KO |
358 | } |
359 | ||
09b0283e KO |
360 | noinline static int |
361 | btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, | |
362 | struct btree_path *path, unsigned new_u64s) | |
363 | { | |
364 | struct bch_fs *c = trans->c; | |
365 | struct btree_insert_entry *i; | |
366 | struct bkey_cached *ck = (void *) path->l[0].b; | |
367 | struct bkey_i *new_k; | |
368 | int ret; | |
369 | ||
370 | bch2_trans_unlock_write(trans); | |
371 | bch2_trans_unlock(trans); | |
372 | ||
373 | new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); | |
374 | if (!new_k) { | |
375 | bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", | |
376 | bch2_btree_id_str(path->btree_id), new_u64s); | |
377 | return -BCH_ERR_ENOMEM_btree_key_cache_insert; | |
378 | } | |
379 | ||
380 | ret = bch2_trans_relock(trans) ?: | |
381 | bch2_trans_lock_write(trans); | |
382 | if (unlikely(ret)) { | |
383 | kfree(new_k); | |
384 | return ret; | |
385 | } | |
386 | ||
387 | memcpy(new_k, ck->k, ck->u64s * sizeof(u64)); | |
388 | ||
389 | trans_for_each_update(trans, i) | |
390 | if (i->old_v == &ck->k->v) | |
391 | i->old_v = &new_k->v; | |
392 | ||
393 | kfree(ck->k); | |
394 | ck->u64s = new_u64s; | |
395 | ck->k = new_k; | |
396 | return 0; | |
397 | } | |
398 | ||
30ca6ece KO |
399 | static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags, |
400 | struct btree_path *path, unsigned u64s) | |
2ca88e5a | 401 | { |
f0f41a6d | 402 | struct bch_fs *c = trans->c; |
67e0dd8f | 403 | struct bkey_cached *ck = (void *) path->l[0].b; |
08f78031 | 404 | struct btree_insert_entry *i; |
f83009cd | 405 | unsigned new_u64s; |
2ca88e5a KO |
406 | struct bkey_i *new_k; |
407 | ||
67e0dd8f | 408 | EBUG_ON(path->level); |
2ca88e5a | 409 | |
d5425a3b | 410 | if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && |
f0f41a6d | 411 | bch2_btree_key_cache_must_wait(c) && |
30ca6ece | 412 | !(flags & BTREE_INSERT_JOURNAL_RECLAIM)) |
ac9fa4bd | 413 | return -BCH_ERR_btree_insert_need_journal_reclaim; |
d5425a3b | 414 | |
bc2e5d5c KO |
415 | /* |
416 | * bch2_varint_decode can read past the end of the buffer by at most 7 | |
417 | * bytes (it won't be used): | |
418 | */ | |
419 | u64s += 1; | |
420 | ||
64f2a880 | 421 | if (u64s <= ck->u64s) |
ac9fa4bd | 422 | return 0; |
2ca88e5a | 423 | |
64f2a880 | 424 | new_u64s = roundup_pow_of_two(u64s); |
09b0283e KO |
425 | new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT); |
426 | if (unlikely(!new_k)) | |
427 | return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s); | |
2ca88e5a | 428 | |
08f78031 KO |
429 | trans_for_each_update(trans, i) |
430 | if (i->old_v == &ck->k->v) | |
431 | i->old_v = &new_k->v; | |
432 | ||
2ca88e5a KO |
433 | ck->u64s = new_u64s; |
434 | ck->k = new_k; | |
f83009cd | 435 | return 0; |
2ca88e5a KO |
436 | } |
437 | ||
3598c56e KO |
438 | /* Triggers: */ |
439 | ||
440 | static int run_one_mem_trigger(struct btree_trans *trans, | |
441 | struct btree_insert_entry *i, | |
442 | unsigned flags) | |
443 | { | |
96d3a0af | 444 | struct bkey_s_c old = { &i->old_k, i->old_v }; |
3598c56e | 445 | struct bkey_i *new = i->k; |
183e9c43 KO |
446 | const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); |
447 | const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); | |
3598c56e KO |
448 | int ret; |
449 | ||
08f78031 KO |
450 | verify_update_old_key(trans, i); |
451 | ||
3598c56e KO |
452 | if (unlikely(flags & BTREE_TRIGGER_NORUN)) |
453 | return 0; | |
454 | ||
50a38ca1 | 455 | if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) |
3598c56e KO |
456 | return 0; |
457 | ||
523f33ef | 458 | if (old_ops->atomic_trigger == new_ops->atomic_trigger) { |
2611a041 KO |
459 | ret = bch2_mark_key(trans, i->btree_id, i->level, |
460 | old, bkey_i_to_s_c(new), | |
3598c56e KO |
461 | BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); |
462 | } else { | |
96d3a0af KO |
463 | struct bkey _deleted = KEY(0, 0, 0); |
464 | struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; | |
465 | ||
466 | _deleted.p = i->path->pos; | |
467 | ||
2611a041 KO |
468 | ret = bch2_mark_key(trans, i->btree_id, i->level, |
469 | deleted, bkey_i_to_s_c(new), | |
3598c56e | 470 | BTREE_TRIGGER_INSERT|flags) ?: |
2611a041 KO |
471 | bch2_mark_key(trans, i->btree_id, i->level, |
472 | old, deleted, | |
3598c56e KO |
473 | BTREE_TRIGGER_OVERWRITE|flags); |
474 | } | |
475 | ||
476 | return ret; | |
477 | } | |
478 | ||
96d3a0af | 479 | static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, |
f13fd87a | 480 | bool overwrite) |
3598c56e | 481 | { |
96d3a0af KO |
482 | /* |
483 | * Transactional triggers create new btree_insert_entries, so we can't | |
484 | * pass them a pointer to a btree_insert_entry, that memory is going to | |
485 | * move: | |
486 | */ | |
487 | struct bkey old_k = i->old_k; | |
488 | struct bkey_s_c old = { &old_k, i->old_v }; | |
183e9c43 KO |
489 | const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); |
490 | const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); | |
3598c56e | 491 | |
08f78031 KO |
492 | verify_update_old_key(trans, i); |
493 | ||
3598c56e KO |
494 | if ((i->flags & BTREE_TRIGGER_NORUN) || |
495 | !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type))) | |
496 | return 0; | |
497 | ||
f13fd87a KO |
498 | if (!i->insert_trigger_run && |
499 | !i->overwrite_trigger_run && | |
523f33ef | 500 | old_ops->trans_trigger == new_ops->trans_trigger) { |
3598c56e | 501 | i->overwrite_trigger_run = true; |
f13fd87a | 502 | i->insert_trigger_run = true; |
e1b8f5f5 | 503 | return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, |
f13fd87a KO |
504 | BTREE_TRIGGER_INSERT| |
505 | BTREE_TRIGGER_OVERWRITE| | |
506 | i->flags) ?: 1; | |
507 | } else if (overwrite && !i->overwrite_trigger_run) { | |
508 | i->overwrite_trigger_run = true; | |
e1b8f5f5 | 509 | return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1; |
f13fd87a KO |
510 | } else if (!overwrite && !i->insert_trigger_run) { |
511 | i->insert_trigger_run = true; | |
e1b8f5f5 | 512 | return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1; |
3598c56e | 513 | } else { |
f13fd87a | 514 | return 0; |
3598c56e | 515 | } |
3598c56e KO |
516 | } |
517 | ||
518 | static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, | |
519 | struct btree_insert_entry *btree_id_start) | |
520 | { | |
521 | struct btree_insert_entry *i; | |
522 | bool trans_trigger_run; | |
523 | int ret, overwrite; | |
524 | ||
f13fd87a | 525 | for (overwrite = 1; overwrite >= 0; --overwrite) { |
3598c56e KO |
526 | |
527 | /* | |
528 | * Running triggers will append more updates to the list of updates as | |
529 | * we're walking it: | |
530 | */ | |
531 | do { | |
532 | trans_trigger_run = false; | |
533 | ||
534 | for (i = btree_id_start; | |
535 | i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; | |
536 | i++) { | |
0c10cf85 KO |
537 | if (i->btree_id != btree_id) |
538 | continue; | |
539 | ||
3598c56e KO |
540 | ret = run_one_trans_trigger(trans, i, overwrite); |
541 | if (ret < 0) | |
542 | return ret; | |
543 | if (ret) | |
544 | trans_trigger_run = true; | |
545 | } | |
546 | } while (trans_trigger_run); | |
547 | } | |
548 | ||
549 | return 0; | |
550 | } | |
551 | ||
552 | static int bch2_trans_commit_run_triggers(struct btree_trans *trans) | |
553 | { | |
554 | struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates; | |
555 | unsigned btree_id = 0; | |
556 | int ret = 0; | |
557 | ||
558 | /* | |
559 | * | |
560 | * For a given btree, this algorithm runs insert triggers before | |
561 | * overwrite triggers: this is so that when extents are being moved | |
562 | * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before | |
563 | * they are re-added. | |
564 | */ | |
565 | for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { | |
0c10cf85 KO |
566 | if (btree_id == BTREE_ID_alloc) |
567 | continue; | |
568 | ||
3598c56e KO |
569 | while (btree_id_start < trans->updates + trans->nr_updates && |
570 | btree_id_start->btree_id < btree_id) | |
571 | btree_id_start++; | |
572 | ||
573 | ret = run_btree_triggers(trans, btree_id, btree_id_start); | |
574 | if (ret) | |
575 | return ret; | |
576 | } | |
577 | ||
0c10cf85 KO |
578 | trans_for_each_update(trans, i) { |
579 | if (i->btree_id > BTREE_ID_alloc) | |
580 | break; | |
581 | if (i->btree_id == BTREE_ID_alloc) { | |
582 | ret = run_btree_triggers(trans, BTREE_ID_alloc, i); | |
583 | if (ret) | |
584 | return ret; | |
585 | break; | |
586 | } | |
587 | } | |
588 | ||
c9ee99ad | 589 | #ifdef CONFIG_BCACHEFS_DEBUG |
3598c56e KO |
590 | trans_for_each_update(trans, i) |
591 | BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) && | |
592 | (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && | |
593 | (!i->insert_trigger_run || !i->overwrite_trigger_run)); | |
c9ee99ad | 594 | #endif |
3598c56e KO |
595 | return 0; |
596 | } | |
597 | ||
598 | static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) | |
1c6fdbd8 KO |
599 | { |
600 | struct bch_fs *c = trans->c; | |
601 | struct btree_insert_entry *i; | |
21aec962 | 602 | int ret = 0; |
1c6fdbd8 | 603 | |
2ca88e5a KO |
604 | trans_for_each_update(trans, i) { |
605 | /* | |
606 | * XXX: synchronization of cached update triggers with gc | |
67e0dd8f | 607 | * XXX: synchronization of interior node updates with gc |
2ca88e5a | 608 | */ |
6fba6b83 | 609 | BUG_ON(i->cached || i->level); |
2ca88e5a | 610 | |
21aec962 | 611 | if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) { |
3598c56e | 612 | ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); |
21aec962 KO |
613 | if (ret) |
614 | break; | |
615 | } | |
2ca88e5a | 616 | } |
21aec962 KO |
617 | |
618 | return ret; | |
2a9101a9 | 619 | } |
36e9d698 | 620 | |
2a9101a9 | 621 | static inline int |
30ca6ece | 622 | bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, |
531a0095 KO |
623 | struct btree_insert_entry **stopped_at, |
624 | unsigned long trace_ip) | |
2a9101a9 KO |
625 | { |
626 | struct bch_fs *c = trans->c; | |
2a9101a9 | 627 | struct btree_insert_entry *i; |
920e69bc | 628 | struct btree_write_buffered_key *wb; |
43d00243 | 629 | struct btree_trans_commit_hook *h; |
24326cd1 | 630 | unsigned u64s = 0; |
2a9101a9 | 631 | int ret; |
4d8100da | 632 | |
1c6fdbd8 | 633 | if (race_fault()) { |
674cfc26 | 634 | trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); |
549d173c | 635 | return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); |
1c6fdbd8 KO |
636 | } |
637 | ||
b0004d8d KO |
638 | /* |
639 | * Check if the insert will fit in the leaf node with the write lock | |
640 | * held, otherwise another thread could write the node changing the | |
641 | * amount of space available: | |
642 | */ | |
c8cc5b3e | 643 | |
b7ba66c8 | 644 | prefetch(&trans->c->journal.flags); |
932aa837 | 645 | |
cd8319fd | 646 | trans_for_each_update(trans, i) { |
b7ba66c8 | 647 | /* Multiple inserts might go to same leaf: */ |
24326cd1 | 648 | if (!same_leaf_as_prev(trans, i)) |
b7ba66c8 | 649 | u64s = 0; |
932aa837 | 650 | |
b7ba66c8 | 651 | u64s += i->k->k.u64s; |
6fba6b83 KO |
652 | ret = !i->cached |
653 | ? btree_key_can_insert(trans, insert_l(i)->b, u64s) | |
30ca6ece | 654 | : btree_key_can_insert_cached(trans, flags, i->path, u64s); |
b7ba66c8 KO |
655 | if (ret) { |
656 | *stopped_at = i; | |
657 | return ret; | |
932aa837 | 658 | } |
b7ba66c8 KO |
659 | } |
660 | ||
920e69bc KO |
661 | if (trans->nr_wb_updates && |
662 | trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size) | |
663 | return -BCH_ERR_btree_insert_need_flush_buffer; | |
664 | ||
9623ab27 KO |
665 | /* |
666 | * Don't get journal reservation until after we know insert will | |
667 | * succeed: | |
668 | */ | |
30ca6ece | 669 | if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { |
2a9101a9 | 670 | ret = bch2_trans_journal_res_get(trans, |
ec14fc60 | 671 | (flags & BCH_WATERMARK_MASK)| |
2a9101a9 | 672 | JOURNAL_RES_GET_NONBLOCK); |
87c3beb4 | 673 | if (ret) |
502cfb35 | 674 | return ret; |
fb64f3fd KO |
675 | |
676 | if (unlikely(trans->journal_transaction_names)) | |
677 | journal_transaction_name(trans); | |
4e8224ed KO |
678 | } else { |
679 | trans->journal_res.seq = c->journal.replay_journal_seq; | |
87c3beb4 | 680 | } |
c8cc5b3e | 681 | |
2a9101a9 KO |
682 | /* |
683 | * Not allowed to fail after we've gotten our journal reservation - we | |
684 | * have to use it: | |
685 | */ | |
686 | ||
1ae40fd8 | 687 | if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && |
30ca6ece | 688 | !(flags & BTREE_INSERT_JOURNAL_REPLAY)) { |
29364f34 | 689 | if (bch2_journal_seq_verify) |
cd8319fd | 690 | trans_for_each_update(trans, i) |
1c6fdbd8 | 691 | i->k->k.version.lo = trans->journal_res.seq; |
29364f34 | 692 | else if (bch2_inject_invalid_keys) |
cd8319fd | 693 | trans_for_each_update(trans, i) |
1c6fdbd8 KO |
694 | i->k->k.version = MAX_VERSION; |
695 | } | |
696 | ||
58e1ea4b KO |
697 | if (trans->fs_usage_deltas && |
698 | bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas)) | |
ac9fa4bd | 699 | return -BCH_ERR_btree_insert_need_mark_replicas; |
502cfb35 | 700 | |
920e69bc KO |
701 | if (trans->nr_wb_updates) { |
702 | EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY); | |
703 | ||
704 | ret = bch2_btree_insert_keys_write_buffer(trans); | |
705 | if (ret) | |
706 | goto revert_fs_usage; | |
707 | } | |
708 | ||
56cc033d KO |
709 | h = trans->hooks; |
710 | while (h) { | |
711 | ret = h->fn(trans, h); | |
712 | if (ret) | |
713 | goto revert_fs_usage; | |
714 | h = h->next; | |
715 | } | |
716 | ||
a7199432 | 717 | trans_for_each_update(trans, i) |
21aec962 | 718 | if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) { |
3598c56e | 719 | ret = run_one_mem_trigger(trans, i, i->flags); |
21aec962 | 720 | if (ret) |
920e69bc | 721 | goto fatal_err; |
21aec962 | 722 | } |
932aa837 | 723 | |
21aec962 | 724 | if (unlikely(c->gc_pos.phase)) { |
3598c56e | 725 | ret = bch2_trans_commit_run_gc_triggers(trans); |
21aec962 | 726 | if (ret) |
920e69bc | 727 | goto fatal_err; |
21aec962 | 728 | } |
932aa837 | 729 | |
1ae40fd8 KO |
730 | if (unlikely(trans->extra_journal_entries.nr)) { |
731 | memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), | |
732 | trans->extra_journal_entries.data, | |
733 | trans->extra_journal_entries.nr); | |
734 | ||
735 | trans->journal_res.offset += trans->extra_journal_entries.nr; | |
736 | trans->journal_res.u64s -= trans->extra_journal_entries.nr; | |
737 | } | |
738 | ||
30ca6ece | 739 | if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { |
920e69bc KO |
740 | struct journal *j = &c->journal; |
741 | struct jset_entry *entry; | |
cb685ce7 | 742 | |
920e69bc | 743 | trans_for_each_update(trans, i) { |
cb685ce7 KO |
744 | if (i->key_cache_already_flushed) |
745 | continue; | |
746 | ||
747 | if (i->flags & BTREE_UPDATE_NOJOURNAL) | |
748 | continue; | |
749 | ||
08f78031 KO |
750 | verify_update_old_key(trans, i); |
751 | ||
cb685ce7 KO |
752 | if (trans->journal_transaction_names) { |
753 | entry = bch2_journal_add_entry(j, &trans->journal_res, | |
754 | BCH_JSET_ENTRY_overwrite, | |
755 | i->btree_id, i->level, | |
756 | i->old_k.u64s); | |
6dfa10ab | 757 | bkey_reassemble((struct bkey_i *) entry->start, |
cb685ce7 KO |
758 | (struct bkey_s_c) { &i->old_k, i->old_v }); |
759 | } | |
760 | ||
761 | entry = bch2_journal_add_entry(j, &trans->journal_res, | |
762 | BCH_JSET_ENTRY_btree_keys, | |
763 | i->btree_id, i->level, | |
764 | i->k->k.u64s); | |
6dfa10ab | 765 | bkey_copy((struct bkey_i *) entry->start, i->k); |
cb685ce7 KO |
766 | } |
767 | ||
920e69bc KO |
768 | trans_for_each_wb_update(trans, wb) { |
769 | entry = bch2_journal_add_entry(j, &trans->journal_res, | |
770 | BCH_JSET_ENTRY_btree_keys, | |
771 | wb->btree, 0, | |
772 | wb->k.k.u64s); | |
6dfa10ab | 773 | bkey_copy((struct bkey_i *) entry->start, &wb->k); |
920e69bc KO |
774 | } |
775 | ||
cb685ce7 KO |
776 | if (trans->journal_seq) |
777 | *trans->journal_seq = trans->journal_res.seq; | |
778 | } | |
779 | ||
780 | trans_for_each_update(trans, i) { | |
781 | i->k->k.needs_whiteout = false; | |
782 | ||
eabb10dc BF |
783 | if (!i->cached) { |
784 | u64 seq = trans->journal_res.seq; | |
785 | ||
786 | if (i->flags & BTREE_UPDATE_PREJOURNAL) | |
787 | seq = i->seq; | |
788 | ||
789 | bch2_btree_insert_key_leaf(trans, i->path, i->k, seq); | |
790 | } else if (!i->key_cache_already_flushed) | |
e53d03fe | 791 | bch2_btree_insert_key_cached(trans, flags, i); |
7c812ab7 | 792 | else { |
cb685ce7 | 793 | bch2_btree_key_cache_drop(trans, i->path); |
7c812ab7 KO |
794 | btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE); |
795 | } | |
cb685ce7 | 796 | } |
502cfb35 | 797 | |
920e69bc KO |
798 | return 0; |
799 | fatal_err: | |
800 | bch2_fatal_error(c); | |
801 | revert_fs_usage: | |
802 | if (trans->fs_usage_deltas) | |
803 | bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas); | |
2a9101a9 KO |
804 | return ret; |
805 | } | |
806 | ||
dfd41fb9 KO |
807 | static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) |
808 | { | |
809 | struct btree_insert_entry *i; | |
920e69bc | 810 | struct btree_write_buffered_key *wb; |
dfd41fb9 KO |
811 | |
812 | trans_for_each_update(trans, i) | |
813 | bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); | |
920e69bc KO |
814 | |
815 | trans_for_each_wb_update(trans, wb) | |
816 | bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p); | |
dfd41fb9 KO |
817 | } |
818 | ||
50a38ca1 KO |
819 | static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, |
820 | enum bkey_invalid_flags flags, | |
03e83f63 KO |
821 | struct btree_insert_entry *i, |
822 | struct printbuf *err) | |
823 | { | |
824 | struct bch_fs *c = trans->c; | |
03e83f63 KO |
825 | |
826 | printbuf_reset(err); | |
827 | prt_printf(err, "invalid bkey on insert from %s -> %ps", | |
828 | trans->fn, (void *) i->ip_allocated); | |
829 | prt_newline(err); | |
830 | printbuf_indent_add(err, 2); | |
831 | ||
832 | bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); | |
833 | prt_newline(err); | |
834 | ||
50a38ca1 | 835 | bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); |
03e83f63 KO |
836 | bch2_print_string_as_lines(KERN_ERR, err->buf); |
837 | ||
838 | bch2_inconsistent_error(c); | |
839 | bch2_dump_trans_updates(trans); | |
03e83f63 KO |
840 | |
841 | return -EINVAL; | |
842 | } | |
843 | ||
2a9101a9 KO |
/*
 * Get journal reservation, take write locks, and attempt to do btree update(s):
 *
 * On failure, @stopped_at is set to the entry that caused the error (filled in
 * by bch2_trans_commit_write_locked()). Lock/unlock ordering here is
 * deliberate — see the comments below.
 */
static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags,
				       struct btree_insert_entry **stopped_at,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i;
	int ret = 0, u64s_delta = 0;

	/*
	 * Before taking write locks: for each leaf node touched, compute the
	 * net change in key space (u64s added minus u64s of the keys being
	 * replaced, cached updates excluded). If a leaf is shrinking, try to
	 * merge it with a sibling now, while a transaction restart is still
	 * cheap to take.
	 */
	trans_for_each_update(trans, i) {
		if (i->cached)
			continue;

		u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
		u64s_delta -= i->old_btree_u64s;

		if (!same_leaf_as_next(trans, i)) {
			/* Last update to this leaf: act on the accumulated delta */
			if (u64s_delta <= 0) {
				ret = bch2_foreground_maybe_merge(trans, i->path,
							i->level, flags);
				if (unlikely(ret))
					return ret;
			}

			u64s_delta = 0;
		}
	}

	ret = bch2_trans_lock_write(trans);
	if (unlikely(ret))
		return ret;

	/* The actual insert(s), done with all relevant nodes write locked: */
	ret = bch2_trans_commit_write_locked(trans, flags, stopped_at, trace_ip);

	/*
	 * If journal replay hasn't finished, mark the positions we just wrote
	 * as overwritten in the journal keys, so replay doesn't undo this
	 * commit (presumably — see bch2_journal_key_overwritten()):
	 */
	if (!ret && unlikely(trans->journal_replay_not_finished))
		bch2_drop_overwrites_from_journal(trans);

	bch2_trans_unlock_write(trans);

	/* Pin the journal sequence this commit landed in, if the caller asked */
	if (!ret && trans->journal_pin)
		bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
				     trans->journal_pin, NULL);

	/*
	 * Drop journal reservation after dropping write locks, since dropping
	 * the journal reservation may kick off a journal write:
	 */
	bch2_journal_res_put(&c->journal, &trans->journal_res);

	return ret;
}
897 | ||
24db24c7 KO |
898 | static int journal_reclaim_wait_done(struct bch_fs *c) |
899 | { | |
0ef10785 KO |
900 | int ret = bch2_journal_error(&c->journal) ?: |
901 | !bch2_btree_key_cache_must_wait(c); | |
24db24c7 KO |
902 | |
903 | if (!ret) | |
0ef10785 | 904 | journal_reclaim_kick(&c->journal); |
24db24c7 KO |
905 | return ret; |
906 | } | |
907 | ||
/*
 * Handle an error returned by do_bch2_trans_commit(): for recoverable errors,
 * perform whatever work is needed (split a node, mark replicas, wait for
 * journal space or reclaim, flush the write buffer) and return 0 or a
 * transaction-restart error so the caller retries; otherwise return the error.
 *
 * @i is the insert entry the commit stopped at (used by the node-split case).
 */
static noinline
int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
			    struct btree_insert_entry *i,
			    int ret, unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;

	switch (ret) {
	case -BCH_ERR_btree_insert_btree_node_full:
		/* Leaf had no room for the new key: split it, then retry */
		ret = bch2_btree_split_leaf(trans, i->path, flags);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
		break;
	case -BCH_ERR_btree_insert_need_mark_replicas:
		/* Replicas entries must exist before the commit can proceed */
		ret = drop_locks_do(trans,
			bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas));
		break;
	case -BCH_ERR_journal_res_get_blocked:
		/*
		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
		 * flag
		 */
		if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
		    (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
			/* Blocking here from reclaim context would deadlock */
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			break;
		}

		/* Wait for journal space with locks dropped, then retry */
		ret = drop_locks_do(trans,
			bch2_trans_journal_res_get(trans,
					(flags & BCH_WATERMARK_MASK)|
					JOURNAL_RES_GET_CHECK));
		break;
	case -BCH_ERR_btree_insert_need_journal_reclaim:
		bch2_trans_unlock(trans);

		trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip);

		/* Sleep until reclaim frees key-cache space (or journal errors) */
		wait_event_freezable(c->journal.reclaim_wait,
				     (ret = journal_reclaim_wait_done(c)));
		if (ret < 0)
			break;

		ret = bch2_trans_relock(trans);
		break;
	case -BCH_ERR_btree_insert_need_flush_buffer: {
		struct btree_write_buffer *wb = &c->btree_write_buffer;

		ret = 0;

		/* Only flush if the buffer is still over 3/4 full */
		if (wb->state.nr > wb->size * 3 / 4) {
			bch2_trans_unlock(trans);
			mutex_lock(&wb->flush_lock);

			/* Re-check under the lock: someone may have flushed */
			if (wb->state.nr > wb->size * 3 / 4) {
				bch2_trans_begin(trans);
				/*
				 * NOTE(review): no mutex_unlock() on this path —
				 * assumes __bch2_btree_write_buffer_flush(.., true)
				 * takes over and releases flush_lock; confirm.
				 */
				ret = __bch2_btree_write_buffer_flush(trans,
						flags|BTREE_INSERT_NOCHECK_RW, true);
				if (!ret) {
					trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
					ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
				}
			} else {
				mutex_unlock(&wb->flush_lock);
				ret = bch2_trans_relock(trans);
			}
		}
		break;
	}
	default:
		/* Unrecoverable: must be a genuine (negative) error code */
		BUG_ON(ret >= 0);
		break;
	}

	/* restart errors and trans->restarted must always agree */
	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);

	/* NOFAIL commits (without NOWAIT) should never see ENOSPC */
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
				!(flags & BTREE_INSERT_NOWAIT) &&
				(flags & BTREE_INSERT_NOFAIL), c,
		"%s: incorrectly got %s\n", __func__, bch2_err_str(ret));

	return ret;
}
991 | ||
2a9101a9 | 992 | static noinline int |
30ca6ece | 993 | bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) |
11e6f19a KO |
994 | { |
995 | struct bch_fs *c = trans->c; | |
11e6f19a KO |
996 | int ret; |
997 | ||
30ca6ece | 998 | if (likely(!(flags & BTREE_INSERT_LAZY_RW)) || |
6214485b | 999 | test_bit(BCH_FS_STARTED, &c->flags)) |
858536c7 | 1000 | return -BCH_ERR_erofs_trans_commit; |
11e6f19a | 1001 | |
b5fd7566 | 1002 | ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); |
2a9101a9 KO |
1003 | if (ret) |
1004 | return ret; | |
11e6f19a | 1005 | |
d94189ad | 1006 | bch2_write_ref_get(c, BCH_WRITE_REF_trans); |
2a9101a9 | 1007 | return 0; |
1c6fdbd8 KO |
1008 | } |
1009 | ||
78c8fe20 KO |
1010 | /* |
1011 | * This is for updates done in the early part of fsck - btree_gc - before we've | |
1012 | * gone RW. we only add the new key to the list of keys for journal replay to | |
1013 | * do. | |
1014 | */ | |
1015 | static noinline int | |
1016 | do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) | |
1017 | { | |
1018 | struct bch_fs *c = trans->c; | |
1019 | struct btree_insert_entry *i; | |
1020 | int ret = 0; | |
1021 | ||
1022 | trans_for_each_update(trans, i) { | |
1023 | ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); | |
1024 | if (ret) | |
1025 | break; | |
1026 | } | |
1027 | ||
1028 | return ret; | |
1029 | } | |
1030 | ||
/*
 * __bch2_trans_commit() - commit all updates queued in a btree transaction
 *
 * Validates the queued keys, accounts the journal space they'll need, then
 * runs do_bch2_trans_commit() in a retry loop, letting
 * bch2_trans_commit_error() handle recoverable errors between attempts.
 *
 * Returns 0 on success, or a negative bch2 error code — possibly a
 * transaction restart error the caller must honor.
 */
int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i = NULL;
	struct btree_write_buffered_key *wb;
	int ret = 0;

	/* Nothing queued at all?  Just reset the transaction and return. */
	if (!trans->nr_updates &&
	    !trans->nr_wb_updates &&
	    !trans->extra_journal_entries.nr)
		goto out_reset;

	if (flags & BTREE_INSERT_GC_LOCK_HELD)
		lockdep_assert_held(&c->gc_lock);

	ret = bch2_trans_commit_run_triggers(trans);
	if (ret)
		goto out_reset;

	/* Validate every key before committing anything: */
	trans_for_each_update(trans, i) {
		struct printbuf buf = PRINTBUF;
		enum bkey_invalid_flags invalid_flags = 0;

		if (!(flags & BTREE_INSERT_JOURNAL_REPLAY))
			invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT;

		if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
					       i->bkey_type, invalid_flags, &buf)))
			ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf);
		btree_insert_entry_checks(trans, i);
		printbuf_exit(&buf);

		if (ret)
			return ret;
	}

	/* Not allowed to go RW yet: queue the keys for journal replay instead */
	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
		ret = do_bch2_trans_commit_to_journal_replay(trans);
		goto out_reset;
	}

	/* Take a write ref; fall back to the cold path if we can't */
	if (!(flags & BTREE_INSERT_NOCHECK_RW) &&
	    unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
		ret = bch2_trans_commit_get_rw_cold(trans, flags);
		if (ret)
			goto out_reset;
	}

	/*
	 * Opportunistically flush the btree write buffer when it's over half
	 * full and nobody else is flushing; this restarts the transaction.
	 * NOTE(review): flush_lock is not released here — assumes
	 * __bch2_btree_write_buffer_flush(.., true) drops it; confirm.
	 */
	if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
	    mutex_trylock(&c->btree_write_buffer.flush_lock)) {
		bch2_trans_begin(trans);
		bch2_trans_unlock(trans);

		ret = __bch2_btree_write_buffer_flush(trans,
					flags|BTREE_INSERT_NOCHECK_RW, true);
		if (!ret) {
			trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
		}
		goto out;
	}

	EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));

	/* Tally up the journal space (in u64s) this commit will reserve: */
	trans->journal_u64s = trans->extra_journal_entries.nr;
	trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
	if (trans->journal_transaction_names)
		trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);

	trans_for_each_update(trans, i) {
		EBUG_ON(!i->path->should_be_locked);

		/* Need an intent lock on the node above the one we're updating */
		ret = bch2_btree_path_upgrade(trans, i->path, i->level + 1);
		if (unlikely(ret))
			goto out;

		EBUG_ON(!btree_node_intent_locked(i->path, i->level));

		if (i->key_cache_already_flushed)
			continue;

		if (i->flags & BTREE_UPDATE_NOJOURNAL)
			continue;

		/* we're going to journal the key being updated: */
		trans->journal_u64s += jset_u64s(i->k->k.u64s);

		/* and we're also going to log the overwrite: */
		if (trans->journal_transaction_names)
			trans->journal_u64s += jset_u64s(i->old_k.u64s);
	}

	trans_for_each_wb_update(trans, wb)
		trans->journal_u64s += jset_u64s(wb->k.k.u64s);

	if (trans->extra_journal_res) {
		ret = bch2_disk_reservation_add(c, trans->disk_res,
				trans->extra_journal_res,
				(flags & BTREE_INSERT_NOFAIL)
				? BCH_DISK_RESERVATION_NOFAIL : 0);
		if (ret)
			goto err;
	}
retry:
	bch2_trans_verify_not_in_restart(trans);
	memset(&trans->journal_res, 0, sizeof(trans->journal_res));

	ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_);

	/* make sure we didn't drop or screw up locks: */
	bch2_trans_verify_locks(trans);

	if (ret)
		goto err;

	trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
	if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
		bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
	if (!ret)
		bch2_trans_downgrade(trans);
	bch2_trans_reset_updates(trans);

	return ret;
err:
	/* Recoverable errors return 0 here, and we loop back to retry: */
	ret = bch2_trans_commit_error(trans, flags, i, ret, _RET_IP_);
	if (ret)
		goto out;

	goto retry;
}