git.ipfire.org Git - thirdparty/linux.git/blame - fs/btrfs/delayed-inode.c
btrfs: use KMEM_CACHE() to create btrfs_delayed_node cache
c1d7c514 1// SPDX-License-Identifier: GPL-2.0
16cdcec7
MX
2/*
3 * Copyright (C) 2011 Fujitsu. All rights reserved.
4 * Written by Miao Xie <miaox@cn.fujitsu.com>
16cdcec7
MX
5 */
6
7#include <linux/slab.h>
c7f88c4e 8#include <linux/iversion.h>
ec8eb376
JB
9#include "ctree.h"
10#include "fs.h"
9b569ea0 11#include "messages.h"
602cbe91 12#include "misc.h"
16cdcec7
MX
13#include "delayed-inode.h"
14#include "disk-io.h"
15#include "transaction.h"
4f5427cc 16#include "qgroup.h"
1f95ec01 17#include "locking.h"
26c2c454 18#include "inode-item.h"
f1e5c618 19#include "space-info.h"
07e81dc9 20#include "accessors.h"
7c8ede16 21#include "file-item.h"
16cdcec7 22
de3cb945
CM
23#define BTRFS_DELAYED_WRITEBACK 512
24#define BTRFS_DELAYED_BACKGROUND 128
25#define BTRFS_DELAYED_BATCH 16
16cdcec7
MX
26
27static struct kmem_cache *delayed_node_cache;
28
29int __init btrfs_delayed_inode_init(void)
30{
625c1e06 31 delayed_node_cache = KMEM_CACHE(btrfs_delayed_node, SLAB_MEM_SPREAD);
16cdcec7
MX
32 if (!delayed_node_cache)
33 return -ENOMEM;
34 return 0;
35}
36
e67c718b 37void __cold btrfs_delayed_inode_exit(void)
16cdcec7 38{
5598e900 39 kmem_cache_destroy(delayed_node_cache);
16cdcec7
MX
40}
41
585ab692
DS
42void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root)
43{
44 atomic_set(&delayed_root->items, 0);
45 atomic_set(&delayed_root->items_seq, 0);
46 delayed_root->nodes = 0;
47 spin_lock_init(&delayed_root->lock);
48 init_waitqueue_head(&delayed_root->wait);
49 INIT_LIST_HEAD(&delayed_root->node_list);
50 INIT_LIST_HEAD(&delayed_root->prepare_list);
51}
52
16cdcec7
MX
53static inline void btrfs_init_delayed_node(
54 struct btrfs_delayed_node *delayed_node,
55 struct btrfs_root *root, u64 inode_id)
56{
57 delayed_node->root = root;
58 delayed_node->inode_id = inode_id;
6de5f18e 59 refcount_set(&delayed_node->refs, 0);
03a1d4c8
LB
60 delayed_node->ins_root = RB_ROOT_CACHED;
61 delayed_node->del_root = RB_ROOT_CACHED;
16cdcec7 62 mutex_init(&delayed_node->mutex);
16cdcec7
MX
63 INIT_LIST_HEAD(&delayed_node->n_list);
64 INIT_LIST_HEAD(&delayed_node->p_list);
16cdcec7
MX
65}
66
f85b7379
DS
67static struct btrfs_delayed_node *btrfs_get_delayed_node(
68 struct btrfs_inode *btrfs_inode)
16cdcec7 69{
16cdcec7 70 struct btrfs_root *root = btrfs_inode->root;
4a0cc7ca 71 u64 ino = btrfs_ino(btrfs_inode);
2f7e33d4 72 struct btrfs_delayed_node *node;
16cdcec7 73
20c7bcec 74 node = READ_ONCE(btrfs_inode->delayed_node);
16cdcec7 75 if (node) {
6de5f18e 76 refcount_inc(&node->refs);
16cdcec7
MX
77 return node;
78 }
79
80 spin_lock(&root->inode_lock);
6140ba8a 81 node = xa_load(&root->delayed_nodes, ino);
ec35e48b 82
16cdcec7
MX
83 if (node) {
84 if (btrfs_inode->delayed_node) {
6de5f18e 85 refcount_inc(&node->refs); /* can be accessed */
2f7e33d4 86 BUG_ON(btrfs_inode->delayed_node != node);
16cdcec7 87 spin_unlock(&root->inode_lock);
2f7e33d4 88 return node;
16cdcec7 89 }
ec35e48b
CM
90
91 /*
92 * It's possible that we're racing into the middle of removing
6140ba8a 93 * this node from the xarray. In this case, the refcount
ec35e48b 94 * was zero and it should never go back to one. Just return
6140ba8a 95 * NULL like it was never in the xarray at all; our release
ec35e48b
CM
96 * function is in the process of removing it.
97 *
98 * Some implementations of refcount_inc refuse to bump the
99 * refcount once it has hit zero. If we don't do this dance
100 * here, refcount_inc() may decide to just WARN_ONCE() instead
101 * of actually bumping the refcount.
102 *
6140ba8a 103 * If this node is properly in the xarray, we want to bump the
ec35e48b
CM
104 * refcount twice, once for the inode and once for this get
105 * operation.
106 */
107 if (refcount_inc_not_zero(&node->refs)) {
108 refcount_inc(&node->refs);
109 btrfs_inode->delayed_node = node;
110 } else {
111 node = NULL;
112 }
113
16cdcec7
MX
114 spin_unlock(&root->inode_lock);
115 return node;
116 }
117 spin_unlock(&root->inode_lock);
118
2f7e33d4
MX
119 return NULL;
120}
121
79787eaa 122/* Will return either the node or an ERR_PTR(-ENOMEM). */
2f7e33d4 123static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
f85b7379 124 struct btrfs_inode *btrfs_inode)
2f7e33d4
MX
125{
126 struct btrfs_delayed_node *node;
2f7e33d4 127 struct btrfs_root *root = btrfs_inode->root;
4a0cc7ca 128 u64 ino = btrfs_ino(btrfs_inode);
2f7e33d4 129 int ret;
6140ba8a 130 void *ptr;
2f7e33d4 131
088aea3b
DS
132again:
133 node = btrfs_get_delayed_node(btrfs_inode);
134 if (node)
135 return node;
2f7e33d4 136
088aea3b
DS
137 node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
138 if (!node)
139 return ERR_PTR(-ENOMEM);
140 btrfs_init_delayed_node(node, root, ino);
16cdcec7 141
6140ba8a 142 /* Cached in the inode and can be accessed. */
088aea3b 143 refcount_set(&node->refs, 2);
16cdcec7 144
6140ba8a
DS
145 /* Allocate and reserve the slot; from now on, xa_load() can return NULL for it. */
146 ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
147 if (ret == -ENOMEM) {
088aea3b 148 kmem_cache_free(delayed_node_cache, node);
6140ba8a 149 return ERR_PTR(-ENOMEM);
088aea3b 150 }
088aea3b 151 spin_lock(&root->inode_lock);
6140ba8a
DS
152 ptr = xa_load(&root->delayed_nodes, ino);
153 if (ptr) {
154 /* Somebody inserted it, go back and read it. */
088aea3b
DS
155 spin_unlock(&root->inode_lock);
156 kmem_cache_free(delayed_node_cache, node);
6140ba8a 157 node = NULL;
088aea3b
DS
158 goto again;
159 }
6140ba8a
DS
160 ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
161 ASSERT(xa_err(ptr) != -EINVAL);
162 ASSERT(xa_err(ptr) != -ENOMEM);
163 ASSERT(ptr == NULL);
16cdcec7
MX
164 btrfs_inode->delayed_node = node;
165 spin_unlock(&root->inode_lock);
16cdcec7
MX
166
167 return node;
168}
169
170/*
171 * Call it when holding delayed_node->mutex
172 *
173 * If mod = 1, add this node into the prepared list.
174 */
175static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
176 struct btrfs_delayed_node *node,
177 int mod)
178{
179 spin_lock(&root->lock);
7cf35d91 180 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
16cdcec7
MX
181 if (!list_empty(&node->p_list))
182 list_move_tail(&node->p_list, &root->prepare_list);
183 else if (mod)
184 list_add_tail(&node->p_list, &root->prepare_list);
185 } else {
186 list_add_tail(&node->n_list, &root->node_list);
187 list_add_tail(&node->p_list, &root->prepare_list);
6de5f18e 188 refcount_inc(&node->refs); /* inserted into list */
16cdcec7 189 root->nodes++;
7cf35d91 190 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
16cdcec7
MX
191 }
192 spin_unlock(&root->lock);
193}
194
195/* Call it when holding delayed_node->mutex */
196static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
197 struct btrfs_delayed_node *node)
198{
199 spin_lock(&root->lock);
7cf35d91 200 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
16cdcec7 201 root->nodes--;
6de5f18e 202 refcount_dec(&node->refs); /* not in the list */
16cdcec7
MX
203 list_del_init(&node->n_list);
204 if (!list_empty(&node->p_list))
205 list_del_init(&node->p_list);
7cf35d91 206 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
16cdcec7
MX
207 }
208 spin_unlock(&root->lock);
209}
210
48a3b636 211static struct btrfs_delayed_node *btrfs_first_delayed_node(
16cdcec7
MX
212 struct btrfs_delayed_root *delayed_root)
213{
214 struct list_head *p;
215 struct btrfs_delayed_node *node = NULL;
216
217 spin_lock(&delayed_root->lock);
218 if (list_empty(&delayed_root->node_list))
219 goto out;
220
221 p = delayed_root->node_list.next;
222 node = list_entry(p, struct btrfs_delayed_node, n_list);
6de5f18e 223 refcount_inc(&node->refs);
16cdcec7
MX
224out:
225 spin_unlock(&delayed_root->lock);
226
227 return node;
228}
229
48a3b636 230static struct btrfs_delayed_node *btrfs_next_delayed_node(
16cdcec7
MX
231 struct btrfs_delayed_node *node)
232{
233 struct btrfs_delayed_root *delayed_root;
234 struct list_head *p;
235 struct btrfs_delayed_node *next = NULL;
236
237 delayed_root = node->root->fs_info->delayed_root;
238 spin_lock(&delayed_root->lock);
7cf35d91
MX
239 if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
240 /* not in the list */
16cdcec7
MX
241 if (list_empty(&delayed_root->node_list))
242 goto out;
243 p = delayed_root->node_list.next;
244 } else if (list_is_last(&node->n_list, &delayed_root->node_list))
245 goto out;
246 else
247 p = node->n_list.next;
248
249 next = list_entry(p, struct btrfs_delayed_node, n_list);
6de5f18e 250 refcount_inc(&next->refs);
16cdcec7
MX
251out:
252 spin_unlock(&delayed_root->lock);
253
254 return next;
255}
256
257static void __btrfs_release_delayed_node(
258 struct btrfs_delayed_node *delayed_node,
259 int mod)
260{
261 struct btrfs_delayed_root *delayed_root;
262
263 if (!delayed_node)
264 return;
265
266 delayed_root = delayed_node->root->fs_info->delayed_root;
267
268 mutex_lock(&delayed_node->mutex);
269 if (delayed_node->count)
270 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
271 else
272 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
273 mutex_unlock(&delayed_node->mutex);
274
6de5f18e 275 if (refcount_dec_and_test(&delayed_node->refs)) {
16cdcec7 276 struct btrfs_root *root = delayed_node->root;
ec35e48b 277
16cdcec7 278 spin_lock(&root->inode_lock);
ec35e48b
CM
279 /*
280 * Once our refcount goes to zero, nobody is allowed to bump it
281 * back up. We can delete it now.
282 */
283 ASSERT(refcount_read(&delayed_node->refs) == 0);
6140ba8a 284 xa_erase(&root->delayed_nodes, delayed_node->inode_id);
16cdcec7 285 spin_unlock(&root->inode_lock);
ec35e48b 286 kmem_cache_free(delayed_node_cache, delayed_node);
16cdcec7
MX
287 }
288}
289
290static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
291{
292 __btrfs_release_delayed_node(node, 0);
293}
294
48a3b636 295static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
16cdcec7
MX
296 struct btrfs_delayed_root *delayed_root)
297{
298 struct list_head *p;
299 struct btrfs_delayed_node *node = NULL;
300
301 spin_lock(&delayed_root->lock);
302 if (list_empty(&delayed_root->prepare_list))
303 goto out;
304
305 p = delayed_root->prepare_list.next;
306 list_del_init(p);
307 node = list_entry(p, struct btrfs_delayed_node, p_list);
6de5f18e 308 refcount_inc(&node->refs);
16cdcec7
MX
309out:
310 spin_unlock(&delayed_root->lock);
311
312 return node;
313}
314
315static inline void btrfs_release_prepared_delayed_node(
316 struct btrfs_delayed_node *node)
317{
318 __btrfs_release_delayed_node(node, 1);
319}
320
4c469798
FM
321static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
322 struct btrfs_delayed_node *node,
323 enum btrfs_delayed_item_type type)
16cdcec7
MX
324{
325 struct btrfs_delayed_item *item;
4c469798 326
75f5f60b 327 item = kmalloc(struct_size(item, data, data_len), GFP_NOFS);
16cdcec7
MX
328 if (item) {
329 item->data_len = data_len;
4c469798 330 item->type = type;
16cdcec7 331 item->bytes_reserved = 0;
96d89923
FM
332 item->delayed_node = node;
333 RB_CLEAR_NODE(&item->rb_node);
30b80f3c
FM
334 INIT_LIST_HEAD(&item->log_list);
335 item->logged = false;
089e77e1 336 refcount_set(&item->refs, 1);
16cdcec7
MX
337 }
338 return item;
339}
340
341/*
9580503b
DS
342 * Look up the delayed item by key.
343 *
16cdcec7 344 * @root: the rbtree root to search (the delayed node's ins_root or del_root)
96d89923 345 * @index: the dir index value to lookup (offset of a dir index key)
16cdcec7
MX
346 *
347 * Note: returns the delayed item with the given index, or NULL if no such
348 * item is queued in the tree.
349 */
350static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
351 struct rb_root *root,
4cbf37f5 352 u64 index)
16cdcec7 353{
4cbf37f5 354 struct rb_node *node = root->rb_node;
16cdcec7 355 struct btrfs_delayed_item *delayed_item = NULL;
16cdcec7
MX
356
357 while (node) {
358 delayed_item = rb_entry(node, struct btrfs_delayed_item,
359 rb_node);
96d89923 360 if (delayed_item->index < index)
16cdcec7 361 node = node->rb_right;
96d89923 362 else if (delayed_item->index > index)
16cdcec7
MX
363 node = node->rb_left;
364 else
365 return delayed_item;
366 }
367
16cdcec7
MX
368 return NULL;
369}
370
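/*
 * Link a delayed item into the delayed node's insertion or deletion rbtree,
 * keyed by its dir index. Returns -EEXIST if an item with the same index is
 * already queued. For insertion items the node's index_cnt is advanced past
 * the new index; on success the per-node count and the global delayed_root
 * item counter are both incremented.
 */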
16cdcec7 371static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
c9d02ab4 372 struct btrfs_delayed_item *ins)
16cdcec7
MX
373{
374 struct rb_node **p, *node;
375 struct rb_node *parent_node = NULL;
03a1d4c8 376 struct rb_root_cached *root;
16cdcec7 377 struct btrfs_delayed_item *item;
03a1d4c8 378 bool leftmost = true;
16cdcec7 379
4c469798 380 if (ins->type == BTRFS_DELAYED_INSERTION_ITEM)
16cdcec7 381 root = &delayed_node->ins_root;
16cdcec7 382 else
4c469798
FM
383 root = &delayed_node->del_root;
384
03a1d4c8 385 p = &root->rb_root.rb_node;
16cdcec7
MX
386 node = &ins->rb_node;
387
388 while (*p) {
389 parent_node = *p;
390 item = rb_entry(parent_node, struct btrfs_delayed_item,
391 rb_node);
392
96d89923 393 if (item->index < ins->index) {
16cdcec7 394 p = &(*p)->rb_right;
03a1d4c8 395 leftmost = false;
96d89923 396 } else if (item->index > ins->index) {
16cdcec7 397 p = &(*p)->rb_left;
03a1d4c8 398 } else {
16cdcec7 399 return -EEXIST;
03a1d4c8 400 }
16cdcec7
MX
401 }
402
403 rb_link_node(node, parent_node, p);
03a1d4c8 404 rb_insert_color_cached(node, root, leftmost);
a176affe 405
4c469798 406 if (ins->type == BTRFS_DELAYED_INSERTION_ITEM &&
96d89923
FM
407 ins->index >= delayed_node->index_cnt)
408 delayed_node->index_cnt = ins->index + 1;
16cdcec7
MX
409
410 delayed_node->count++;
411 atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
412 return 0;
413}
414
de3cb945
CM
415static void finish_one_item(struct btrfs_delayed_root *delayed_root)
416{
417 int seq = atomic_inc_return(&delayed_root->items_seq);
ee863954 418
093258e6 419 /* atomic_dec_return implies a barrier */
de3cb945 420 if ((atomic_dec_return(&delayed_root->items) <
093258e6
DS
421 BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
422 cond_wake_up_nomb(&delayed_root->wait);
de3cb945
CM
423}
424
16cdcec7
MX
425static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
426{
a57c2d4e 427 struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
03a1d4c8 428 struct rb_root_cached *root;
16cdcec7
MX
429 struct btrfs_delayed_root *delayed_root;
430
96d89923
FM
431 /* Not inserted, ignore it. */
432 if (RB_EMPTY_NODE(&delayed_item->rb_node))
933c22a7 433 return;
96d89923 434
a57c2d4e
FM
435 /* If it's in a rbtree, then we need to have delayed node locked. */
436 lockdep_assert_held(&delayed_node->mutex);
437
438 delayed_root = delayed_node->root->fs_info->delayed_root;
16cdcec7 439
4c469798 440 if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
a57c2d4e 441 root = &delayed_node->ins_root;
16cdcec7 442 else
a57c2d4e 443 root = &delayed_node->del_root;
16cdcec7 444
03a1d4c8 445 rb_erase_cached(&delayed_item->rb_node, root);
96d89923 446 RB_CLEAR_NODE(&delayed_item->rb_node);
a57c2d4e 447 delayed_node->count--;
de3cb945
CM
448
449 finish_one_item(delayed_root);
16cdcec7
MX
450}
451
452static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
453{
454 if (item) {
455 __btrfs_remove_delayed_item(item);
089e77e1 456 if (refcount_dec_and_test(&item->refs))
16cdcec7
MX
457 kfree(item);
458 }
459}
460
48a3b636 461static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
16cdcec7
MX
462 struct btrfs_delayed_node *delayed_node)
463{
464 struct rb_node *p;
465 struct btrfs_delayed_item *item = NULL;
466
03a1d4c8 467 p = rb_first_cached(&delayed_node->ins_root);
16cdcec7
MX
468 if (p)
469 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
470
471 return item;
472}
473
48a3b636 474static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
16cdcec7
MX
475 struct btrfs_delayed_node *delayed_node)
476{
477 struct rb_node *p;
478 struct btrfs_delayed_item *item = NULL;
479
03a1d4c8 480 p = rb_first_cached(&delayed_node->del_root);
16cdcec7
MX
481 if (p)
482 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
483
484 return item;
485}
486
48a3b636 487static struct btrfs_delayed_item *__btrfs_next_delayed_item(
16cdcec7
MX
488 struct btrfs_delayed_item *item)
489{
490 struct rb_node *p;
491 struct btrfs_delayed_item *next = NULL;
492
493 p = rb_next(&item->rb_node);
494 if (p)
495 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
496
497 return next;
498}
499
16cdcec7 500static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
16cdcec7
MX
501 struct btrfs_delayed_item *item)
502{
503 struct btrfs_block_rsv *src_rsv;
504 struct btrfs_block_rsv *dst_rsv;
df492881 505 struct btrfs_fs_info *fs_info = trans->fs_info;
16cdcec7
MX
506 u64 num_bytes;
507 int ret;
508
509 if (!trans->bytes_reserved)
510 return 0;
511
512 src_rsv = trans->block_rsv;
0b246afa 513 dst_rsv = &fs_info->delayed_block_rsv;
16cdcec7 514
2bd36e7b 515 num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
f218ea6c
QW
516
517 /*
518 * Here we migrate space rsv from transaction rsv, since we have already
519 * reserved space when starting a transaction. So no need to reserve
520 * qgroup space here.
521 */
3a584174 522 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
8c2a3ca2 523 if (!ret) {
0b246afa 524 trace_btrfs_space_reservation(fs_info, "delayed_item",
96d89923 525 item->delayed_node->inode_id,
8c2a3ca2 526 num_bytes, 1);
763748b2
FM
527 /*
528 * For insertions we track reserved metadata space by accounting
529 * for the number of leaves that will be used, based on the delayed
01fc062b 530 * node's curr_index_batch_size and index_item_leaves fields.
763748b2 531 */
4c469798 532 if (item->type == BTRFS_DELAYED_DELETION_ITEM)
763748b2 533 item->bytes_reserved = num_bytes;
8c2a3ca2 534 }
16cdcec7
MX
535
536 return ret;
537}
538
4f5427cc 539static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
16cdcec7
MX
540 struct btrfs_delayed_item *item)
541{
19fd2949 542 struct btrfs_block_rsv *rsv;
4f5427cc 543 struct btrfs_fs_info *fs_info = root->fs_info;
19fd2949 544
16cdcec7
MX
545 if (!item->bytes_reserved)
546 return;
547
0b246afa 548 rsv = &fs_info->delayed_block_rsv;
f218ea6c
QW
549 /*
550 * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
551 * to release/reserve qgroup space.
552 */
0b246afa 553 trace_btrfs_space_reservation(fs_info, "delayed_item",
96d89923
FM
554 item->delayed_node->inode_id,
555 item->bytes_reserved, 0);
63f018be 556 btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
16cdcec7
MX
557}
558
763748b2
FM
559static void btrfs_delayed_item_release_leaves(struct btrfs_delayed_node *node,
560 unsigned int num_leaves)
561{
562 struct btrfs_fs_info *fs_info = node->root->fs_info;
563 const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, num_leaves);
564
565 /* There are no space reservations during log replay, bail out. */
566 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
567 return;
568
569 trace_btrfs_space_reservation(fs_info, "delayed_item", node->inode_id,
570 bytes, 0);
571 btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv, bytes, NULL);
572}
573
16cdcec7
MX
574static int btrfs_delayed_inode_reserve_metadata(
575 struct btrfs_trans_handle *trans,
576 struct btrfs_root *root,
577 struct btrfs_delayed_node *node)
578{
0b246afa 579 struct btrfs_fs_info *fs_info = root->fs_info;
16cdcec7
MX
580 struct btrfs_block_rsv *src_rsv;
581 struct btrfs_block_rsv *dst_rsv;
582 u64 num_bytes;
583 int ret;
584
16cdcec7 585 src_rsv = trans->block_rsv;
0b246afa 586 dst_rsv = &fs_info->delayed_block_rsv;
16cdcec7 587
bcacf5f3 588 num_bytes = btrfs_calc_metadata_size(fs_info, 1);
c06a0e12
JB
589
590 /*
591 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
592 * which, for speed, doesn't reserve space. This is a problem since we
593 * still need to reserve space for this update, so try to reserve the
594 * space.
595 *
596 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
69fe2d75 597 * we always reserve enough to update the inode item.
c06a0e12 598 */
e755d9ab 599 if (!src_rsv || (!trans->bytes_reserved &&
66d8f3dd 600 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
4d14c5cd
NB
601 ret = btrfs_qgroup_reserve_meta(root, num_bytes,
602 BTRFS_QGROUP_RSV_META_PREALLOC, true);
f218ea6c
QW
603 if (ret < 0)
604 return ret;
9270501c 605 ret = btrfs_block_rsv_add(fs_info, dst_rsv, num_bytes,
08e007d2 606 BTRFS_RESERVE_NO_FLUSH);
98686ffc
NB
607 /* NO_FLUSH could only fail with -ENOSPC */
608 ASSERT(ret == 0 || ret == -ENOSPC);
609 if (ret)
0f9c03d8 610 btrfs_qgroup_free_meta_prealloc(root, num_bytes);
98686ffc
NB
611 } else {
612 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
c06a0e12
JB
613 }
614
8c2a3ca2 615 if (!ret) {
0b246afa 616 trace_btrfs_space_reservation(fs_info, "delayed_inode",
8e3c9d3c 617 node->inode_id, num_bytes, 1);
16cdcec7 618 node->bytes_reserved = num_bytes;
8c2a3ca2 619 }
16cdcec7
MX
620
621 return ret;
622}
623
2ff7e61e 624static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
4f5427cc
QW
625 struct btrfs_delayed_node *node,
626 bool qgroup_free)
16cdcec7
MX
627{
628 struct btrfs_block_rsv *rsv;
629
630 if (!node->bytes_reserved)
631 return;
632
0b246afa
JM
633 rsv = &fs_info->delayed_block_rsv;
634 trace_btrfs_space_reservation(fs_info, "delayed_inode",
8c2a3ca2 635 node->inode_id, node->bytes_reserved, 0);
63f018be 636 btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
4f5427cc
QW
637 if (qgroup_free)
638 btrfs_qgroup_free_meta_prealloc(node->root,
639 node->bytes_reserved);
640 else
641 btrfs_qgroup_convert_reserved_meta(node->root,
642 node->bytes_reserved);
16cdcec7
MX
643 node->bytes_reserved = 0;
644}
645
646/*
06ac264f
FM
647 * Insert a single delayed item or a batch of delayed items, as many as possible
648 * that fit in a leaf. The delayed items (dir index keys) are sorted by their key
649 * in the rbtree, and if there's a gap between two consecutive dir index items,
650 * then it means at some point we had delayed dir indexes to add but they got
651 * removed (by btrfs_delete_delayed_dir_index()) before we attempted to flush them
652 * into the subvolume tree. Dir index keys also have their offsets coming from a
653 * monotonically increasing counter, so we can't get new keys with an offset that
654 * fits within a gap between delayed dir index items.
16cdcec7 655 */
506650dc
FM
656static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
657 struct btrfs_root *root,
658 struct btrfs_path *path,
659 struct btrfs_delayed_item *first_item)
16cdcec7 660{
763748b2
FM
661 struct btrfs_fs_info *fs_info = root->fs_info;
662 struct btrfs_delayed_node *node = first_item->delayed_node;
b7ef5f3a 663 LIST_HEAD(item_list);
506650dc
FM
664 struct btrfs_delayed_item *curr;
665 struct btrfs_delayed_item *next;
763748b2 666 const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
b7ef5f3a 667 struct btrfs_item_batch batch;
96d89923 668 struct btrfs_key first_key;
4c469798 669 const u32 first_data_size = first_item->data_len;
506650dc 670 int total_size;
506650dc 671 char *ins_data = NULL;
506650dc 672 int ret;
71b68e9e 673 bool continuous_keys_only = false;
16cdcec7 674
763748b2
FM
675 lockdep_assert_held(&node->mutex);
676
71b68e9e
JB
677 /*
678 * During normal operation the delayed index offset is continuously
679 * increasing, so we can batch insert all items as there will not be any
680 * overlapping keys in the tree.
681 *
682 * The exception to this is log replay, where we may have interleaved
683 * offsets in the tree, so our batch needs to be continuous keys only in
684 * order to ensure we do not end up with out of order items in our leaf.
685 */
686 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
687 continuous_keys_only = true;
688
763748b2
FM
689 /*
690 * For delayed items to insert, we track reserved metadata bytes based
691 * on the number of leaves that we will use.
692 * See btrfs_insert_delayed_dir_index() and
693 * btrfs_delayed_item_reserve_metadata()).
694 */
695 ASSERT(first_item->bytes_reserved == 0);
696
b7ef5f3a 697 list_add_tail(&first_item->tree_list, &item_list);
4c469798 698 batch.total_data_size = first_data_size;
b7ef5f3a 699 batch.nr = 1;
4c469798 700 total_size = first_data_size + sizeof(struct btrfs_item);
506650dc 701 curr = first_item;
16cdcec7 702
506650dc
FM
703 while (true) {
704 int next_size;
16cdcec7 705
16cdcec7 706 next = __btrfs_next_delayed_item(curr);
06ac264f 707 if (!next)
16cdcec7
MX
708 break;
709
71b68e9e
JB
710 /*
711 * We cannot allow gaps in the key space if we're doing log
712 * replay.
713 */
96d89923 714 if (continuous_keys_only && (next->index != curr->index + 1))
71b68e9e
JB
715 break;
716
763748b2
FM
717 ASSERT(next->bytes_reserved == 0);
718
506650dc
FM
719 next_size = next->data_len + sizeof(struct btrfs_item);
720 if (total_size + next_size > max_size)
16cdcec7 721 break;
16cdcec7 722
b7ef5f3a
FM
723 list_add_tail(&next->tree_list, &item_list);
724 batch.nr++;
506650dc 725 total_size += next_size;
b7ef5f3a 726 batch.total_data_size += next->data_len;
506650dc 727 curr = next;
16cdcec7
MX
728 }
729
b7ef5f3a 730 if (batch.nr == 1) {
96d89923
FM
731 first_key.objectid = node->inode_id;
732 first_key.type = BTRFS_DIR_INDEX_KEY;
733 first_key.offset = first_item->index;
734 batch.keys = &first_key;
4c469798 735 batch.data_sizes = &first_data_size;
506650dc 736 } else {
b7ef5f3a
FM
737 struct btrfs_key *ins_keys;
738 u32 *ins_sizes;
506650dc 739 int i = 0;
16cdcec7 740
b7ef5f3a
FM
741 ins_data = kmalloc(batch.nr * sizeof(u32) +
742 batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
506650dc
FM
743 if (!ins_data) {
744 ret = -ENOMEM;
745 goto out;
746 }
747 ins_sizes = (u32 *)ins_data;
b7ef5f3a
FM
748 ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32));
749 batch.keys = ins_keys;
750 batch.data_sizes = ins_sizes;
751 list_for_each_entry(curr, &item_list, tree_list) {
96d89923
FM
752 ins_keys[i].objectid = node->inode_id;
753 ins_keys[i].type = BTRFS_DIR_INDEX_KEY;
754 ins_keys[i].offset = curr->index;
506650dc
FM
755 ins_sizes[i] = curr->data_len;
756 i++;
757 }
16cdcec7
MX
758 }
759
b7ef5f3a 760 ret = btrfs_insert_empty_items(trans, root, path, &batch);
506650dc
FM
761 if (ret)
762 goto out;
16cdcec7 763
b7ef5f3a 764 list_for_each_entry(curr, &item_list, tree_list) {
506650dc 765 char *data_ptr;
16cdcec7 766
506650dc
FM
767 data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
768 write_extent_buffer(path->nodes[0], &curr->data,
769 (unsigned long)data_ptr, curr->data_len);
770 path->slots[0]++;
771 }
16cdcec7 772
506650dc
FM
773 /*
774 * Now release our path before releasing the delayed items and their
775 * metadata reservations, so that we don't block other tasks for more
776 * time than needed.
777 */
778 btrfs_release_path(path);
16cdcec7 779
763748b2
FM
780 ASSERT(node->index_item_leaves > 0);
781
71b68e9e
JB
782 /*
783 * For normal operations we will batch an entire leaf's worth of delayed
784 * items, so if there are more items to process we can decrement
785 * index_item_leaves by 1 as we inserted 1 leaf's worth of items.
786 *
787 * However for log replay we may not have inserted an entire leaf's
788 * worth of items, we may have not had continuous items, so decrementing
789 * here would mess up the index_item_leaves accounting. For this case
790 * only clean up the accounting when there are no items left.
791 */
792 if (next && !continuous_keys_only) {
763748b2
FM
793 /*
794 * We inserted one batch of items into a leaf and there are more
795 * items to flush in a future batch, now release one unit of
796 * metadata space from the delayed block reserve, corresponding
797 * to the leaf we just flushed.
798 */
799 btrfs_delayed_item_release_leaves(node, 1);
800 node->index_item_leaves--;
71b68e9e 801 } else if (!next) {
763748b2
FM
802 /*
803 * There are no more items to insert. We can have a number of
804 * reserved leaves > 1 here - this happens when many dir index
805 * items are added and then removed before they are flushed (file
806 * names with a very short life, never span a transaction). So
807 * release all remaining leaves.
808 */
809 btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
810 node->index_item_leaves = 0;
811 }
812
b7ef5f3a 813 list_for_each_entry_safe(curr, next, &item_list, tree_list) {
16cdcec7
MX
814 list_del(&curr->tree_list);
815 btrfs_release_delayed_item(curr);
816 }
16cdcec7 817out:
506650dc 818 kfree(ins_data);
16cdcec7
MX
819 return ret;
820}
821
16cdcec7
MX
822static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
823 struct btrfs_path *path,
824 struct btrfs_root *root,
825 struct btrfs_delayed_node *node)
826{
16cdcec7
MX
827 int ret = 0;
828
506650dc
FM
829 while (ret == 0) {
830 struct btrfs_delayed_item *curr;
16cdcec7 831
506650dc
FM
832 mutex_lock(&node->mutex);
833 curr = __btrfs_first_delayed_insertion_item(node);
834 if (!curr) {
835 mutex_unlock(&node->mutex);
836 break;
837 }
838 ret = btrfs_insert_delayed_item(trans, root, path, curr);
839 mutex_unlock(&node->mutex);
16cdcec7 840 }
16cdcec7 841
16cdcec7
MX
842 return ret;
843}
844
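/*
 * Delete from the current leaf the dir index item that @path points to, plus
 * any following delayed deletion items whose indexes match the consecutive
 * slots of that same leaf, then release the metadata space those items had
 * reserved and drop them from the delayed node.
 */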
845static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
846 struct btrfs_root *root,
847 struct btrfs_path *path,
848 struct btrfs_delayed_item *item)
849{
96d89923 850 const u64 ino = item->delayed_node->inode_id;
1f4f639f 851 struct btrfs_fs_info *fs_info = root->fs_info;
16cdcec7 852 struct btrfs_delayed_item *curr, *next;
659192e6 853 struct extent_buffer *leaf = path->nodes[0];
4bd02d90
FM
854 LIST_HEAD(batch_list);
855 int nitems, slot, last_slot;
856 int ret;
1f4f639f 857 u64 total_reserved_size = item->bytes_reserved;
16cdcec7 858
659192e6 859 ASSERT(leaf != NULL);
16cdcec7 860
4bd02d90
FM
861 slot = path->slots[0];
862 last_slot = btrfs_header_nritems(leaf) - 1;
659192e6
FM
863 /*
864 * Our caller always gives us a path pointing to an existing item, so
865 * this can not happen.
866 */
4bd02d90
FM
867 ASSERT(slot <= last_slot);
868 if (WARN_ON(slot > last_slot))
659192e6 869 return -ENOENT;
16cdcec7 870
4bd02d90
FM
871 nitems = 1;
872 curr = item;
873 list_add_tail(&curr->tree_list, &batch_list);
874
16cdcec7 875 /*
4bd02d90
FM
876 * Keep checking if the next delayed item matches the next item in the
877 * leaf - if so, we can add it to the batch of items to delete from the
878 * leaf.
16cdcec7 879 */
4bd02d90
FM
880 while (slot < last_slot) {
881 struct btrfs_key key;
16cdcec7 882
16cdcec7
MX
883 next = __btrfs_next_delayed_item(curr);
884 if (!next)
885 break;
886
4bd02d90
FM
887 slot++;
888 btrfs_item_key_to_cpu(leaf, &key, slot);
96d89923
FM
889 if (key.objectid != ino ||
890 key.type != BTRFS_DIR_INDEX_KEY ||
891 key.offset != next->index)
16cdcec7 892 break;
4bd02d90
FM
893 nitems++;
894 curr = next;
895 list_add_tail(&curr->tree_list, &batch_list);
1f4f639f 896 total_reserved_size += curr->bytes_reserved;
16cdcec7
MX
897 }
898
16cdcec7
MX
899 ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
900 if (ret)
4bd02d90 901 return ret;
16cdcec7 902
1f4f639f
NB
903 /* In case of BTRFS_FS_LOG_RECOVERING items won't have reserved space */
904 if (total_reserved_size > 0) {
905 /*
906 * Check btrfs_delayed_item_reserve_metadata() to see why we
907 * don't need to release/reserve qgroup space.
908 */
96d89923
FM
909 trace_btrfs_space_reservation(fs_info, "delayed_item", ino,
910 total_reserved_size, 0);
1f4f639f
NB
911 btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv,
912 total_reserved_size, NULL);
913 }
914
4bd02d90 915 list_for_each_entry_safe(curr, next, &batch_list, tree_list) {
16cdcec7
MX
916 list_del(&curr->tree_list);
917 btrfs_release_delayed_item(curr);
918 }
919
4bd02d90 920 return 0;
16cdcec7
MX
921}
922
923static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
924 struct btrfs_path *path,
925 struct btrfs_root *root,
926 struct btrfs_delayed_node *node)
927{
96d89923 928 struct btrfs_key key;
16cdcec7
MX
929 int ret = 0;
930
96d89923
FM
931 key.objectid = node->inode_id;
932 key.type = BTRFS_DIR_INDEX_KEY;
933
36baa2c7
FM
934 while (ret == 0) {
935 struct btrfs_delayed_item *item;
936
937 mutex_lock(&node->mutex);
938 item = __btrfs_first_delayed_deletion_item(node);
939 if (!item) {
940 mutex_unlock(&node->mutex);
941 break;
942 }
943
96d89923
FM
944 key.offset = item->index;
945 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
36baa2c7
FM
946 if (ret > 0) {
947 /*
948 * There's no matching item in the leaf. This means we
949 * have already deleted this item in a past run of the
950 * delayed items. We ignore errors when running delayed
951 * items from an async context, through a work queue job
952 * running btrfs_async_run_delayed_root(), and don't
953 * release delayed items that failed to complete. This
954 * is because we will retry later, and at transaction
955 * commit time we always run delayed items and will
956 * then deal with errors if they fail to run again.
957 *
958 * So just release delayed items for which we can't find
959 * an item in the tree, and move to the next item.
960 */
961 btrfs_release_path(path);
962 btrfs_release_delayed_item(item);
963 ret = 0;
964 } else if (ret == 0) {
965 ret = btrfs_batch_delete_items(trans, root, path, item);
966 btrfs_release_path(path);
967 }
16cdcec7 968
16cdcec7 969 /*
36baa2c7
FM
970 * We unlock and relock on each iteration, this is to prevent
971 * blocking other tasks for too long while we are being run from
972 * the async context (work queue job). Those tasks are typically
973 * running system calls like creat/mkdir/rename/unlink/etc which
974 * need to add delayed items to this delayed node.
16cdcec7 975 */
36baa2c7 976 mutex_unlock(&node->mutex);
16cdcec7
MX
977 }
978
16cdcec7
MX
979 return ret;
980}
981
982static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
983{
984 struct btrfs_delayed_root *delayed_root;
985
7cf35d91
MX
986 if (delayed_node &&
987 test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
be73f444 988 ASSERT(delayed_node->root);
7cf35d91 989 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
16cdcec7
MX
990 delayed_node->count--;
991
992 delayed_root = delayed_node->root->fs_info->delayed_root;
de3cb945 993 finish_one_item(delayed_root);
16cdcec7
MX
994 }
995}
996
67de1176
MX
997static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
998{
67de1176 999
a4cb90dc
JB
1000 if (test_and_clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) {
1001 struct btrfs_delayed_root *delayed_root;
67de1176 1002
a4cb90dc
JB
1003 ASSERT(delayed_node->root);
1004 delayed_node->count--;
1005
1006 delayed_root = delayed_node->root->fs_info->delayed_root;
1007 finish_one_item(delayed_root);
1008 }
67de1176
MX
1009}
1010
0e8c36a9
MX
1011static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1012 struct btrfs_root *root,
1013 struct btrfs_path *path,
1014 struct btrfs_delayed_node *node)
16cdcec7 1015{
2ff7e61e 1016 struct btrfs_fs_info *fs_info = root->fs_info;
16cdcec7
MX
1017 struct btrfs_key key;
1018 struct btrfs_inode_item *inode_item;
1019 struct extent_buffer *leaf;
67de1176 1020 int mod;
16cdcec7
MX
1021 int ret;
1022
16cdcec7 1023 key.objectid = node->inode_id;
962a298f 1024 key.type = BTRFS_INODE_ITEM_KEY;
16cdcec7 1025 key.offset = 0;
0e8c36a9 1026
67de1176
MX
1027 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1028 mod = -1;
1029 else
1030 mod = 1;
1031
1032 ret = btrfs_lookup_inode(trans, root, path, &key, mod);
bb385bed
JB
1033 if (ret > 0)
1034 ret = -ENOENT;
1035 if (ret < 0)
1036 goto out;
16cdcec7 1037
16cdcec7
MX
1038 leaf = path->nodes[0];
1039 inode_item = btrfs_item_ptr(leaf, path->slots[0],
1040 struct btrfs_inode_item);
1041 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1042 sizeof(struct btrfs_inode_item));
50564b65 1043 btrfs_mark_buffer_dirty(trans, leaf);
16cdcec7 1044
67de1176 1045 if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
a4cb90dc 1046 goto out;
67de1176 1047
9ba7c686
QW
1048 /*
1049 * Now we're going to delete the INODE_REF/EXTREF, which should be the
1050 * only one ref left. Check if the next item is an INODE_REF/EXTREF.
1051 *
1052 * But if we're the last item already, release and search for the last
1053 * INODE_REF/EXTREF.
1054 */
1055 if (path->slots[0] + 1 >= btrfs_header_nritems(leaf)) {
1056 key.objectid = node->inode_id;
1057 key.type = BTRFS_INODE_EXTREF_KEY;
1058 key.offset = (u64)-1;
1059
1060 btrfs_release_path(path);
1061 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1062 if (ret < 0)
1063 goto err_out;
1064 ASSERT(ret > 0);
1065 ASSERT(path->slots[0] > 0);
1066 ret = 0;
1067 path->slots[0]--;
1068 leaf = path->nodes[0];
1069 } else {
1070 path->slots[0]++;
1071 }
67de1176
MX
1072 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1073 if (key.objectid != node->inode_id)
1074 goto out;
67de1176
MX
1075 if (key.type != BTRFS_INODE_REF_KEY &&
1076 key.type != BTRFS_INODE_EXTREF_KEY)
1077 goto out;
1078
1079 /*
1080 * Delayed iref deletion is for the inode who has only one link,
1081 * so there is only one iref. The case that several irefs are
1082 * in the same item doesn't exist.
1083 */
c06016a0 1084 ret = btrfs_del_item(trans, root, path);
67de1176
MX
1085out:
1086 btrfs_release_delayed_iref(node);
67de1176
MX
1087 btrfs_release_path(path);
1088err_out:
4f5427cc 1089 btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
16cdcec7 1090 btrfs_release_delayed_inode(node);
16cdcec7 1091
04587ad9
JB
1092 /*
1093 * If we fail to update the delayed inode we need to abort the
1094 * transaction, because we could leave the inode with the improper
1095 * counts behind.
1096 */
1097 if (ret && ret != -ENOENT)
1098 btrfs_abort_transaction(trans, ret);
1099
67de1176 1100 return ret;
16cdcec7
MX
1101}
1102
0e8c36a9
MX
1103static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1104 struct btrfs_root *root,
1105 struct btrfs_path *path,
1106 struct btrfs_delayed_node *node)
1107{
1108 int ret;
1109
1110 mutex_lock(&node->mutex);
7cf35d91 1111 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
0e8c36a9
MX
1112 mutex_unlock(&node->mutex);
1113 return 0;
1114 }
1115
1116 ret = __btrfs_update_delayed_inode(trans, root, path, node);
1117 mutex_unlock(&node->mutex);
1118 return ret;
1119}
1120
4ea41ce0
MX
1121static inline int
1122__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1123 struct btrfs_path *path,
1124 struct btrfs_delayed_node *node)
1125{
1126 int ret;
1127
1128 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1129 if (ret)
1130 return ret;
1131
1132 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1133 if (ret)
1134 return ret;
1135
1136 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1137 return ret;
1138}
1139
79787eaa
JM
1140/*
1141 * Called when committing the transaction.
1142 * Returns 0 on success.
1143 * Returns < 0 on error and returns with an aborted transaction with any
1144 * outstanding delayed items cleaned up.
1145 */
b84acab3 1146static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
16cdcec7 1147{
b84acab3 1148 struct btrfs_fs_info *fs_info = trans->fs_info;
16cdcec7
MX
1149 struct btrfs_delayed_root *delayed_root;
1150 struct btrfs_delayed_node *curr_node, *prev_node;
1151 struct btrfs_path *path;
19fd2949 1152 struct btrfs_block_rsv *block_rsv;
16cdcec7 1153 int ret = 0;
96c3f433 1154 bool count = (nr > 0);
16cdcec7 1155
bf31f87f 1156 if (TRANS_ABORTED(trans))
79787eaa
JM
1157 return -EIO;
1158
16cdcec7
MX
1159 path = btrfs_alloc_path();
1160 if (!path)
1161 return -ENOMEM;
16cdcec7 1162
19fd2949 1163 block_rsv = trans->block_rsv;
0b246afa 1164 trans->block_rsv = &fs_info->delayed_block_rsv;
19fd2949 1165
ccdf9b30 1166 delayed_root = fs_info->delayed_root;
16cdcec7
MX
1167
1168 curr_node = btrfs_first_delayed_node(delayed_root);
a4559e6f 1169 while (curr_node && (!count || nr--)) {
4ea41ce0
MX
1170 ret = __btrfs_commit_inode_delayed_items(trans, path,
1171 curr_node);
16cdcec7 1172 if (ret) {
66642832 1173 btrfs_abort_transaction(trans, ret);
16cdcec7
MX
1174 break;
1175 }
1176
1177 prev_node = curr_node;
1178 curr_node = btrfs_next_delayed_node(curr_node);
e110f891
FM
1179 /*
1180 * See the comment below about releasing path before releasing
1181 * node. If the commit of delayed items was successful the path
1182 * should always be released, but in case of an error, it may
1183 * point to locked extent buffers (a leaf at the very least).
1184 */
1185 ASSERT(path->nodes[0] == NULL);
16cdcec7
MX
1186 btrfs_release_delayed_node(prev_node);
1187 }
1188
e110f891
FM
1189 /*
1190 * Release the path to avoid a potential deadlock and lockdep splat when
1191 * releasing the delayed node, as that requires taking the delayed node's
1192 * mutex. If another task starts running delayed items before we take
1193 * the mutex, it will first lock the mutex and then it may try to lock
1194 * the same btree path (leaf).
1195 */
1196 btrfs_free_path(path);
1197
96c3f433
JB
1198 if (curr_node)
1199 btrfs_release_delayed_node(curr_node);
19fd2949 1200 trans->block_rsv = block_rsv;
79787eaa 1201
16cdcec7
MX
1202 return ret;
1203}
1204
e5c304e6 1205int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
96c3f433 1206{
b84acab3 1207 return __btrfs_run_delayed_items(trans, -1);
96c3f433
JB
1208}
1209
e5c304e6 1210int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
96c3f433 1211{
b84acab3 1212 return __btrfs_run_delayed_items(trans, nr);
96c3f433
JB
1213}
1214
16cdcec7 1215int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
5f4b32e9 1216 struct btrfs_inode *inode)
16cdcec7 1217{
5f4b32e9 1218 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
4ea41ce0
MX
1219 struct btrfs_path *path;
1220 struct btrfs_block_rsv *block_rsv;
16cdcec7
MX
1221 int ret;
1222
1223 if (!delayed_node)
1224 return 0;
1225
1226 mutex_lock(&delayed_node->mutex);
1227 if (!delayed_node->count) {
1228 mutex_unlock(&delayed_node->mutex);
1229 btrfs_release_delayed_node(delayed_node);
1230 return 0;
1231 }
1232 mutex_unlock(&delayed_node->mutex);
1233
4ea41ce0 1234 path = btrfs_alloc_path();
3c77bd94
FDBM
1235 if (!path) {
1236 btrfs_release_delayed_node(delayed_node);
4ea41ce0 1237 return -ENOMEM;
3c77bd94 1238 }
4ea41ce0
MX
1239
1240 block_rsv = trans->block_rsv;
1241 trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1242
1243 ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1244
16cdcec7 1245 btrfs_release_delayed_node(delayed_node);
4ea41ce0
MX
1246 btrfs_free_path(path);
1247 trans->block_rsv = block_rsv;
1248
16cdcec7
MX
1249 return ret;
1250}
1251
aa79021f 1252int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
0e8c36a9 1253{
3ffbd68c 1254 struct btrfs_fs_info *fs_info = inode->root->fs_info;
0e8c36a9 1255 struct btrfs_trans_handle *trans;
aa79021f 1256 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
0e8c36a9
MX
1257 struct btrfs_path *path;
1258 struct btrfs_block_rsv *block_rsv;
1259 int ret;
1260
1261 if (!delayed_node)
1262 return 0;
1263
1264 mutex_lock(&delayed_node->mutex);
7cf35d91 1265 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
0e8c36a9
MX
1266 mutex_unlock(&delayed_node->mutex);
1267 btrfs_release_delayed_node(delayed_node);
1268 return 0;
1269 }
1270 mutex_unlock(&delayed_node->mutex);
1271
1272 trans = btrfs_join_transaction(delayed_node->root);
1273 if (IS_ERR(trans)) {
1274 ret = PTR_ERR(trans);
1275 goto out;
1276 }
1277
1278 path = btrfs_alloc_path();
1279 if (!path) {
1280 ret = -ENOMEM;
1281 goto trans_out;
1282 }
0e8c36a9
MX
1283
1284 block_rsv = trans->block_rsv;
2ff7e61e 1285 trans->block_rsv = &fs_info->delayed_block_rsv;
0e8c36a9
MX
1286
1287 mutex_lock(&delayed_node->mutex);
7cf35d91 1288 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
0e8c36a9
MX
1289 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1290 path, delayed_node);
1291 else
1292 ret = 0;
1293 mutex_unlock(&delayed_node->mutex);
1294
1295 btrfs_free_path(path);
1296 trans->block_rsv = block_rsv;
1297trans_out:
3a45bb20 1298 btrfs_end_transaction(trans);
2ff7e61e 1299 btrfs_btree_balance_dirty(fs_info);
0e8c36a9
MX
1300out:
1301 btrfs_release_delayed_node(delayed_node);
1302
1303 return ret;
1304}
1305
f48d1cf5 1306void btrfs_remove_delayed_node(struct btrfs_inode *inode)
16cdcec7
MX
1307{
1308 struct btrfs_delayed_node *delayed_node;
1309
f48d1cf5 1310 delayed_node = READ_ONCE(inode->delayed_node);
16cdcec7
MX
1311 if (!delayed_node)
1312 return;
1313
f48d1cf5 1314 inode->delayed_node = NULL;
16cdcec7
MX
1315 btrfs_release_delayed_node(delayed_node);
1316}
1317
de3cb945
CM
1318struct btrfs_async_delayed_work {
1319 struct btrfs_delayed_root *delayed_root;
1320 int nr;
d458b054 1321 struct btrfs_work work;
16cdcec7
MX
1322};
1323
d458b054 1324static void btrfs_async_run_delayed_root(struct btrfs_work *work)
16cdcec7 1325{
de3cb945
CM
1326 struct btrfs_async_delayed_work *async_work;
1327 struct btrfs_delayed_root *delayed_root;
16cdcec7
MX
1328 struct btrfs_trans_handle *trans;
1329 struct btrfs_path *path;
1330 struct btrfs_delayed_node *delayed_node = NULL;
1331 struct btrfs_root *root;
19fd2949 1332 struct btrfs_block_rsv *block_rsv;
de3cb945 1333 int total_done = 0;
16cdcec7 1334
de3cb945
CM
1335 async_work = container_of(work, struct btrfs_async_delayed_work, work);
1336 delayed_root = async_work->delayed_root;
16cdcec7
MX
1337
1338 path = btrfs_alloc_path();
1339 if (!path)
1340 goto out;
16cdcec7 1341
617c54a8
NB
1342 do {
1343 if (atomic_read(&delayed_root->items) <
1344 BTRFS_DELAYED_BACKGROUND / 2)
1345 break;
de3cb945 1346
617c54a8
NB
1347 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1348 if (!delayed_node)
1349 break;
de3cb945 1350
617c54a8 1351 root = delayed_node->root;
16cdcec7 1352
617c54a8
NB
1353 trans = btrfs_join_transaction(root);
1354 if (IS_ERR(trans)) {
1355 btrfs_release_path(path);
1356 btrfs_release_prepared_delayed_node(delayed_node);
1357 total_done++;
1358 continue;
1359 }
16cdcec7 1360
617c54a8
NB
1361 block_rsv = trans->block_rsv;
1362 trans->block_rsv = &root->fs_info->delayed_block_rsv;
19fd2949 1363
617c54a8 1364 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
16cdcec7 1365
617c54a8
NB
1366 trans->block_rsv = block_rsv;
1367 btrfs_end_transaction(trans);
1368 btrfs_btree_balance_dirty_nodelay(root->fs_info);
de3cb945 1369
617c54a8
NB
1370 btrfs_release_path(path);
1371 btrfs_release_prepared_delayed_node(delayed_node);
1372 total_done++;
de3cb945 1373
617c54a8
NB
1374 } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1375 || total_done < async_work->nr);
de3cb945 1376
16cdcec7
MX
1377 btrfs_free_path(path);
1378out:
de3cb945
CM
1379 wake_up(&delayed_root->wait);
1380 kfree(async_work);
16cdcec7
MX
1381}
1382
de3cb945 1383
16cdcec7 1384static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
a585e948 1385 struct btrfs_fs_info *fs_info, int nr)
16cdcec7 1386{
de3cb945 1387 struct btrfs_async_delayed_work *async_work;
16cdcec7 1388
de3cb945
CM
1389 async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1390 if (!async_work)
16cdcec7 1391 return -ENOMEM;
16cdcec7 1392
de3cb945 1393 async_work->delayed_root = delayed_root;
078b8b90 1394 btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL);
de3cb945 1395 async_work->nr = nr;
16cdcec7 1396
a585e948 1397 btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
16cdcec7
MX
1398 return 0;
1399}
1400
ccdf9b30 1401void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
e999376f 1402{
ccdf9b30 1403 WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
e999376f
CM
1404}
1405
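/*
 * Tell a waiter in btrfs_balance_delayed_items() whether it may stop waiting:
 * either items_seq has advanced by at least BTRFS_DELAYED_BATCH since @seq was
 * sampled (or wrapped below it), or the backlog of delayed items has dropped
 * below BTRFS_DELAYED_BACKGROUND.
 */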
0353808c 1406static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
de3cb945
CM
1407{
1408 int val = atomic_read(&delayed_root->items_seq);
1409
0353808c 1410 if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
de3cb945 1411 return 1;
0353808c
MX
1412
1413 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1414 return 1;
1415
de3cb945
CM
1416 return 0;
1417}
1418
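/*
 * Throttle producers of delayed items. Nothing is done while the backlog is
 * below BTRFS_DELAYED_BACKGROUND or the delayed workqueue is congested. Once
 * the backlog reaches BTRFS_DELAYED_WRITEBACK the caller queues an async
 * flush of the whole backlog and waits (see could_end_wait()); in between,
 * an async flush of BTRFS_DELAYED_BATCH delayed nodes is queued without
 * waiting.
 */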
2ff7e61e 1419void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
16cdcec7 1420{
2ff7e61e 1421 struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
16cdcec7 1422
8577787f
NB
1423 if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1424 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
16cdcec7
MX
1425 return;
1426
1427 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
0353808c 1428 int seq;
16cdcec7 1429 int ret;
0353808c
MX
1430
1431 seq = atomic_read(&delayed_root->items_seq);
de3cb945 1432
a585e948 1433 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
16cdcec7
MX
1434 if (ret)
1435 return;
1436
0353808c
MX
1437 wait_event_interruptible(delayed_root->wait,
1438 could_end_wait(delayed_root, seq));
4dd466d3 1439 return;
16cdcec7
MX
1440 }
1441
a585e948 1442 btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
16cdcec7
MX
1443}
1444
2c58c393
FM
1445static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
1446{
1447 struct btrfs_fs_info *fs_info = trans->fs_info;
1448 const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
1449
1450 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1451 return;
1452
1453 /*
1454 * Adding the new dir index item does not require touching another
1455 * leaf, so we can release 1 unit of metadata that was previously
1456 * reserved when starting the transaction. This applies only to
1457 * the case where we had a transaction start and excludes the
1458 * transaction join case (when replaying log trees).
1459 */
1460 trace_btrfs_space_reservation(fs_info, "transaction",
1461 trans->transid, bytes, 0);
1462 btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
1463 ASSERT(trans->bytes_reserved >= bytes);
1464 trans->bytes_reserved -= bytes;
1465}
1466
1467/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
16cdcec7 1468int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
2ff7e61e 1469 const char *name, int name_len,
6f45d185 1470 struct btrfs_inode *dir,
94a48aef 1471 struct btrfs_disk_key *disk_key, u8 flags,
16cdcec7
MX
1472 u64 index)
1473{
763748b2
FM
1474 struct btrfs_fs_info *fs_info = trans->fs_info;
1475 const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
16cdcec7
MX
1476 struct btrfs_delayed_node *delayed_node;
1477 struct btrfs_delayed_item *delayed_item;
1478 struct btrfs_dir_item *dir_item;
763748b2
FM
1479 bool reserve_leaf_space;
1480 u32 data_len;
16cdcec7
MX
1481 int ret;
1482
6f45d185 1483 delayed_node = btrfs_get_or_create_delayed_node(dir);
16cdcec7
MX
1484 if (IS_ERR(delayed_node))
1485 return PTR_ERR(delayed_node);
1486
96d89923 1487 delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len,
4c469798
FM
1488 delayed_node,
1489 BTRFS_DELAYED_INSERTION_ITEM);
16cdcec7
MX
1490 if (!delayed_item) {
1491 ret = -ENOMEM;
1492 goto release_node;
1493 }
1494
96d89923 1495 delayed_item->index = index;
16cdcec7
MX
1496
1497 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1498 dir_item->location = *disk_key;
3cae210f
QW
1499 btrfs_set_stack_dir_transid(dir_item, trans->transid);
1500 btrfs_set_stack_dir_data_len(dir_item, 0);
1501 btrfs_set_stack_dir_name_len(dir_item, name_len);
94a48aef 1502 btrfs_set_stack_dir_flags(dir_item, flags);
16cdcec7
MX
1503 memcpy((char *)(dir_item + 1), name, name_len);
1504
763748b2 1505 data_len = delayed_item->data_len + sizeof(struct btrfs_item);
8c2a3ca2 1506
16cdcec7 1507 mutex_lock(&delayed_node->mutex);
763748b2 1508
2c58c393
FM
1509 /*
1510 * First attempt to insert the delayed item. This is to make the error
1511 * handling path simpler in case we fail (-EEXIST). There's no risk of
1512 * any other task coming in and running the delayed item before we do
1513 * the metadata space reservation below, because we are holding the
1514 * delayed node's mutex and that mutex must also be locked before the
1515 * node's delayed items can be run.
1516 */
1517 ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
1518 if (unlikely(ret)) {
1519 btrfs_err(trans->fs_info,
1520"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
1521 name_len, name, index, btrfs_root_id(delayed_node->root),
1522 delayed_node->inode_id, dir->index_cnt,
1523 delayed_node->index_cnt, ret);
1524 btrfs_release_delayed_item(delayed_item);
1525 btrfs_release_dir_index_item_space(trans);
1526 mutex_unlock(&delayed_node->mutex);
1527 goto release_node;
1528 }
1529
763748b2
FM
1530 if (delayed_node->index_item_leaves == 0 ||
1531 delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
1532 delayed_node->curr_index_batch_size = data_len;
1533 reserve_leaf_space = true;
1534 } else {
1535 delayed_node->curr_index_batch_size += data_len;
1536 reserve_leaf_space = false;
1537 }
1538
1539 if (reserve_leaf_space) {
df492881 1540 ret = btrfs_delayed_item_reserve_metadata(trans, delayed_item);
763748b2
FM
1541 /*
1542 * Space was reserved for a dir index item insertion when we
1543 * started the transaction, so getting a failure here should be
1544 * impossible.
1545 */
1546 if (WARN_ON(ret)) {
763748b2 1547 btrfs_release_delayed_item(delayed_item);
2c58c393 1548 mutex_unlock(&delayed_node->mutex);
763748b2
FM
1549 goto release_node;
1550 }
1551
1552 delayed_node->index_item_leaves++;
2c58c393
FM
1553 } else {
1554 btrfs_release_dir_index_item_space(trans);
16cdcec7
MX
1555 }
1556 mutex_unlock(&delayed_node->mutex);
1557
1558release_node:
1559 btrfs_release_delayed_node(delayed_node);
1560 return ret;
1561}
1562
2ff7e61e 1563static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
16cdcec7 1564 struct btrfs_delayed_node *node,
96d89923 1565 u64 index)
16cdcec7
MX
1566{
1567 struct btrfs_delayed_item *item;
1568
1569 mutex_lock(&node->mutex);
4cbf37f5 1570 item = __btrfs_lookup_delayed_item(&node->ins_root.rb_root, index);
16cdcec7
MX
1571 if (!item) {
1572 mutex_unlock(&node->mutex);
1573 return 1;
1574 }
1575
763748b2
FM
1576 /*
1577 * For delayed items to insert, we track reserved metadata bytes based
1578 * on the number of leaves that we will use.
1579 * See btrfs_insert_delayed_dir_index() and
1580 * btrfs_delayed_item_reserve_metadata()).
1581 */
1582 ASSERT(item->bytes_reserved == 0);
1583 ASSERT(node->index_item_leaves > 0);
1584
1585 /*
1586 * If there's only one leaf reserved, we can decrement this item from the
1587 * current batch, otherwise we can not because we don't know which leaf
1588 * it belongs to. With the current limit on delayed items, we rarely
1589 * accumulate enough dir index items to fill more than one leaf (even
1590 * when using a leaf size of 4K).
1591 */
1592 if (node->index_item_leaves == 1) {
1593 const u32 data_len = item->data_len + sizeof(struct btrfs_item);
1594
1595 ASSERT(node->curr_index_batch_size >= data_len);
1596 node->curr_index_batch_size -= data_len;
1597 }
1598
16cdcec7 1599 btrfs_release_delayed_item(item);
763748b2
FM
1600
1601 /* If we now have no more dir index items, we can release all leaves. */
1602 if (RB_EMPTY_ROOT(&node->ins_root.rb_root)) {
1603 btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
1604 node->index_item_leaves = 0;
1605 }
1606
16cdcec7
MX
1607 mutex_unlock(&node->mutex);
1608 return 0;
1609}
1610
1611int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
e67bbbb9 1612 struct btrfs_inode *dir, u64 index)
16cdcec7
MX
1613{
1614 struct btrfs_delayed_node *node;
1615 struct btrfs_delayed_item *item;
16cdcec7
MX
1616 int ret;
1617
e67bbbb9 1618 node = btrfs_get_or_create_delayed_node(dir);
16cdcec7
MX
1619 if (IS_ERR(node))
1620 return PTR_ERR(node);
1621
96d89923 1622 ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, index);
16cdcec7
MX
1623 if (!ret)
1624 goto end;
1625
4c469798 1626 item = btrfs_alloc_delayed_item(0, node, BTRFS_DELAYED_DELETION_ITEM);
16cdcec7
MX
1627 if (!item) {
1628 ret = -ENOMEM;
1629 goto end;
1630 }
1631
96d89923 1632 item->index = index;
16cdcec7 1633
df492881 1634 ret = btrfs_delayed_item_reserve_metadata(trans, item);
16cdcec7
MX
1635 /*
1636 * We have reserved enough space when starting a new transaction, so a
1637 * metadata reservation failure here is impossible.
1638 */
933c22a7
QW
1639 if (ret < 0) {
1640 btrfs_err(trans->fs_info,
1641"metadata reservation failed for delayed dir item deltiona, should have been reserved");
1642 btrfs_release_delayed_item(item);
1643 goto end;
1644 }
16cdcec7
MX
1645
1646 mutex_lock(&node->mutex);
c9d02ab4 1647 ret = __btrfs_add_delayed_item(node, item);
16cdcec7 1648 if (unlikely(ret)) {
9add2945 1649 btrfs_err(trans->fs_info,
5d163e0e 1650 "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
4fd786e6
MT
1651 index, node->root->root_key.objectid,
1652 node->inode_id, ret);
933c22a7
QW
1653 btrfs_delayed_item_release_metadata(dir->root, item);
1654 btrfs_release_delayed_item(item);
16cdcec7
MX
1655 }
1656 mutex_unlock(&node->mutex);
1657end:
1658 btrfs_release_delayed_node(node);
1659 return ret;
1660}
1661
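/*
 * Copy the dir index counter cached in the delayed node, if any, into the
 * in-memory inode. Returns -ENOENT if the inode has no delayed node and
 * -EINVAL if the delayed node does not have a valid index counter.
 */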
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);

	if (!delayed_node)
		return -ENOENT;

	/*
	 * Since we hold the i_mutex of this directory, no new directory index
	 * can be added to the delayed node and index_cnt can not be updated
	 * now, so we don't need to lock the delayed node.
	 */
	if (!delayed_node->index_cnt) {
		btrfs_release_delayed_node(delayed_node);
		return -EINVAL;
	}

	inode->index_cnt = delayed_node->index_cnt;
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

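/*
 * Collect the delayed dir index items, both insertions and deletions, with an
 * index up to @last_index, taking an extra reference on each one so readdir
 * can use them after the delayed node's mutex is dropped.
 */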
bool btrfs_readdir_get_delayed_items(struct inode *inode,
				     u64 last_index,
				     struct list_head *ins_list,
				     struct list_head *del_list)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *item;

	delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
	if (!delayed_node)
		return false;

	/*
	 * We can only do one readdir with delayed items at a time because of
	 * item->readdir_list.
	 */
	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
	btrfs_inode_lock(BTRFS_I(inode), 0);

	mutex_lock(&delayed_node->mutex);
	item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (item && item->index <= last_index) {
		refcount_inc(&item->refs);
		list_add_tail(&item->readdir_list, ins_list);
		item = __btrfs_next_delayed_item(item);
	}

	item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (item && item->index <= last_index) {
		refcount_inc(&item->refs);
		list_add_tail(&item->readdir_list, del_list);
		item = __btrfs_next_delayed_item(item);
	}
	mutex_unlock(&delayed_node->mutex);
	/*
	 * This delayed node is still cached in the btrfs inode, so refs
	 * must be > 1 now, and we don't need to check whether it is going
	 * to be freed or not.
	 *
	 * Besides that, this function is used to read the directory, and we
	 * do not insert/delete delayed items during that period, so we also
	 * don't need to requeue or dequeue this delayed node.
	 */
	refcount_dec(&delayed_node->refs);

	return true;
}

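/*
 * Drop the references taken by btrfs_readdir_get_delayed_items() and restore
 * the shared inode lock that the VFS expects to release.
 */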
void btrfs_readdir_put_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list)
{
	struct btrfs_delayed_item *curr, *next;

	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);
		if (refcount_dec_and_test(&curr->refs))
			kfree(curr);
	}

	list_for_each_entry_safe(curr, next, del_list, readdir_list) {
		list_del(&curr->readdir_list);
		if (refcount_dec_and_test(&curr->refs))
			kfree(curr);
	}

	/*
	 * The VFS is going to do up_read(), so we need to downgrade back to a
	 * read lock.
	 */
	downgrade_write(&inode->i_rwsem);
}

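/*
 * Return 1 if the given dir index is covered by a pending delayed deletion
 * item, in which case readdir must skip it, and 0 otherwise.
 */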
int btrfs_should_delete_dir_index(struct list_head *del_list,
				  u64 index)
{
	struct btrfs_delayed_item *curr;
	int ret = 0;

	list_for_each_entry(curr, del_list, readdir_list) {
		if (curr->index > index)
			break;
		if (curr->index == index) {
			ret = 1;
			break;
		}
	}
	return ret;
}

/*
 * Read dir info stored in the delayed tree.
 */
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
				    struct list_head *ins_list)
{
	struct btrfs_dir_item *di;
	struct btrfs_delayed_item *curr, *next;
	struct btrfs_key location;
	char *name;
	int name_len;
	int over = 0;
	unsigned char d_type;

	/*
	 * The data of the delayed items can not change, so we don't need to
	 * lock them. And since we hold the i_mutex of the directory, nobody
	 * can delete any directory index now.
	 */
	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);

		if (curr->index < ctx->pos) {
			if (refcount_dec_and_test(&curr->refs))
				kfree(curr);
			continue;
		}

		ctx->pos = curr->index;

		di = (struct btrfs_dir_item *)curr->data;
		name = (char *)(di + 1);
		name_len = btrfs_stack_dir_name_len(di);

		d_type = fs_ftype_to_dtype(btrfs_dir_flags_to_ftype(di->type));
		btrfs_disk_key_to_cpu(&location, &di->location);

		over = !dir_emit(ctx, name, name_len,
				 location.objectid, d_type);

		if (refcount_dec_and_test(&curr->refs))
			kfree(curr);

		if (over)
			return 1;
		ctx->pos++;
	}
	return 0;
}

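/*
 * Copy the in-memory inode fields into the given stack inode item, which is
 * later written back to the inode item in the fs tree.
 */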
static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
				  struct btrfs_inode_item *inode_item,
				  struct inode *inode)
{
	u64 flags;

	btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
	btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
	btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
	btrfs_set_stack_inode_generation(inode_item,
					 BTRFS_I(inode)->generation);
	btrfs_set_stack_inode_sequence(inode_item,
				       inode_peek_iversion(inode));
	btrfs_set_stack_inode_transid(inode_item, trans->transid);
	btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
	flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
					  BTRFS_I(inode)->ro_flags);
	btrfs_set_stack_inode_flags(inode_item, flags);
	btrfs_set_stack_inode_block_group(inode_item, 0);

	btrfs_set_stack_timespec_sec(&inode_item->atime,
				     inode_get_atime_sec(inode));
	btrfs_set_stack_timespec_nsec(&inode_item->atime,
				      inode_get_atime_nsec(inode));

	btrfs_set_stack_timespec_sec(&inode_item->mtime,
				     inode_get_mtime_sec(inode));
	btrfs_set_stack_timespec_nsec(&inode_item->mtime,
				      inode_get_mtime_nsec(inode));

	btrfs_set_stack_timespec_sec(&inode_item->ctime,
				     inode_get_ctime_sec(inode));
	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
				      inode_get_ctime_nsec(inode));

	btrfs_set_stack_timespec_sec(&inode_item->otime, BTRFS_I(inode)->i_otime_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->otime, BTRFS_I(inode)->i_otime_nsec);
}

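/*
 * Fill the VFS inode from the inode item cached in its delayed node, if there
 * is one marked dirty, avoiding a lookup in the fs tree. Returns -ENOENT if
 * no such delayed inode item exists.
 */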
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_inode_item *inode_item;

	delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
	if (!delayed_node)
		return -ENOENT;

	mutex_lock(&delayed_node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return -ENOENT;
	}

	inode_item = &delayed_node->inode_item;

	i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
	i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
	btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
	btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
			round_up(i_size_read(inode), fs_info->sectorsize));
	inode->i_mode = btrfs_stack_inode_mode(inode_item);
	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
	BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);

	inode_set_iversion_queried(inode,
				   btrfs_stack_inode_sequence(inode_item));
	inode->i_rdev = 0;
	*rdev = btrfs_stack_inode_rdev(inode_item);
	btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
				&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);

	inode_set_atime(inode, btrfs_stack_timespec_sec(&inode_item->atime),
			btrfs_stack_timespec_nsec(&inode_item->atime));

	inode_set_mtime(inode, btrfs_stack_timespec_sec(&inode_item->mtime),
			btrfs_stack_timespec_nsec(&inode_item->mtime));

	inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime),
			btrfs_stack_timespec_nsec(&inode_item->ctime));

	BTRFS_I(inode)->i_otime_sec = btrfs_stack_timespec_sec(&inode_item->otime);
	BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime);

	inode->i_generation = BTRFS_I(inode)->generation;
	BTRFS_I(inode)->index_cnt = (u64)-1;

	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

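/*
 * Record an inode update in the inode's delayed node instead of updating the
 * inode item in the fs tree right away; the update is applied later when the
 * delayed node is run.
 */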
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
			       struct btrfs_inode *inode)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_delayed_node *delayed_node;
	int ret = 0;

	delayed_node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		fill_stack_inode_item(trans, &delayed_node->inode_item,
				      &inode->vfs_inode);
		goto release_node;
	}

	ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
	if (ret)
		goto release_node;

	fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode);
	set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
	delayed_node->count++;
	atomic_inc(&root->fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return ret;
}

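/*
 * Record the deletion of the inode's (single) INODE_REF item as a delayed
 * operation, so it can be done together with the delayed inode update.
 * Returns -EAGAIN during log recovery, when delayed inode operations are not
 * allowed.
 */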
int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_delayed_node *delayed_node;

	/*
	 * We don't do delayed inode updates during log recovery because it
	 * leads to enospc problems. This means we also can't do delayed
	 * inode refs.
	 */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		return -EAGAIN;

	delayed_node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	/*
	 * We don't reserve space for inode ref deletion because:
	 * - We ONLY do async inode ref deletion for an inode that has only
	 *   one link (i_nlink == 1), which means there is only one inode ref.
	 *   And in most cases, the inode ref and the inode item are in the
	 *   same leaf, and we will deal with them at the same time.
	 *   Since we are sure we will reserve the space for the inode item,
	 *   it is unnecessary to reserve space for inode ref deletion.
	 * - If the inode ref and the inode item are not in the same leaf,
	 *   we also needn't worry about the enospc problem, because we
	 *   reserve much more space for the inode update than it needs.
	 * - In the worst case, we can steal some space from the global
	 *   reservation, but that is very rare.
	 */
	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
		goto release_node;

	set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
	delayed_node->count++;
	atomic_inc(&fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

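/*
 * Throw away all pending work of a delayed node: release every queued
 * insertion and deletion item, the delayed iref and the delayed inode item,
 * together with any metadata reservations they hold.
 */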
static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_root *root = delayed_node->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_delayed_item *curr_item, *prev_item;

	mutex_lock(&delayed_node->mutex);
	curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (curr_item) {
		prev_item = curr_item;
		curr_item = __btrfs_next_delayed_item(prev_item);
		btrfs_release_delayed_item(prev_item);
	}

	if (delayed_node->index_item_leaves > 0) {
		btrfs_delayed_item_release_leaves(delayed_node,
					  delayed_node->index_item_leaves);
		delayed_node->index_item_leaves = 0;
	}

	curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (curr_item) {
		btrfs_delayed_item_release_metadata(root, curr_item);
		prev_item = curr_item;
		curr_item = __btrfs_next_delayed_item(prev_item);
		btrfs_release_delayed_item(prev_item);
	}

	btrfs_release_delayed_iref(delayed_node);

	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
		btrfs_release_delayed_inode(delayed_node);
	}
	mutex_unlock(&delayed_node->mutex);
}

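/* Throw away all pending delayed items of a single inode. */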
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return;

	__btrfs_kill_delayed_node(delayed_node);
	btrfs_release_delayed_node(delayed_node);
}

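/*
 * Kill all delayed nodes of a root, in batches of up to 8 nodes grabbed from
 * the root's xarray. References are only taken on nodes whose refcount is
 * still non-zero, to avoid racing with nodes that are being freed.
 */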
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
	unsigned long index = 0;
	struct btrfs_delayed_node *delayed_nodes[8];

	while (1) {
		struct btrfs_delayed_node *node;
		int count;

		spin_lock(&root->inode_lock);
		if (xa_empty(&root->delayed_nodes)) {
			spin_unlock(&root->inode_lock);
			return;
		}

		count = 0;
		xa_for_each_start(&root->delayed_nodes, index, node, index) {
			/*
			 * Don't increase refs in case the node is dead and
			 * about to be removed from the tree in the loop below
			 */
			if (refcount_inc_not_zero(&node->refs)) {
				delayed_nodes[count] = node;
				count++;
			}
			if (count >= ARRAY_SIZE(delayed_nodes))
				break;
		}
		spin_unlock(&root->inode_lock);
		index++;

		for (int i = 0; i < count; i++) {
			__btrfs_kill_delayed_node(delayed_nodes[i]);
			btrfs_release_delayed_node(delayed_nodes[i]);
		}
	}
}

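/* Kill every delayed node currently queued on the filesystem's delayed root. */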
void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
{
	struct btrfs_delayed_node *curr_node, *prev_node;

	curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
	while (curr_node) {
		__btrfs_kill_delayed_node(curr_node);

		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}
}

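/*
 * Collect all delayed insertion and deletion items of a directory inode into
 * the given log lists so they can be logged, skipping items that another
 * logging task has already placed on a log list.
 */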
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
				 struct list_head *ins_list,
				 struct list_head *del_list)
{
	struct btrfs_delayed_node *node;
	struct btrfs_delayed_item *item;

	node = btrfs_get_delayed_node(inode);
	if (!node)
		return;

	mutex_lock(&node->mutex);
	item = __btrfs_first_delayed_insertion_item(node);
	while (item) {
		/*
		 * It's possible that the item is already in a log list. This
		 * can happen in case two tasks are trying to log the same
		 * directory. For example, if we have tasks A and B:
		 *
		 * Task A collected the delayed items into a log list while
		 * under the inode's log_mutex (at btrfs_log_inode()), but it
		 * only releases the items after logging the inodes they point
		 * to (if they are new inodes), which happens after unlocking
		 * the log mutex;
		 *
		 * Task B enters btrfs_log_inode() and acquires the log_mutex
		 * of the same directory inode, before task A releases the
		 * delayed items. This can happen for example when logging some
		 * inode we need to trigger logging of its parent directory, so
		 * logging two files that have the same parent directory can
		 * lead to this.
		 *
		 * If this happens, just ignore delayed items already in a log
		 * list. All the tasks logging the directory are under a log
		 * transaction and whichever finishes first can not sync the log
		 * before the other completes and leaves the log transaction.
		 */
		if (!item->logged && list_empty(&item->log_list)) {
			refcount_inc(&item->refs);
			list_add_tail(&item->log_list, ins_list);
		}
		item = __btrfs_next_delayed_item(item);
	}

	item = __btrfs_first_delayed_deletion_item(node);
	while (item) {
		/* It may be non-empty, for the same reason mentioned above. */
		if (!item->logged && list_empty(&item->log_list)) {
			refcount_inc(&item->refs);
			list_add_tail(&item->log_list, del_list);
		}
		item = __btrfs_next_delayed_item(item);
	}
	mutex_unlock(&node->mutex);

	/*
	 * We are called during inode logging, which means the inode is in use
	 * and can not be evicted before we finish logging the inode. So we never
	 * have the last reference on the delayed inode.
	 * Also, we don't use btrfs_release_delayed_node() because that would
	 * requeue the delayed inode (change its order in the list of prepared
	 * nodes) and we don't want to make such a change because we don't
	 * create or delete delayed items.
	 */
	ASSERT(refcount_read(&node->refs) > 1);
	refcount_dec(&node->refs);
}

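/*
 * Mark the collected delayed items as logged and drop the references taken by
 * btrfs_log_get_delayed_items().
 */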
void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
				 struct list_head *ins_list,
				 struct list_head *del_list)
{
	struct btrfs_delayed_node *node;
	struct btrfs_delayed_item *item;
	struct btrfs_delayed_item *next;

	node = btrfs_get_delayed_node(inode);
	if (!node)
		return;

	mutex_lock(&node->mutex);

	list_for_each_entry_safe(item, next, ins_list, log_list) {
		item->logged = true;
		list_del_init(&item->log_list);
		if (refcount_dec_and_test(&item->refs))
			kfree(item);
	}

	list_for_each_entry_safe(item, next, del_list, log_list) {
		item->logged = true;
		list_del_init(&item->log_list);
		if (refcount_dec_and_test(&item->refs))
			kfree(item);
	}

	mutex_unlock(&node->mutex);

	/*
	 * We are called during inode logging, which means the inode is in use
	 * and can not be evicted before we finish logging the inode. So we never
	 * have the last reference on the delayed inode.
	 * Also, we don't use btrfs_release_delayed_node() because that would
	 * requeue the delayed inode (change its order in the list of prepared
	 * nodes) and we don't want to make such a change because we don't
	 * create or delete delayed items.
	 */
	ASSERT(refcount_read(&node->refs) > 1);
	refcount_dec(&node->refs);
}