// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
#include "misc.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end) \
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (!inode)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * For the file_extent_tree, we want to hold the inode lock when we look up
 * and update the disk_i_size, but lockdep will complain because, for the
 * io_tree, we hold the tree lock while taking the inode lock when setting
 * delalloc. These two things are unrelated, so make a class for the
 * file_extent_tree so we don't get the two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->inode = NULL;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}
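
/*
 * Illustrative sketch (not part of the original file): the typical lifetime
 * pairing for an io tree. The stack-allocated tree and the owner chosen here
 * are hypothetical; real owners are the IO_TREE_* enumerators from
 * extent-io-tree.h.
 */
static inline void example_io_tree_lifetime(struct btrfs_fs_info *fs_info)
{
	struct extent_io_tree tree;

	extent_io_tree_init(fs_info, &tree, IO_TREE_FS_EXCLUDED_EXTENTS);
	/* ... set/clear bits on byte ranges of the tree here ... */
	extent_io_tree_release(&tree);
}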

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
 * set on any extent state when calling this function).
 */
void extent_io_tree_release(struct extent_io_tree *tree)
{
	struct rb_root root;
	struct extent_state *state;
	struct extent_state *tmp;

	spin_lock(&tree->lock);
	root = tree->state;
	tree->state = RB_ROOT;
	rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
		/* Clear node to keep free_extent_state() happy. */
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCKED));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);
		cond_resched_lock(&tree->lock);
	}
	/*
	 * Should still be empty even after a reschedule, no other task should
	 * be accessing the tree anymore.
	 */
	ASSERT(RB_EMPTY_ROOT(&tree->state));
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the unsupported bits.
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't
 * change @node_ret and @parent_ret.
 *
 * If no such entry exists, fill @node_ret and @parent_ret with the insertion
 * point and return a pointer to the first entry that starts after @offset, or
 * NULL if there is no such entry.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							  u64 offset,
							  struct rb_node ***node_ret,
							  struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:     the tree to search
 * @offset:   offset that should fall within an entry in @tree
 * @next_ret: pointer to the first entry whose range ends at or after @offset
 * @prev_ret: pointer to the last entry whose range begins at or before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found.
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
		    "locking error: extent tree was modified by another thread while locked");
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *prev;

	prev = prev_state(state);
	if (prev && prev->end == state->start - 1 && prev->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, prev);
		state->start = prev->start;
		rb_erase(&prev->rb_node, &tree->state);
		RB_CLEAR_NODE(&prev->rb_node);
		free_extent_state(prev);
	}
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *next;

	next = next_state(state);
	if (next && next->start == state->end + 1 && next->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, next);
		state->end = next->end;
		rb_erase(&next->rb_node, &tree->state);
		RB_CLEAR_NODE(&next->rb_node);
		free_extent_state(next);
	}
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state field
 * are not merged because the end_io handlers need to be able to do operations
 * on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	merge_prev_state(tree, state);
	merge_next_state(tree, state);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
					 struct extent_state *state,
					 u32 bits,
					 struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 start = state->start - 1;
	const u64 end = state->end + 1;
	const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY));

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (state->end < entry->start) {
			if (try_merge && end == entry->start &&
			    state->state == entry->state) {
				if (tree->inode)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->start = state->start;
				merge_prev_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_left;
		} else if (state->end > entry->end) {
			if (try_merge && entry->end == start &&
			    state->state == entry->state) {
				if (tree->inode)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->end = state->end;
				merge_next_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_right;
		} else {
			btrfs_err(tree->fs_info,
				  "found node %llu %llu on insert of %llu %llu",
				  entry->start, entry->end, state->start, state->end);
			return ERR_PTR(-EEXIST);
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *
 *   prealloc: [orig->start, split - 1]
 *   orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->inode)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics, set the gfp mask accordingly
 * and unset the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}
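
/*
 * Illustrative sketch (not part of the original file): how a caller opts in
 * to non-blocking allocations. EXTENT_NOWAIT is a control bit, so
 * set_gfp_mask_from_bits() strips it before the bits reach the tree; the
 * range and bit used below are hypothetical.
 */
static inline int example_set_dirty_nowait(struct extent_io_tree *tree)
{
	/* All internal allocations use GFP_NOWAIT instead of GFP_NOFS. */
	return set_extent_bit(tree, 0, 4095, EXTENT_DIRTY | EXTENT_NOWAIT, NULL);
}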

/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Waiters on a range are woken when EXTENT_LOCKED is among the bits being
 * cleared. Include EXTENT_CLEAR_ALL_BITS in @bits to remove the given range
 * from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;
}

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			DEFINE_WAIT(wait);

			start = state->start;
			refcount_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock(&tree->lock);
			schedule();
			spin_lock(&tree->lock);
			finish_wait(&state->wq, &wait);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->end >= start && (state->state & bits))
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			   u64 *start_ret, u64 *end_ret, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					break;
			}
			/*
			 * If we found the next extent state, clear cached_state
			 * so that we can cache the next extent state below and
			 * avoid future calls going over the same extent state
			 * again. If we haven't found any, clear as well since
			 * it's now useless.
			 */
			free_extent_state(*cached_state);
			*cached_state = NULL;
			if (state)
				goto got_it;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}
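
/*
 * Illustrative sketch (not part of the original file): the usual caller
 * pattern for find_first_extent_bit(). The cached state lets consecutive
 * calls resume from the previous record instead of searching from the root.
 * The bit walked here (EXTENT_DIRTY) is just an example.
 */
static inline u64 example_count_dirty_ranges(struct extent_io_tree *tree)
{
	struct extent_state *cached = NULL;
	u64 start = 0;
	u64 found_start;
	u64 found_end;
	u64 nr = 0;

	while (find_first_extent_bit(tree, start, &found_start, &found_end,
				     EXTENT_DIRTY, &cached)) {
		nr++;
		if (found_end == (u64)-1)
			break;
		start = found_end + 1;
	}
	free_extent_state(cached);
	return nr;
}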

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}
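
/*
 * Illustrative sketch (not part of the original file): unlike
 * find_first_extent_bit(), this returns the whole contiguous area even when
 * it is currently represented by several adjacent records. A return of 0
 * means a range was found; EXTENT_DIRTY is just an example bit.
 */
static inline bool example_first_dirty_area(struct extent_io_tree *tree,
					    u64 *start, u64 *end)
{
	return find_contiguous_extent_bit(tree, 0, start, end, EXTENT_DIRTY) == 0;
}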

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of
 * the existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit(); failed_start must be used as the source
 * of truth, as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state |
	 *   or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid to free 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, changeset);
		if (IS_ERR(inserted_state)) {
			err = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, err);
		}

		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, struct extent_state **cached_state)
{
	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
				cached_state, NULL);
}

/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie.
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, NULL);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state |
	 *   or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, clear_bits, 0, NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid to free 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, NULL);
		if (IS_ERR(inserted_state)) {
			err = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, err);
		}
		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}
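
/*
 * Illustrative sketch (not part of the original file): a conversion of the
 * kind the comment above describes, flipping a range from EXTENT_DELALLOC to
 * EXTENT_DIRTY in one pass instead of a clear followed by a set.
 */
static inline int example_convert_delalloc_to_dirty(struct extent_io_tree *tree,
						    u64 start, u64 end)
{
	return convert_extent_bit(tree, start, end, EXTENT_DIRTY,
				  EXTENT_DELALLOC, NULL);
}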

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct extent_state *prev = NULL, *next = NULL;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		state = tree_search_prev_next(tree, start, &prev, &next);
		if (!state && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!state && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			*start_ret = prev->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!state) {
			state = next;
		}

		/*
		 * At this point 'state' either contains 'start' or start is
		 * before 'state'
		 */
		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *          |
				 *          start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as the
				 * beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *          |
				 *          start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                          start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 *        0        |
			 *                 start
			 */
			if (prev)
				*start_ret = prev->end + 1;
			else
				*start_ret = 0;
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (state) {
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
}

/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speedup searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig,
		     struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}
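
/*
 * Illustrative sketch (not part of the original file): counting how many
 * bytes of a hypothetical first 1 MiB region are currently flagged as
 * delalloc, without caring about holes (contig == 0).
 */
static inline u64 example_count_delalloc_bytes(struct extent_io_tree *tree)
{
	const u64 len = 1024 * 1024;
	u64 start = 0;

	return count_range_bits(tree, &start, len - 1, len,
				EXTENT_DELALLOC, 0, NULL);
}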

/*
 * Check if the single @bit exists in the given range.
 */
bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
	struct extent_state *state = NULL;
	bool bitset = false;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > end)
			break;

		if (state->state & bit) {
			bitset = true;
			break;
		}

		/* If state->end is (u64)-1, start will overflow to 0 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}
	spin_unlock(&tree->lock);
	return bitset;
}

/*
 * Check if the whole range [@start, @end] (inclusive) contains the single
 * @bit set.
 */
bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
		    struct extent_state *cached)
{
	struct extent_state *state = NULL;
	bool bitset = true;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > start) {
			bitset = false;
			break;
		}

		if (state->start > end)
			break;

		if ((state->state & bit) == 0) {
			bitset = false;
			break;
		}

		if (state->end == (u64)-1)
			break;

		/*
		 * Last entry (if state->end is (u64)-1 and overflow happens),
		 * or next entry starts after the range.
		 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (!state)
		bitset = false;
	spin_unlock(&tree->lock);
	return bitset;
}
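
/*
 * Illustrative sketch (not part of the original file): the two predicates
 * above differ in quantifier. test_range_bit_exists() asks "is @bit set
 * anywhere in the range?", while test_range_bit() asks "is @bit set over the
 * whole range, with no gaps?". The range and bit below are hypothetical.
 */
static inline void example_range_bit_queries(struct extent_io_tree *tree)
{
	bool any_dirty = test_range_bit_exists(tree, 0, 4095, EXTENT_DIRTY);
	bool all_dirty = test_range_bit(tree, 0, 4095, EXTENT_DIRTY, NULL);

	/* all_dirty implies any_dirty, never the other way around. */
	ASSERT(!all_dirty || any_dirty);
}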

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       NULL, cached, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached);
		return 0;
	}
	return 1;
}
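
/*
 * Illustrative sketch (not part of the original file): try_lock_extent()
 * returns 1 on success and 0 if some part of the range was already locked,
 * so a caller that cannot block typically bails out on 0. unlock_extent()
 * is the wrapper from extent-io-tree.h that clears EXTENT_LOCKED.
 */
static inline bool example_try_locked_op(struct extent_io_tree *tree,
					 u64 start, u64 end)
{
	struct extent_state *cached = NULL;

	if (!try_lock_extent(tree, start, end, &cached))
		return false;
	/* ... operate on the locked range here ... */
	unlock_extent(tree, start, end, &cached);
	return true;
}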

/*
 * Either insert or lock the state struct between start and end. If part of
 * the range is already locked, wait for EXTENT_LOCKED to clear and retry.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       &failed_state, cached_state, NULL);
	while (err == -EEXIST) {
		if (failed_start != start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached_state);

		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
				&failed_state);
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       &failed_start, &failed_state,
				       cached_state, NULL);
	}
	return err;
}

void __cold extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}