// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
#include "misc.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(&state->leak_list);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end) \
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
						       struct extent_io_tree *tree,
						       u64 start, u64 end)
{
	struct btrfs_inode *inode = tree->inode;
	u64 isize;

	if (!inode)
		return;

	isize = i_size_read(&inode->vfs_inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(inode->root->fs_info,
			       "%s: ino %llu isize %llu odd range [%llu,%llu]",
			       caller, btrfs_ino(inode), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add_state(state)		do {} while (0)
#define btrfs_leak_debug_del_state(state)		do {} while (0)
#define btrfs_extent_state_leak_debug_check()		do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

/*
 * For the file_extent_tree, we want to hold the inode lock when we look up and
 * update the disk_i_size, but lockdep will complain because, for our io_tree,
 * we hold the tree lock and get the inode lock when setting delalloc. These
 * two things are unrelated, so make a class for the file_extent_tree so we
 * don't get the two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	spin_lock_init(&tree->lock);
	tree->inode = NULL;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
 * set on any extent state when calling this function).
 */
void extent_io_tree_release(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		ASSERT(!(state->state & EXTENT_LOCKED));
		/*
		 * No need for a memory barrier here, as we are holding the tree
		 * lock and we only change the waitqueue while holding that lock
		 * (see wait_extent_bit()).
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

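/*
 * Illustrative usage sketch (not taken from the original code): a caller that
 * owns a private io tree pairs the two helpers above, assuming an fs_info and
 * a suitable IO_TREE_* owner value from extent-io-tree.h are in scope:
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(fs_info, &tree, IO_TREE_FS_EXCLUDED_EXTENTS);
 *	... set and clear bits on byte ranges of the tree ...
 *	extent_io_tree_release(&tree);
 */
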
static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might be not appropriate for the slab allocator,
	 * drop the unsupported bits
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add_state(state);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del_state(state);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

static inline struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
	struct rb_node *next = rb_prev(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @node_ret:   pointer where new node should be anchored (used when inserting an
 *              entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address and don't
 * change @node_ret and @parent_ret.
 *
 * If no such entry exists, fill @node_ret and @parent_ret and return a pointer
 * to the first entry that ends after @offset, or NULL if there is none.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
							   u64 offset,
							   struct rb_node ***node_ret,
							   struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_state *entry = NULL;

	while (*node) {
		prev = *node;
		entry = rb_entry(prev, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	if (node_ret)
		*node_ret = node;
	if (parent_ret)
		*parent_ret = prev;

	/* Search neighbors until we find the first one past the end */
	while (entry && offset > entry->end)
		entry = next_state(entry);

	return entry;
}

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:      the tree to search
 * @offset:    offset that should fall within an entry in @tree
 * @next_ret:  pointer to the first entry whose range ends after @offset
 * @prev_ret:  pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and other pointers are left untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
						  u64 offset,
						  struct extent_state **prev_ret,
						  struct extent_state **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **node = &root->rb_node;
	struct extent_state *orig_prev;
	struct extent_state *entry = NULL;

	ASSERT(prev_ret);
	ASSERT(next_ret);

	while (*node) {
		entry = rb_entry(*node, struct extent_state, rb_node);

		if (offset < entry->start)
			node = &(*node)->rb_left;
		else if (offset > entry->end)
			node = &(*node)->rb_right;
		else
			return entry;
	}

	orig_prev = entry;
	while (entry && offset > entry->end)
		entry = next_state(entry);
	*next_ret = entry;
	entry = orig_prev;

	while (entry && offset < entry->start)
		entry = prev_state(entry);
	*prev_ret = entry;

	return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
		    "locking error: extent tree was modified by another thread while locked");
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *prev;

	prev = prev_state(state);
	if (prev && prev->end == state->start - 1 && prev->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, prev);
		state->start = prev->start;
		rb_erase(&prev->rb_node, &tree->state);
		RB_CLEAR_NODE(&prev->rb_node);
		free_extent_state(prev);
	}
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
	struct extent_state *next;

	next = next_state(state);
	if (next && next->start == state->end + 1 && next->state == state->state) {
		if (tree->inode)
			btrfs_merge_delalloc_extent(tree->inode, state, next);
		state->end = next->end;
		rb_erase(&next->rb_node, &tree->state);
		RB_CLEAR_NODE(&next->rb_node);
		free_extent_state(next);
	}
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_IO in their state field are not merged because
 * the end_io handlers need to be able to do operations on them without
 * sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	merge_prev_state(tree, state);
	merge_next_state(tree, state);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_set_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
					 struct extent_state *state,
					 u32 bits,
					 struct extent_changeset *changeset)
{
	struct rb_node **node;
	struct rb_node *parent = NULL;
	const u64 start = state->start - 1;
	const u64 end = state->end + 1;
	const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY));

	set_state_bits(tree, state, bits, changeset);

	node = &tree->state.rb_node;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (state->end < entry->start) {
			if (try_merge && end == entry->start &&
			    state->state == entry->state) {
				if (tree->inode)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->start = state->start;
				merge_prev_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_left;
		} else if (state->end > entry->end) {
			if (try_merge && entry->end == start &&
			    state->state == entry->state) {
				if (tree->inode)
					btrfs_merge_delalloc_extent(tree->inode,
								    state, entry);
				entry->end = state->end;
				merge_next_state(tree, entry);
				state->state = 0;
				return entry;
			}
			node = &(*node)->rb_right;
		} else {
			btrfs_err(tree->fs_info,
				  "found node %llu %llu on insert of %llu %llu",
				  entry->start, entry->end, state->start, state->end);
			return ERR_PTR(-EEXIST);
		}
	}

	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);

	return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
			      struct extent_state *state, struct rb_node **node,
			      struct rb_node *parent, unsigned bits,
			      struct extent_changeset *changeset)
{
	set_state_bits(tree, state, bits, changeset);
	rb_link_node(&state->rb_node, parent, node);
	rb_insert_color(&state->rb_node, &tree->state);
	merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *   prealloc: [orig->start, split - 1]
 *   orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *parent = NULL;
	struct rb_node **node;

	if (tree->inode)
		btrfs_split_delalloc_extent(tree->inode, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	parent = &orig->rb_node;
	node = &parent;
	while (*node) {
		struct extent_state *entry;

		parent = *node;
		entry = rb_entry(parent, struct extent_state, rb_node);

		if (prealloc->end < entry->start) {
			node = &(*node)->rb_left;
		} else if (prealloc->end > entry->end) {
			node = &(*node)->rb_right;
		} else {
			free_extent_state(prealloc);
			return -EEXIST;
		}
	}

	rb_link_node(&prealloc->rb_node, parent, node);
	rb_insert_color(&prealloc->rb_node, &tree->state);

	return 0;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->inode)
		btrfs_clear_delalloc_extent(tree->inode, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

/*
 * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
 * unset the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
	*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
	*bits &= EXTENT_NOWAIT - 1;
}

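/*
 * Illustrative sketch of the helper above (assumed caller, assuming the usual
 * bit layout from extent-io-tree.h where EXTENT_NOWAIT is the highest control
 * bit):
 *
 *	u32 bits = EXTENT_DELALLOC | EXTENT_NOWAIT;
 *	gfp_t mask;
 *
 *	set_gfp_mask_from_bits(&bits, &mask);
 *	(afterwards: bits == EXTENT_DELALLOC and mask == GFP_NOWAIT)
 */
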
/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the given
 * range from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, struct extent_state **cached_state,
		       struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	u64 last_end;
	int err;
	int clear = 0;
	int wake;
	int delete = (bits & EXTENT_CLEAR_ALL_BITS);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	wake = (bits & EXTENT_LOCKED) ? 1 : 0;
	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/* This search will find the extents that end after our range starts. */
	state = tree_search(tree, start);
	if (!state)
		goto out;
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* The state doesn't have the wanted bits, go ahead. */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 * | ---- desired range ---- |
	 * | state | or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired bit
	 * on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, bits, wake, changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}

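/*
 * Illustrative sketch (assumed caller): dropping EXTENT_DELALLOC from a byte
 * range with the default GFP_NOFS allocations, no cached state and no
 * changeset:
 *
 *	__clear_extent_bit(io_tree, start, end, EXTENT_DELALLOC, NULL, NULL);
 */
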
/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, struct extent_state **cached_state)
{
	struct extent_state *state;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	/*
	 * Maintain cached_state, as we may not remove it from the tree if there
	 * are more bits than the bits we're waiting on set on this state.
	 */
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (extent_state_in_tree(state) &&
		    state->start <= start && start < state->end)
			goto process_node;
	}
	while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
		state = tree_search(tree, start);
process_node:
		if (!state)
			break;
		if (state->start > end)
			goto out;

		if (state->state & bits) {
			DEFINE_WAIT(wait);

			start = state->start;
			refcount_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock(&tree->lock);
			schedule();
			spin_lock(&tree->lock);
			finish_wait(&state->wq, &wait);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			state = next_state(state);
			goto process_node;
		}
	}
out:
	/* This state is no longer useful, clear it and free it up. */
	if (cached_state && *cached_state) {
		state = *cached_state;
		*cached_state = NULL;
		free_extent_state(state);
	}
	spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
							 u64 start, u32 bits)
{
	struct extent_state *state;

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, start);
	while (state) {
		if (state->end >= start && (state->state & bits))
			return state;
		state = next_state(state);
	}
	return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			   u64 *start_ret, u64 *end_ret, u32 bits,
			   struct extent_state **cached_state)
{
	struct extent_state *state;
	bool ret = false;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					goto got_it;
			}
			free_extent_state(*cached_state);
			*cached_state = NULL;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = true;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

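/*
 * Illustrative sketch (assumed caller): walking every range that has a given
 * bit set, reusing the cached state to speed up consecutive lookups:
 *
 *	struct extent_state *cached = NULL;
 *	u64 found_start, found_end, cur = 0;
 *
 *	while (find_first_extent_bit(tree, cur, &found_start, &found_end,
 *				     EXTENT_DIRTY, &cached)) {
 *		... process [found_start, found_end] ...
 *		cur = found_end + 1;
 *	}
 *	free_extent_state(cached);
 */
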
/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search(tree, cur_start);
	if (!state) {
		*end = (u64)-1;
		goto out;
	}

	while (state) {
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		state = next_state(state);
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of
 * the existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit; failed_start must be used as the source of
 * truth as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits, u64 *failed_start,
			    struct extent_state **failed_state,
			    struct extent_state **cached_state,
			    struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node **p = NULL;
	struct rb_node *parent = NULL;
	int err = 0;
	u64 last_start;
	u64 last_end;
	u32 exclusive_bits = (bits & EXTENT_LOCKED);
	gfp_t mask;

	set_gfp_mask_from_bits(&bits, &mask);
	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL && failed_state == NULL);
again:
	if (!prealloc) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state))
			goto hit_next;
	}
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
	state = tree_search_for_insert(tree, start, &p, &parent);
	if (!state) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		prealloc->start = start;
		prealloc->end = end;
		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on second
	 * half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again. It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		struct extent_state *inserted_state;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;

		/*
		 * Avoid to free 'prealloc' if it can be merged with the later
		 * extent.
		 */
		prealloc->start = start;
		prealloc->end = this_end;
		inserted_state = insert_state(tree, prealloc, bits, changeset);
		if (IS_ERR(inserted_state)) {
			err = PTR_ERR(inserted_state);
			extent_io_tree_panic(tree, err);
		}

		cache_state(inserted_state, cached_state);
		if (inserted_state == prealloc)
			prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			cache_state(state, failed_state);
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc)
			goto search_again;
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

}

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, struct extent_state **cached_state)
{
	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
				cached_state, NULL);
}

1248}
1249
e3974c66
JB
1250/*
1251 * Convert all bits in a given range from one bit to another
1252 *
1253 * @tree: the io tree to search
1254 * @start: the start offset in bytes
1255 * @end: the end offset in bytes (inclusive)
1256 * @bits: the bits to set in this range
1257 * @clear_bits: the bits to clear in this range
1258 * @cached_state: state that we're going to cache
1259 *
1260 * This will go through and set bits for the given range. If any states exist
1261 * already in this range they are set with the given bit and cleared of the
1262 * clear_bits. This is only meant to be used by things that are mergeable, ie.
1263 * converting from say DELALLOC to DIRTY. This is not meant to be used with
1264 * boundary bits like LOCK.
1265 *
1266 * All allocations are done with GFP_NOFS.
1267 */
1268int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1269 u32 bits, u32 clear_bits,
1270 struct extent_state **cached_state)
1271{
1272 struct extent_state *state;
1273 struct extent_state *prealloc = NULL;
59864325
JB
1274 struct rb_node **p = NULL;
1275 struct rb_node *parent = NULL;
e3974c66
JB
1276 int err = 0;
1277 u64 last_start;
1278 u64 last_end;
1279 bool first_iteration = true;
1280
1281 btrfs_debug_check_extent_io_range(tree, start, end);
1282 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1283 clear_bits);
1284
1285again:
1286 if (!prealloc) {
1287 /*
1288 * Best effort, don't worry if extent state allocation fails
1289 * here for the first iteration. We might have a cached state
1290 * that matches exactly the target range, in which case no
1291 * extent state allocations are needed. We'll only know this
1292 * after locking the tree.
1293 */
1294 prealloc = alloc_extent_state(GFP_NOFS);
1295 if (!prealloc && !first_iteration)
1296 return -ENOMEM;
1297 }
1298
1299 spin_lock(&tree->lock);
1300 if (cached_state && *cached_state) {
1301 state = *cached_state;
1302 if (state->start <= start && state->end > start &&
e349fd3b 1303 extent_state_in_tree(state))
e3974c66 1304 goto hit_next;
e3974c66
JB
1305 }
1306
1307 /*
1308 * This search will find all the extents that end after our range
1309 * starts.
1310 */
e349fd3b
JB
1311 state = tree_search_for_insert(tree, start, &p, &parent);
1312 if (!state) {
e3974c66
JB
1313 prealloc = alloc_extent_state_atomic(prealloc);
1314 if (!prealloc) {
1315 err = -ENOMEM;
1316 goto out;
1317 }
1318 prealloc->start = start;
1319 prealloc->end = end;
1320 insert_state_fast(tree, prealloc, p, parent, bits, NULL);
1321 cache_state(prealloc, cached_state);
1322 prealloc = NULL;
1323 goto out;
1324 }
e3974c66
JB
1325hit_next:
1326 last_start = state->start;
1327 last_end = state->end;
1328
1329 /*
1330 * | ---- desired range ---- |
1331 * | state |
1332 *
1333 * Just lock what we found and keep going.
1334 */
1335 if (state->start == start && state->end <= end) {
1336 set_state_bits(tree, state, bits, NULL);
1337 cache_state(state, cached_state);
1338 state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1339 if (last_end == (u64)-1)
1340 goto out;
1341 start = last_end + 1;
1342 if (start < end && state && state->start == start &&
1343 !need_resched())
1344 goto hit_next;
1345 goto search_again;
1346 }
1347
1348 /*
1349 * | ---- desired range ---- |
1350 * | state |
1351 * or
1352 * | ------------- state -------------- |
1353 *
1354 * We need to split the extent we found, and may flip bits on second
1355 * half.
1356 *
1357 * If the extent we found extends past our range, we just split and
1358 * search again. It'll get split again the next time though.
1359 *
1360 * If the extent we found is inside our range, we set the desired bit
1361 * on it.
1362 */
1363 if (state->start < start) {
1364 prealloc = alloc_extent_state_atomic(prealloc);
1365 if (!prealloc) {
1366 err = -ENOMEM;
1367 goto out;
1368 }
1369 err = split_state(tree, state, prealloc, start);
1370 if (err)
1371 extent_io_tree_panic(tree, err);
1372 prealloc = NULL;
1373 if (err)
1374 goto out;
1375 if (state->end <= end) {
1376 set_state_bits(tree, state, bits, NULL);
1377 cache_state(state, cached_state);
1378 state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1379 if (last_end == (u64)-1)
1380 goto out;
1381 start = last_end + 1;
1382 if (start < end && state && state->start == start &&
1383 !need_resched())
1384 goto hit_next;
1385 }
1386 goto search_again;
1387 }
1388 /*
1389 * | ---- desired range ---- |
1390 * | state | or | state |
1391 *
1392 * There's a hole, we need to insert something in it and ignore the
1393 * extent we found.
1394 */
1395 if (state->start > start) {
1396 u64 this_end;
c91ea4bf
FM
1397 struct extent_state *inserted_state;
1398
e3974c66
JB
1399 if (end < last_start)
1400 this_end = end;
1401 else
1402 this_end = last_start - 1;
1403
1404 prealloc = alloc_extent_state_atomic(prealloc);
1405 if (!prealloc) {
1406 err = -ENOMEM;
1407 goto out;
1408 }
1409
1410 /*
1411 * Avoid to free 'prealloc' if it can be merged with the later
1412 * extent.
1413 */
1414 prealloc->start = start;
1415 prealloc->end = this_end;
c91ea4bf
FM
1416 inserted_state = insert_state(tree, prealloc, bits, NULL);
1417 if (IS_ERR(inserted_state)) {
1418 err = PTR_ERR(inserted_state);
e3974c66 1419 extent_io_tree_panic(tree, err);
c91ea4bf
FM
1420 }
1421 cache_state(inserted_state, cached_state);
1422 if (inserted_state == prealloc)
1423 prealloc = NULL;
e3974c66
JB
1424 start = this_end + 1;
1425 goto search_again;
1426 }
1427 /*
1428 * | ---- desired range ---- |
1429 * | state |
1430 *
1431 * We need to split the extent, and set the bit on the first half.
1432 */
1433 if (state->start <= end && state->end > end) {
1434 prealloc = alloc_extent_state_atomic(prealloc);
1435 if (!prealloc) {
1436 err = -ENOMEM;
1437 goto out;
1438 }
1439
1440 err = split_state(tree, state, prealloc, end + 1);
1441 if (err)
1442 extent_io_tree_panic(tree, err);
1443
1444 set_state_bits(tree, prealloc, bits, NULL);
1445 cache_state(prealloc, cached_state);
1446 clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
1447 prealloc = NULL;
1448 goto out;
1449 }
1450
1451search_again:
1452 if (start > end)
1453 goto out;
1454 spin_unlock(&tree->lock);
1455 cond_resched();
1456 first_iteration = false;
1457 goto again;
1458
1459out:
1460 spin_unlock(&tree->lock);
1461 if (prealloc)
1462 free_extent_state(prealloc);
1463
1464 return err;
1465}
1466
38830018
JB
1467/*
1468 * Find the first range that has @bits not set. This range could start before
1469 * @start.
1470 *
1471 * @tree: the tree to search
1472 * @start: offset at/after which the found extent should start
1473 * @start_ret: records the beginning of the range
1474 * @end_ret: records the end of the range (inclusive)
1475 * @bits: the set of bits which must be unset
1476 *
1477 * Since unallocated range is also considered one which doesn't have the bits
1478 * set it's possible that @end_ret contains -1, this happens in case the range
1479 * spans (last_range_end, end of device]. In this case it's up to the caller to
1480 * trim @end_ret to the appropriate size.
1481 */
1482void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1483 u64 *start_ret, u64 *end_ret, u32 bits)
1484{
1485 struct extent_state *state;
26df39a9 1486 struct extent_state *prev = NULL, *next = NULL;
38830018
JB
1487
1488 spin_lock(&tree->lock);
1489
1490 /* Find first extent with bits cleared */
1491 while (1) {
43b068ca
JB
1492 state = tree_search_prev_next(tree, start, &prev, &next);
1493 if (!state && !next && !prev) {
38830018
JB
1494 /*
1495 * Tree is completely empty, send full range and let
1496 * caller deal with it
1497 */
1498 *start_ret = 0;
1499 *end_ret = -1;
1500 goto out;
43b068ca 1501 } else if (!state && !next) {
38830018
JB
1502 /*
1503 * We are past the last allocated chunk, set start at
1504 * the end of the last extent.
1505 */
43b068ca 1506 *start_ret = prev->end + 1;
38830018
JB
1507 *end_ret = -1;
1508 goto out;
43b068ca
JB
1509 } else if (!state) {
1510 state = next;
38830018 1511 }
43b068ca 1512
38830018 1513 /*
43b068ca
JB
1514 * At this point 'state' either contains 'start' or start is
1515 * before 'state'
38830018 1516 */
38830018
JB
1517 if (in_range(start, state->start, state->end - state->start + 1)) {
1518 if (state->state & bits) {
1519 /*
1520 * |--range with bits sets--|
1521 * |
1522 * start
1523 */
1524 start = state->end + 1;
1525 } else {
1526 /*
1527 * 'start' falls within a range that doesn't
1528 * have the bits set, so take its start as the
1529 * beginning of the desired range
1530 *
1531 * |--range with bits cleared----|
1532 * |
1533 * start
1534 */
1535 *start_ret = state->start;
1536 break;
1537 }
1538 } else {
1539 /*
1540 * |---prev range---|---hole/unset---|---node range---|
1541 * |
1542 * start
1543 *
1544 * or
1545 *
1546 * |---hole/unset--||--first node--|
1547 * 0 |
1548 * start
1549 */
43b068ca
JB
1550 if (prev)
1551 *start_ret = prev->end + 1;
1552 else
38830018 1553 *start_ret = 0;
38830018
JB
1554 break;
1555 }
1556 }
1557
1558 /*
1559 * Find the longest stretch from start until an entry which has the
1560 * bits set
1561 */
ccaeff92 1562 while (state) {
38830018
JB
1563 if (state->end >= start && !(state->state & bits)) {
1564 *end_ret = state->end;
1565 } else {
1566 *end_ret = state->start - 1;
1567 break;
1568 }
ccaeff92 1569 state = next_state(state);
38830018
JB
1570 }
1571out:
1572 spin_unlock(&tree->lock);
1573}
1574
/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speedup searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig,
		     struct extent_state **cached_state)
{
	struct extent_state *state = NULL;
	struct extent_state *cached;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end < cur_start))
		return 0;

	spin_lock(&tree->lock);

	if (!cached_state || !*cached_state)
		goto search;

	cached = *cached_state;

	if (!extent_state_in_tree(cached))
		goto search;

	if (cached->start <= cur_start && cur_start <= cached->end) {
		state = cached;
	} else if (cached->start > cur_start) {
		struct extent_state *prev;

		/*
		 * The cached state starts after our search range's start. Check
		 * if the previous state record starts at or before the range we
		 * are looking for, and if so, use it - this is a common case
		 * when there are holes between records in the tree. If there is
		 * no previous state record, we can start from our cached state.
		 */
		prev = prev_state(cached);
		if (!prev)
			state = cached;
		else if (prev->start <= cur_start && cur_start <= prev->end)
			state = prev;
	}

	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
search:
	if (!state)
		state = tree_search(tree, cur_start);

	while (state) {
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		state = next_state(state);
	}

	if (cached_state) {
		free_extent_state(*cached_state);
		*cached_state = state;
		if (state)
			refcount_inc(&state->refs);
	}

	spin_unlock(&tree->lock);

	return total_bytes;
}

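/*
 * Illustrative sketch (assumed caller): counting how many bytes in the range
 * [0, range_end] are marked delalloc, without a cached state or a byte limit:
 *
 *	u64 first_byte = 0;
 *	u64 bytes;
 *
 *	bytes = count_range_bits(io_tree, &first_byte, range_end, (u64)-1,
 *				 EXTENT_DELALLOC, 0, NULL);
 */
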
/*
 * Check if the single @bit exists in the given range.
 */
bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
	struct extent_state *state = NULL;
	bool bitset = false;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > end)
			break;

		if (state->state & bit) {
			bitset = true;
			break;
		}

		/* If state->end is (u64)-1, start will overflow to 0 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}
	spin_unlock(&tree->lock);
	return bitset;
}

/*
 * Check if the whole range [@start, @end] contains the single @bit set.
 */
bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
		    struct extent_state *cached)
{
	struct extent_state *state = NULL;
	bool bitset = true;

	ASSERT(is_power_of_2(bit));

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		state = cached;
	else
		state = tree_search(tree, start);
	while (state && start <= end) {
		if (state->start > start) {
			bitset = false;
			break;
		}

		if (state->start > end)
			break;

		if ((state->state & bit) == 0) {
			bitset = false;
			break;
		}

		if (state->end == (u64)-1)
			break;

		/*
		 * Last entry (if state->end is (u64)-1 and overflow happens),
		 * or next entry starts after the range.
		 */
		start = state->end + 1;
		if (start > end || start == 0)
			break;
		state = next_state(state);
	}

	/* We ran out of states and were still inside of our range. */
	if (!state)
		bitset = false;
	spin_unlock(&tree->lock);
	return bitset;
}

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	ASSERT(!(bits & EXTENT_LOCKED));

	return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    struct extent_state **cached)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       NULL, cached, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached);
		return 0;
	}
	return 1;
}

/*
 * Either insert or lock the state struct between start and end. Use mask to
 * tell us if waiting is desired.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		struct extent_state **cached_state)
{
	struct extent_state *failed_state = NULL;
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
			       &failed_state, cached_state, NULL);
	while (err == -EEXIST) {
		if (failed_start != start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, cached_state);

		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
				&failed_state);
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       &failed_start, &failed_state,
				       cached_state, NULL);
	}
	return err;
}

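/*
 * Illustrative sketch (assumed caller): the usual lock/unlock pairing, with
 * unlock_extent() being the clearing wrapper declared in extent-io-tree.h:
 *
 *	struct extent_state *cached = NULL;
 *
 *	lock_extent(io_tree, start, end, &cached);
 *	... operate on the locked byte range ...
 *	unlock_extent(io_tree, start, end, &cached);
 */
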
void __cold extent_state_free_cachep(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

int __init extent_state_init_cachep(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	return 0;
}