]>
git.ipfire.org Git - thirdparty/linux.git/blob - fs/btrfs/extent_map.c
0d1167f454d2a3bbeddfa51823d4039a04589a5a
1 // SPDX-License-Identifier: GPL-2.0
4 #include <linux/slab.h>
5 #include <linux/spinlock.h>
9 #include "extent_map.h"
10 #include "compression.h"
11 #include "btrfs_inode.h"
14 static struct kmem_cache
*extent_map_cache
;
16 int __init
extent_map_init(void)
18 extent_map_cache
= kmem_cache_create("btrfs_extent_map",
19 sizeof(struct extent_map
), 0,
20 SLAB_MEM_SPREAD
, NULL
);
21 if (!extent_map_cache
)
26 void __cold
extent_map_exit(void)
28 kmem_cache_destroy(extent_map_cache
);
32 * Initialize the extent tree @tree. Should be called for each new inode or
33 * other user of the extent_map interface.
35 void extent_map_tree_init(struct extent_map_tree
*tree
)
37 tree
->map
= RB_ROOT_CACHED
;
38 INIT_LIST_HEAD(&tree
->modified_extents
);
39 rwlock_init(&tree
->lock
);
43 * Allocate a new extent_map structure. The new structure is returned with a
44 * reference count of one and needs to be freed using free_extent_map()
46 struct extent_map
*alloc_extent_map(void)
48 struct extent_map
*em
;
49 em
= kmem_cache_zalloc(extent_map_cache
, GFP_NOFS
);
52 RB_CLEAR_NODE(&em
->rb_node
);
53 em
->compress_type
= BTRFS_COMPRESS_NONE
;
54 refcount_set(&em
->refs
, 1);
55 INIT_LIST_HEAD(&em
->list
);
60 * Drop the reference count on @em by one and free the structure if the reference
63 void free_extent_map(struct extent_map
*em
)
67 if (refcount_dec_and_test(&em
->refs
)) {
68 WARN_ON(extent_map_in_tree(em
));
69 WARN_ON(!list_empty(&em
->list
));
70 kmem_cache_free(extent_map_cache
, em
);
74 /* Do the math around the end of an extent, handling wrapping. */
75 static u64
range_end(u64 start
, u64 len
)
77 if (start
+ len
< start
)
82 static int tree_insert(struct rb_root_cached
*root
, struct extent_map
*em
)
84 struct rb_node
**p
= &root
->rb_root
.rb_node
;
85 struct rb_node
*parent
= NULL
;
86 struct extent_map
*entry
= NULL
;
87 struct rb_node
*orig_parent
= NULL
;
88 u64 end
= range_end(em
->start
, em
->len
);
93 entry
= rb_entry(parent
, struct extent_map
, rb_node
);
95 if (em
->start
< entry
->start
) {
97 } else if (em
->start
>= extent_map_end(entry
)) {
105 orig_parent
= parent
;
106 while (parent
&& em
->start
>= extent_map_end(entry
)) {
107 parent
= rb_next(parent
);
108 entry
= rb_entry(parent
, struct extent_map
, rb_node
);
111 if (end
> entry
->start
&& em
->start
< extent_map_end(entry
))
114 parent
= orig_parent
;
115 entry
= rb_entry(parent
, struct extent_map
, rb_node
);
116 while (parent
&& em
->start
< entry
->start
) {
117 parent
= rb_prev(parent
);
118 entry
= rb_entry(parent
, struct extent_map
, rb_node
);
121 if (end
> entry
->start
&& em
->start
< extent_map_end(entry
))
124 rb_link_node(&em
->rb_node
, orig_parent
, p
);
125 rb_insert_color_cached(&em
->rb_node
, root
, leftmost
);
130 * Search through the tree for an extent_map with a given offset. If it can't
131 * be found, try to find some neighboring extents
133 static struct rb_node
*__tree_search(struct rb_root
*root
, u64 offset
,
134 struct rb_node
**prev_or_next_ret
)
136 struct rb_node
*n
= root
->rb_node
;
137 struct rb_node
*prev
= NULL
;
138 struct rb_node
*orig_prev
= NULL
;
139 struct extent_map
*entry
;
140 struct extent_map
*prev_entry
= NULL
;
142 ASSERT(prev_or_next_ret
);
145 entry
= rb_entry(n
, struct extent_map
, rb_node
);
149 if (offset
< entry
->start
)
151 else if (offset
>= extent_map_end(entry
))
158 while (prev
&& offset
>= extent_map_end(prev_entry
)) {
159 prev
= rb_next(prev
);
160 prev_entry
= rb_entry(prev
, struct extent_map
, rb_node
);
164 * Previous extent map found, return as in this case the caller does not
165 * care about the next one.
168 *prev_or_next_ret
= prev
;
173 prev_entry
= rb_entry(prev
, struct extent_map
, rb_node
);
174 while (prev
&& offset
< prev_entry
->start
) {
175 prev
= rb_prev(prev
);
176 prev_entry
= rb_entry(prev
, struct extent_map
, rb_node
);
178 *prev_or_next_ret
= prev
;
183 static inline u64
extent_map_block_end(const struct extent_map
*em
)
185 if (em
->block_start
+ em
->block_len
< em
->block_start
)
187 return em
->block_start
+ em
->block_len
;
190 static bool can_merge_extent_map(const struct extent_map
*em
)
192 if (test_bit(EXTENT_FLAG_PINNED
, &em
->flags
))
195 /* Don't merge compressed extents, we need to know their actual size. */
196 if (test_bit(EXTENT_FLAG_COMPRESSED
, &em
->flags
))
199 if (test_bit(EXTENT_FLAG_LOGGING
, &em
->flags
))
203 * We don't want to merge stuff that hasn't been written to the log yet
204 * since it may not reflect exactly what is on disk, and that would be
207 if (!list_empty(&em
->list
))
213 /* Check to see if two extent_map structs are adjacent and safe to merge. */
214 static bool mergeable_maps(const struct extent_map
*prev
, const struct extent_map
*next
)
216 if (extent_map_end(prev
) != next
->start
)
219 if (prev
->flags
!= next
->flags
)
222 if (next
->block_start
< EXTENT_MAP_LAST_BYTE
- 1)
223 return next
->block_start
== extent_map_block_end(prev
);
225 /* HOLES and INLINE extents. */
226 return next
->block_start
== prev
->block_start
;
229 static void try_merge_map(struct extent_map_tree
*tree
, struct extent_map
*em
)
231 struct extent_map
*merge
= NULL
;
235 * We can't modify an extent map that is in the tree and that is being
236 * used by another task, as it can cause that other task to see it in
237 * inconsistent state during the merging. We always have 1 reference for
238 * the tree and 1 for this task (which is unpinning the extent map or
239 * clearing the logging flag), so anything > 2 means it's being used by
242 if (refcount_read(&em
->refs
) > 2)
245 if (!can_merge_extent_map(em
))
248 if (em
->start
!= 0) {
249 rb
= rb_prev(&em
->rb_node
);
251 merge
= rb_entry(rb
, struct extent_map
, rb_node
);
252 if (rb
&& can_merge_extent_map(merge
) && mergeable_maps(merge
, em
)) {
253 em
->start
= merge
->start
;
254 em
->orig_start
= merge
->orig_start
;
255 em
->len
+= merge
->len
;
256 em
->block_len
+= merge
->block_len
;
257 em
->block_start
= merge
->block_start
;
258 em
->mod_len
= (em
->mod_len
+ em
->mod_start
) - merge
->mod_start
;
259 em
->mod_start
= merge
->mod_start
;
260 em
->generation
= max(em
->generation
, merge
->generation
);
261 set_bit(EXTENT_FLAG_MERGED
, &em
->flags
);
263 rb_erase_cached(&merge
->rb_node
, &tree
->map
);
264 RB_CLEAR_NODE(&merge
->rb_node
);
265 free_extent_map(merge
);
269 rb
= rb_next(&em
->rb_node
);
271 merge
= rb_entry(rb
, struct extent_map
, rb_node
);
272 if (rb
&& can_merge_extent_map(merge
) && mergeable_maps(em
, merge
)) {
273 em
->len
+= merge
->len
;
274 em
->block_len
+= merge
->block_len
;
275 rb_erase_cached(&merge
->rb_node
, &tree
->map
);
276 RB_CLEAR_NODE(&merge
->rb_node
);
277 em
->mod_len
= (merge
->mod_start
+ merge
->mod_len
) - em
->mod_start
;
278 em
->generation
= max(em
->generation
, merge
->generation
);
279 set_bit(EXTENT_FLAG_MERGED
, &em
->flags
);
280 free_extent_map(merge
);
285 * Unpin an extent from the cache.
287 * @inode: the inode from which we are unpinning an extent range
288 * @start: logical offset in the file
289 * @len: length of the extent
290 * @gen: generation that this extent has been modified in
292 * Called after an extent has been written to disk properly. Set the generation
293 * to the generation that actually added the file item to the inode so we know
294 * we need to sync this extent when we call fsync().
296 int unpin_extent_cache(struct btrfs_inode
*inode
, u64 start
, u64 len
, u64 gen
)
298 struct btrfs_fs_info
*fs_info
= inode
->root
->fs_info
;
299 struct extent_map_tree
*tree
= &inode
->extent_tree
;
301 struct extent_map
*em
;
302 bool prealloc
= false;
304 write_lock(&tree
->lock
);
305 em
= lookup_extent_mapping(tree
, start
, len
);
309 "no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
310 btrfs_ino(inode
), btrfs_root_id(inode
->root
),
315 if (WARN_ON(em
->start
!= start
))
317 "found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
318 btrfs_ino(inode
), btrfs_root_id(inode
->root
),
319 em
->start
, start
, len
, gen
);
321 em
->generation
= gen
;
322 clear_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
323 em
->mod_start
= em
->start
;
324 em
->mod_len
= em
->len
;
326 if (test_bit(EXTENT_FLAG_FILLING
, &em
->flags
)) {
328 clear_bit(EXTENT_FLAG_FILLING
, &em
->flags
);
331 try_merge_map(tree
, em
);
334 em
->mod_start
= em
->start
;
335 em
->mod_len
= em
->len
;
340 write_unlock(&tree
->lock
);
345 void clear_em_logging(struct extent_map_tree
*tree
, struct extent_map
*em
)
347 lockdep_assert_held_write(&tree
->lock
);
349 clear_bit(EXTENT_FLAG_LOGGING
, &em
->flags
);
350 if (extent_map_in_tree(em
))
351 try_merge_map(tree
, em
);
354 static inline void setup_extent_mapping(struct extent_map_tree
*tree
,
355 struct extent_map
*em
,
358 refcount_inc(&em
->refs
);
359 em
->mod_start
= em
->start
;
360 em
->mod_len
= em
->len
;
362 ASSERT(list_empty(&em
->list
));
365 list_add(&em
->list
, &tree
->modified_extents
);
367 try_merge_map(tree
, em
);
371 * Add new extent map to the extent tree
373 * @tree: tree to insert new map in
375 * @modified: indicate whether the given @em should be added to the
376 * modified list, which indicates the extent needs to be logged
378 * Insert @em into @tree or perform a simple forward/backward merge with
379 * existing mappings. The extent_map struct passed in will be inserted
380 * into the tree directly, with an additional reference taken, or a
381 * reference dropped if the merge attempt was successful.
383 static int add_extent_mapping(struct extent_map_tree
*tree
,
384 struct extent_map
*em
, int modified
)
388 lockdep_assert_held_write(&tree
->lock
);
390 ret
= tree_insert(&tree
->map
, em
);
394 setup_extent_mapping(tree
, em
, modified
);
399 static struct extent_map
*
400 __lookup_extent_mapping(struct extent_map_tree
*tree
,
401 u64 start
, u64 len
, int strict
)
403 struct extent_map
*em
;
404 struct rb_node
*rb_node
;
405 struct rb_node
*prev_or_next
= NULL
;
406 u64 end
= range_end(start
, len
);
408 rb_node
= __tree_search(&tree
->map
.rb_root
, start
, &prev_or_next
);
411 rb_node
= prev_or_next
;
416 em
= rb_entry(rb_node
, struct extent_map
, rb_node
);
418 if (strict
&& !(end
> em
->start
&& start
< extent_map_end(em
)))
421 refcount_inc(&em
->refs
);
426 * Lookup extent_map that intersects @start + @len range.
428 * @tree: tree to lookup in
429 * @start: byte offset to start the search
430 * @len: length of the lookup range
432 * Find and return the first extent_map struct in @tree that intersects the
433 * [@start, @start + @len) range. There may be additional objects in the tree that
434 * intersect, so check the object returned carefully to make sure that no
435 * additional lookups are needed.
437 struct extent_map
*lookup_extent_mapping(struct extent_map_tree
*tree
,
440 return __lookup_extent_mapping(tree
, start
, len
, 1);
444 * Find a nearby extent map intersecting @start + @len (not an exact search).
446 * @tree: tree to lookup in
447 * @start: byte offset to start the search
448 * @len: length of the lookup range
450 * Find and return the first extent_map struct in @tree that intersects the
451 * [@start, @start + @len) range.
453 * If one can't be found, any nearby extent may be returned
455 struct extent_map
*search_extent_mapping(struct extent_map_tree
*tree
,
458 return __lookup_extent_mapping(tree
, start
, len
, 0);
462 * Remove an extent_map from the extent tree.
464 * @tree: extent tree to remove from
465 * @em: extent map being removed
467 * Remove @em from @tree. No reference counts are dropped, and no checks
468 * are done to see if the range is in use.
470 void remove_extent_mapping(struct extent_map_tree
*tree
, struct extent_map
*em
)
472 lockdep_assert_held_write(&tree
->lock
);
474 WARN_ON(test_bit(EXTENT_FLAG_PINNED
, &em
->flags
));
475 rb_erase_cached(&em
->rb_node
, &tree
->map
);
476 if (!test_bit(EXTENT_FLAG_LOGGING
, &em
->flags
))
477 list_del_init(&em
->list
);
478 RB_CLEAR_NODE(&em
->rb_node
);
481 static void replace_extent_mapping(struct extent_map_tree
*tree
,
482 struct extent_map
*cur
,
483 struct extent_map
*new,
486 lockdep_assert_held_write(&tree
->lock
);
488 WARN_ON(test_bit(EXTENT_FLAG_PINNED
, &cur
->flags
));
489 ASSERT(extent_map_in_tree(cur
));
490 if (!test_bit(EXTENT_FLAG_LOGGING
, &cur
->flags
))
491 list_del_init(&cur
->list
);
492 rb_replace_node_cached(&cur
->rb_node
, &new->rb_node
, &tree
->map
);
493 RB_CLEAR_NODE(&cur
->rb_node
);
495 setup_extent_mapping(tree
, new, modified
);
498 static struct extent_map
*next_extent_map(const struct extent_map
*em
)
500 struct rb_node
*next
;
502 next
= rb_next(&em
->rb_node
);
505 return container_of(next
, struct extent_map
, rb_node
);
508 static struct extent_map
*prev_extent_map(struct extent_map
*em
)
510 struct rb_node
*prev
;
512 prev
= rb_prev(&em
->rb_node
);
515 return container_of(prev
, struct extent_map
, rb_node
);
519 * Helper for btrfs_get_extent. Given an existing extent in the tree,
520 * the existing extent is the nearest extent to map_start,
521 * and an extent that you want to insert, deal with overlap and insert
522 * the best fitted new extent into the tree.
524 static noinline
int merge_extent_mapping(struct extent_map_tree
*em_tree
,
525 struct extent_map
*existing
,
526 struct extent_map
*em
,
529 struct extent_map
*prev
;
530 struct extent_map
*next
;
535 BUG_ON(map_start
< em
->start
|| map_start
>= extent_map_end(em
));
537 if (existing
->start
> map_start
) {
539 prev
= prev_extent_map(next
);
542 next
= next_extent_map(prev
);
545 start
= prev
? extent_map_end(prev
) : em
->start
;
546 start
= max_t(u64
, start
, em
->start
);
547 end
= next
? next
->start
: extent_map_end(em
);
548 end
= min_t(u64
, end
, extent_map_end(em
));
549 start_diff
= start
- em
->start
;
551 em
->len
= end
- start
;
552 if (em
->block_start
< EXTENT_MAP_LAST_BYTE
&&
553 !test_bit(EXTENT_FLAG_COMPRESSED
, &em
->flags
)) {
554 em
->block_start
+= start_diff
;
555 em
->block_len
= em
->len
;
557 return add_extent_mapping(em_tree
, em
, 0);
561 * Add extent mapping into em_tree.
563 * @fs_info: the filesystem
564 * @em_tree: extent tree into which we want to insert the extent mapping
565 * @em_in: extent we are inserting
566 * @start: start of the logical range btrfs_get_extent() is requesting
567 * @len: length of the logical range btrfs_get_extent() is requesting
569 * Note that @em_in's range may be different from [start, start+len),
570 * but they must be overlapped.
572 * Insert @em_in into @em_tree. In case there is an overlapping range, handle
573 * the -EEXIST by either:
574 * a) Returning the existing extent in @em_in if @start is within the
576 * b) Merge the existing extent with @em_in passed in.
578 * Return 0 on success, otherwise -EEXIST.
581 int btrfs_add_extent_mapping(struct btrfs_fs_info
*fs_info
,
582 struct extent_map_tree
*em_tree
,
583 struct extent_map
**em_in
, u64 start
, u64 len
)
586 struct extent_map
*em
= *em_in
;
589 * Tree-checker should have rejected any inline extent with non-zero
590 * file offset. Here just do a sanity check.
592 if (em
->block_start
== EXTENT_MAP_INLINE
)
593 ASSERT(em
->start
== 0);
595 ret
= add_extent_mapping(em_tree
, em
, 0);
596 /* it is possible that someone inserted the extent into the tree
597 * while we had the lock dropped. It is also possible that
598 * an overlapping map exists in the tree
600 if (ret
== -EEXIST
) {
601 struct extent_map
*existing
;
603 existing
= search_extent_mapping(em_tree
, start
, len
);
605 trace_btrfs_handle_em_exist(fs_info
, existing
, em
, start
, len
);
608 * existing will always be non-NULL, since there must be
609 * extent causing the -EEXIST.
611 if (start
>= existing
->start
&&
612 start
< extent_map_end(existing
)) {
617 u64 orig_start
= em
->start
;
618 u64 orig_len
= em
->len
;
621 * The existing extent map is the one nearest to
622 * the [start, start + len) range which overlaps
624 ret
= merge_extent_mapping(em_tree
, existing
,
630 "unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
631 ret
, existing
->start
, existing
->len
,
632 orig_start
, orig_len
);
634 free_extent_map(existing
);
638 ASSERT(ret
== 0 || ret
== -EEXIST
);
643 * Drop all extent maps from a tree in the fastest possible way, rescheduling
644 * if needed. This avoids searching the tree, from the root down to the first
645 * extent map, before each deletion.
647 static void drop_all_extent_maps_fast(struct extent_map_tree
*tree
)
649 write_lock(&tree
->lock
);
650 while (!RB_EMPTY_ROOT(&tree
->map
.rb_root
)) {
651 struct extent_map
*em
;
652 struct rb_node
*node
;
654 node
= rb_first_cached(&tree
->map
);
655 em
= rb_entry(node
, struct extent_map
, rb_node
);
656 clear_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
657 clear_bit(EXTENT_FLAG_LOGGING
, &em
->flags
);
658 remove_extent_mapping(tree
, em
);
660 cond_resched_rwlock_write(&tree
->lock
);
662 write_unlock(&tree
->lock
);
666 * Drop all extent maps in a given range.
668 * @inode: The target inode.
669 * @start: Start offset of the range.
670 * @end: End offset of the range (inclusive value).
671 * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
673 * This drops all the extent maps that intersect the given range [@start, @end].
674 * Extent maps that partially overlap the range and extend behind or beyond it,
676 * The caller should have locked an appropriate file range in the inode's io
677 * tree before calling this function.
679 void btrfs_drop_extent_map_range(struct btrfs_inode
*inode
, u64 start
, u64 end
,
682 struct extent_map
*split
;
683 struct extent_map
*split2
;
684 struct extent_map
*em
;
685 struct extent_map_tree
*em_tree
= &inode
->extent_tree
;
686 u64 len
= end
- start
+ 1;
688 WARN_ON(end
< start
);
689 if (end
== (u64
)-1) {
690 if (start
== 0 && !skip_pinned
) {
691 drop_all_extent_maps_fast(em_tree
);
696 /* Make end offset exclusive for use in the loop below. */
701 * It's ok if we fail to allocate the extent maps, see the comment near
702 * the bottom of the loop below. We only need two spare extent maps in
703 * the worst case, where the first extent map that intersects our range
704 * starts before the range and the last extent map that intersects our
705 * range ends after our range (and they might be the same extent map),
706 * because we need to split those two extent maps at the boundaries.
708 split
= alloc_extent_map();
709 split2
= alloc_extent_map();
711 write_lock(&em_tree
->lock
);
712 em
= lookup_extent_mapping(em_tree
, start
, len
);
715 /* extent_map_end() returns exclusive value (last byte + 1). */
716 const u64 em_end
= extent_map_end(em
);
717 struct extent_map
*next_em
= NULL
;
724 next_em
= next_extent_map(em
);
726 if (next_em
->start
< end
)
727 refcount_inc(&next_em
->refs
);
733 if (skip_pinned
&& test_bit(EXTENT_FLAG_PINNED
, &em
->flags
)) {
739 clear_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
741 * In case we split the extent map, we want to preserve the
742 * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
743 * it on the new extent maps.
745 clear_bit(EXTENT_FLAG_LOGGING
, &flags
);
746 modified
= !list_empty(&em
->list
);
749 * The extent map does not cross our target range, so no need to
750 * split it, we can remove it directly.
752 if (em
->start
>= start
&& em_end
<= end
)
755 gen
= em
->generation
;
756 compressed
= test_bit(EXTENT_FLAG_COMPRESSED
, &em
->flags
);
758 if (em
->start
< start
) {
765 split
->start
= em
->start
;
766 split
->len
= start
- em
->start
;
768 if (em
->block_start
< EXTENT_MAP_LAST_BYTE
) {
769 split
->orig_start
= em
->orig_start
;
770 split
->block_start
= em
->block_start
;
773 split
->block_len
= em
->block_len
;
775 split
->block_len
= split
->len
;
776 split
->orig_block_len
= max(split
->block_len
,
778 split
->ram_bytes
= em
->ram_bytes
;
780 split
->orig_start
= split
->start
;
781 split
->block_len
= 0;
782 split
->block_start
= em
->block_start
;
783 split
->orig_block_len
= 0;
784 split
->ram_bytes
= split
->len
;
787 split
->generation
= gen
;
788 split
->flags
= flags
;
789 split
->compress_type
= em
->compress_type
;
790 replace_extent_mapping(em_tree
, em
, split
, modified
);
791 free_extent_map(split
);
803 split
->len
= em_end
- end
;
804 split
->block_start
= em
->block_start
;
805 split
->flags
= flags
;
806 split
->compress_type
= em
->compress_type
;
807 split
->generation
= gen
;
809 if (em
->block_start
< EXTENT_MAP_LAST_BYTE
) {
810 split
->orig_block_len
= max(em
->block_len
,
813 split
->ram_bytes
= em
->ram_bytes
;
815 split
->block_len
= em
->block_len
;
816 split
->orig_start
= em
->orig_start
;
818 const u64 diff
= start
+ len
- em
->start
;
820 split
->block_len
= split
->len
;
821 split
->block_start
+= diff
;
822 split
->orig_start
= em
->orig_start
;
825 split
->ram_bytes
= split
->len
;
826 split
->orig_start
= split
->start
;
827 split
->block_len
= 0;
828 split
->orig_block_len
= 0;
831 if (extent_map_in_tree(em
)) {
832 replace_extent_mapping(em_tree
, em
, split
,
837 ret
= add_extent_mapping(em_tree
, split
,
839 /* Logic error, shouldn't happen. */
841 if (WARN_ON(ret
!= 0) && modified
)
842 btrfs_set_inode_full_sync(inode
);
844 free_extent_map(split
);
848 if (extent_map_in_tree(em
)) {
850 * If the extent map is still in the tree it means that
851 * either of the following is true:
853 * 1) It fits entirely in our range (doesn't end beyond
854 * it or starts before it);
856 * 2) It starts before our range and/or ends after our
857 * range, and we were not able to allocate the extent
858 * maps for split operations, @split and @split2.
860 * If we are at case 2) then we just remove the entire
861 * extent map - this is fine since if anyone needs it to
862 * access the subranges outside our range, will just
863 * load it again from the subvolume tree's file extent
864 * item. However if the extent map was in the list of
865 * modified extents, then we must mark the inode for a
866 * full fsync, otherwise a fast fsync will miss this
867 * extent if it's new and needs to be logged.
869 if ((em
->start
< start
|| em_end
> end
) && modified
) {
871 btrfs_set_inode_full_sync(inode
);
873 remove_extent_mapping(em_tree
, em
);
877 * Once for the tree reference (we replaced or removed the
878 * extent map from the tree).
882 /* Once for us (for our lookup reference). */
888 write_unlock(&em_tree
->lock
);
890 free_extent_map(split
);
891 free_extent_map(split2
);
895 * Replace a range in the inode's extent map tree with a new extent map.
897 * @inode: The target inode.
898 * @new_em: The new extent map to add to the inode's extent map tree.
899 * @modified: Indicate if the new extent map should be added to the list of
900 * modified extents (for fast fsync tracking).
902 * Drops all the extent maps in the inode's extent map tree that intersect the
903 * range of the new extent map and adds the new extent map to the tree.
904 * The caller should have locked an appropriate file range in the inode's io
905 * tree before calling this function.
907 int btrfs_replace_extent_map_range(struct btrfs_inode
*inode
,
908 struct extent_map
*new_em
,
911 const u64 end
= new_em
->start
+ new_em
->len
- 1;
912 struct extent_map_tree
*tree
= &inode
->extent_tree
;
915 ASSERT(!extent_map_in_tree(new_em
));
918 * The caller has locked an appropriate file range in the inode's io
919 * tree, but getting -EEXIST when adding the new extent map can still
920 * happen in case there are extents that partially cover the range, and
921 * this is due to two tasks operating on different parts of the extent.
922 * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
923 * btrfs_get_extent") for an example and details.
926 btrfs_drop_extent_map_range(inode
, new_em
->start
, end
, false);
927 write_lock(&tree
->lock
);
928 ret
= add_extent_mapping(tree
, new_em
, modified
);
929 write_unlock(&tree
->lock
);
930 } while (ret
== -EEXIST
);
936 * Split off the first pre bytes from the extent_map at [start, start + len],
937 * and set the block_start for it to new_logical.
939 * This function is used when an ordered_extent needs to be split.
941 int split_extent_map(struct btrfs_inode
*inode
, u64 start
, u64 len
, u64 pre
,
944 struct extent_map_tree
*em_tree
= &inode
->extent_tree
;
945 struct extent_map
*em
;
946 struct extent_map
*split_pre
= NULL
;
947 struct extent_map
*split_mid
= NULL
;
954 split_pre
= alloc_extent_map();
957 split_mid
= alloc_extent_map();
963 lock_extent(&inode
->io_tree
, start
, start
+ len
- 1, NULL
);
964 write_lock(&em_tree
->lock
);
965 em
= lookup_extent_mapping(em_tree
, start
, len
);
971 ASSERT(em
->len
== len
);
972 ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED
, &em
->flags
));
973 ASSERT(em
->block_start
< EXTENT_MAP_LAST_BYTE
);
974 ASSERT(test_bit(EXTENT_FLAG_PINNED
, &em
->flags
));
975 ASSERT(!test_bit(EXTENT_FLAG_LOGGING
, &em
->flags
));
976 ASSERT(!list_empty(&em
->list
));
979 clear_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
981 /* First, replace the em with a new extent_map starting from em->start */
982 split_pre
->start
= em
->start
;
983 split_pre
->len
= pre
;
984 split_pre
->orig_start
= split_pre
->start
;
985 split_pre
->block_start
= new_logical
;
986 split_pre
->block_len
= split_pre
->len
;
987 split_pre
->orig_block_len
= split_pre
->block_len
;
988 split_pre
->ram_bytes
= split_pre
->len
;
989 split_pre
->flags
= flags
;
990 split_pre
->compress_type
= em
->compress_type
;
991 split_pre
->generation
= em
->generation
;
993 replace_extent_mapping(em_tree
, em
, split_pre
, 1);
996 * Now we only have an extent_map at:
997 * [em->start, em->start + pre]
1000 /* Insert the middle extent_map. */
1001 split_mid
->start
= em
->start
+ pre
;
1002 split_mid
->len
= em
->len
- pre
;
1003 split_mid
->orig_start
= split_mid
->start
;
1004 split_mid
->block_start
= em
->block_start
+ pre
;
1005 split_mid
->block_len
= split_mid
->len
;
1006 split_mid
->orig_block_len
= split_mid
->block_len
;
1007 split_mid
->ram_bytes
= split_mid
->len
;
1008 split_mid
->flags
= flags
;
1009 split_mid
->compress_type
= em
->compress_type
;
1010 split_mid
->generation
= em
->generation
;
1011 add_extent_mapping(em_tree
, split_mid
, 1);
1014 free_extent_map(em
);
1015 /* Once for the tree */
1016 free_extent_map(em
);
1019 write_unlock(&em_tree
->lock
);
1020 unlock_extent(&inode
->io_tree
, start
, start
+ len
- 1, NULL
);
1021 free_extent_map(split_mid
);
1023 free_extent_map(split_pre
);