// SPDX-License-Identifier: GPL-2.0

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "messages.h"
#include "ctree.h"
#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_map_cache;

int __init extent_map_init(void)
{
	extent_map_cache = kmem_cache_create("btrfs_extent_map",
					     sizeof(struct extent_map), 0,
					     SLAB_MEM_SPREAD, NULL);
	if (!extent_map_cache)
		return -ENOMEM;

	return 0;
}

void __cold extent_map_exit(void)
{
	kmem_cache_destroy(extent_map_cache);
}
/*
 * Initialize the extent tree @tree.  Should be called for each new inode or
 * other user of the extent_map interface.
 */
void extent_map_tree_init(struct extent_map_tree *tree)
{
	tree->map = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&tree->modified_extents);
	rwlock_init(&tree->lock);
}
/*
 * Allocate a new extent_map structure.  The new structure is returned with a
 * reference count of one and needs to be freed using free_extent_map().
 */
struct extent_map *alloc_extent_map(void)
{
	struct extent_map *em;

	em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
	if (!em)
		return NULL;
	RB_CLEAR_NODE(&em->rb_node);
	em->compress_type = BTRFS_COMPRESS_NONE;
	refcount_set(&em->refs, 1);
	INIT_LIST_HEAD(&em->list);
	return em;
}
/*
 * Drop the reference on @em by one and free the structure if the reference
 * count hits zero.
 */
void free_extent_map(struct extent_map *em)
{
	if (!em)
		return;
	if (refcount_dec_and_test(&em->refs)) {
		WARN_ON(extent_map_in_tree(em));
		WARN_ON(!list_empty(&em->list));
		if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
			kfree(em->map_lookup);
		kmem_cache_free(extent_map_cache, em);
	}
}
/* Do the math around the end of an extent, handling wrapping. */
static u64 range_end(u64 start, u64 len)
{
	if (start + len < start)
		return (u64)-1;
	return start + len;
}
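/*
 * For example (illustrative): range_end(U64_MAX - SZ_4K, SZ_8K) overflows
 * u64 and therefore returns (u64)-1 instead of wrapping around to a small
 * offset, which keeps the [start, end) interval math in the lookups sane.
 */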
static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_map *entry = NULL;
	struct rb_node *orig_parent = NULL;
	u64 end = range_end(em->start, em->len);
	bool leftmost = true;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct extent_map, rb_node);

		if (em->start < entry->start) {
			p = &(*p)->rb_left;
		} else if (em->start >= extent_map_end(entry)) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return -EEXIST;
		}
	}

	orig_parent = parent;
	while (parent && em->start >= extent_map_end(entry)) {
		parent = rb_next(parent);
		entry = rb_entry(parent, struct extent_map, rb_node);
	}
	if (parent)
		if (end > entry->start && em->start < extent_map_end(entry))
			return -EEXIST;

	parent = orig_parent;
	entry = rb_entry(parent, struct extent_map, rb_node);
	while (parent && em->start < entry->start) {
		parent = rb_prev(parent);
		entry = rb_entry(parent, struct extent_map, rb_node);
	}
	if (parent)
		if (end > entry->start && em->start < extent_map_end(entry))
			return -EEXIST;

	rb_link_node(&em->rb_node, orig_parent, p);
	rb_insert_color_cached(&em->rb_node, root, leftmost);
	return 0;
}
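/*
 * Illustrative example (not from the original file): if the tree already
 * contains a map covering [0, 16K) and tree_insert() is called with an em
 * for [4K, 8K), the descent above finds the overlap and returns -EEXIST;
 * callers such as btrfs_add_extent_mapping() then resolve the collision.
 */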
/*
 * Search through the tree for an extent_map with a given offset.  If it can't
 * be found, try to find some neighboring extents
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_or_next_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct extent_map *entry;
	struct extent_map *prev_entry = NULL;

	ASSERT(prev_or_next_ret);

	while (n) {
		entry = rb_entry(n, struct extent_map, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset >= extent_map_end(entry))
			n = n->rb_right;
		else
			return n;
	}

	orig_prev = prev;
	while (prev && offset >= extent_map_end(prev_entry)) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct extent_map, rb_node);
	}

	/*
	 * Previous extent map found, return as in this case the caller does not
	 * care about the next one.
	 */
	if (prev) {
		*prev_or_next_ret = prev;
		return NULL;
	}

	prev = orig_prev;
	prev_entry = rb_entry(prev, struct extent_map, rb_node);
	while (prev && offset < prev_entry->start) {
		prev = rb_prev(prev);
		prev_entry = rb_entry(prev, struct extent_map, rb_node);
	}
	*prev_or_next_ret = prev;

	return NULL;
}
static inline u64 extent_map_block_end(const struct extent_map *em)
{
	if (em->block_start + em->block_len < em->block_start)
		return (u64)-1;
	return em->block_start + em->block_len;
}
/* Check to see if two extent_map structs are adjacent and safe to merge. */
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
		return 0;

	/*
	 * don't merge compressed extents, we need to know their
	 * actual size
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
		return 0;

	if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
	    test_bit(EXTENT_FLAG_LOGGING, &next->flags))
		return 0;

	/*
	 * We don't want to merge stuff that hasn't been written to the log yet
	 * since it may not reflect exactly what is on disk, and that would be
	 * bad.
	 */
	if (!list_empty(&prev->list) || !list_empty(&next->list))
		return 0;

	ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
	       prev->block_start != EXTENT_MAP_DELALLOC);

	if (prev->map_lookup || next->map_lookup)
		ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
		       test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));

	if (extent_map_end(prev) == next->start &&
	    prev->flags == next->flags &&
	    prev->map_lookup == next->map_lookup &&
	    ((next->block_start == EXTENT_MAP_HOLE &&
	      prev->block_start == EXTENT_MAP_HOLE) ||
	     (next->block_start == EXTENT_MAP_INLINE &&
	      prev->block_start == EXTENT_MAP_INLINE) ||
	     (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
	      next->block_start == extent_map_block_end(prev)))) {
		return 1;
	}
	return 0;
}
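/*
 * Illustrative example (not from the original file): a map for [0, 4K) with
 * block_start X and a map for [4K, 8K) with block_start X + 4K are
 * contiguous both in the file and on disk, so mergable_maps() returns 1 for
 * them (assuming equal flags, no pinning/logging/compression and both lists
 * empty).
 */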
static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
{
	struct extent_map *merge = NULL;
	struct rb_node *rb;

	/*
	 * We can't modify an extent map that is in the tree and that is being
	 * used by another task, as it can cause that other task to see it in
	 * inconsistent state during the merging. We always have 1 reference for
	 * the tree and 1 for this task (which is unpinning the extent map or
	 * clearing the logging flag), so anything > 2 means it's being used by
	 * other tasks too.
	 */
	if (refcount_read(&em->refs) > 2)
		return;

	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			merge = rb_entry(rb, struct extent_map, rb_node);
		if (rb && mergable_maps(merge, em)) {
			em->start = merge->start;
			em->orig_start = merge->orig_start;
			em->len += merge->len;
			em->block_len += merge->block_len;
			em->block_start = merge->block_start;
			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
			em->mod_start = merge->mod_start;
			em->generation = max(em->generation, merge->generation);
			set_bit(EXTENT_FLAG_MERGED, &em->flags);

			rb_erase_cached(&merge->rb_node, &tree->map);
			RB_CLEAR_NODE(&merge->rb_node);
			free_extent_map(merge);
		}
	}

	rb = rb_next(&em->rb_node);
	if (rb)
		merge = rb_entry(rb, struct extent_map, rb_node);
	if (rb && mergable_maps(em, merge)) {
		em->len += merge->len;
		em->block_len += merge->block_len;
		rb_erase_cached(&merge->rb_node, &tree->map);
		RB_CLEAR_NODE(&merge->rb_node);
		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
		em->generation = max(em->generation, merge->generation);
		set_bit(EXTENT_FLAG_MERGED, &em->flags);
		free_extent_map(merge);
	}
}
/*
 * Unpin an extent from the cache.
 *
 * @tree:	tree to unpin the extent in
 * @start:	logical offset in the file
 * @len:	length of the extent
 * @gen:	generation that this extent has been modified in
 *
 * Called after an extent has been written to disk properly.  Set the generation
 * to the generation that actually added the file item to the inode so we know
 * we need to sync this extent when we call fsync().
 */
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
		       u64 gen)
{
	int ret = 0;
	struct extent_map *em;
	bool prealloc = false;

	write_lock(&tree->lock);
	em = lookup_extent_mapping(tree, start, len);

	WARN_ON(!em || em->start != start);

	if (!em)
		goto out;

	em->generation = gen;
	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
	em->mod_start = em->start;
	em->mod_len = em->len;

	if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
		prealloc = true;
		clear_bit(EXTENT_FLAG_FILLING, &em->flags);
	}

	try_merge_map(tree, em);

	if (prealloc) {
		em->mod_start = em->start;
		em->mod_len = em->len;
	}

	free_extent_map(em);
out:
	write_unlock(&tree->lock);
	return ret;
}
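/*
 * Hedged usage note: unpin_extent_cache() is expected to be called from the
 * ordered extent completion path (e.g. the btrfs_finish_ordered_io() code in
 * inode.c) once the data has hit disk, so that the now-stable extent map can
 * be merged with its neighbors again.
 */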
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
	lockdep_assert_held_write(&tree->lock);

	clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
	if (extent_map_in_tree(em))
		try_merge_map(tree, em);
}
static inline void setup_extent_mapping(struct extent_map_tree *tree,
					struct extent_map *em,
					int modified)
{
	refcount_inc(&em->refs);
	em->mod_start = em->start;
	em->mod_len = em->len;

	if (modified)
		list_move(&em->list, &tree->modified_extents);
	else
		try_merge_map(tree, em);
}
static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
{
	struct map_lookup *map = em->map_lookup;
	u64 stripe_size = em->orig_block_len;
	int i;

	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_io_stripe *stripe = &map->stripes[i];
		struct btrfs_device *device = stripe->dev;

		set_extent_bit(&device->alloc_state, stripe->physical,
			       stripe->physical + stripe_size - 1,
			       bits | EXTENT_NOWAIT, NULL);
	}
}
static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
{
	struct map_lookup *map = em->map_lookup;
	u64 stripe_size = em->orig_block_len;
	int i;

	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_io_stripe *stripe = &map->stripes[i];
		struct btrfs_device *device = stripe->dev;

		__clear_extent_bit(&device->alloc_state, stripe->physical,
				   stripe->physical + stripe_size - 1,
				   bits | EXTENT_NOWAIT,
				   NULL, NULL);
	}
}
/*
 * Add new extent map to the extent tree
 *
 * @tree:	tree to insert new map in
 * @em:		map to insert
 * @modified:	indicate whether the given @em should be added to the
 *		modified list, which indicates the extent needs to be logged
 *
 * Insert @em into @tree or perform a simple forward/backward merge with
 * existing mappings.  The extent_map struct passed in will be inserted
 * into the tree directly, with an additional reference taken, or a
 * reference dropped if the merge attempt was successful.
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em, int modified)
{
	int ret = 0;

	lockdep_assert_held_write(&tree->lock);

	ret = tree_insert(&tree->map, em);
	if (ret)
		goto out;

	setup_extent_mapping(tree, em, modified);
	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
		extent_map_device_set_bits(em, CHUNK_ALLOCATED);
		extent_map_device_clear_bits(em, CHUNK_TRIMMED);
	}
out:
	return ret;
}
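/*
 * Locking sketch (illustrative): the lockdep assertion above means callers
 * must hold the tree's rwlock for writing around the insertion, e.g.:
 *
 *	write_lock(&tree->lock);
 *	ret = add_extent_mapping(tree, em, modified);
 *	write_unlock(&tree->lock);
 *
 * btrfs_replace_extent_map_range() below follows exactly this pattern.
 */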
static struct extent_map *
__lookup_extent_mapping(struct extent_map_tree *tree,
			u64 start, u64 len, int strict)
{
	struct extent_map *em;
	struct rb_node *rb_node;
	struct rb_node *prev_or_next = NULL;
	u64 end = range_end(start, len);

	rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
	if (!rb_node) {
		if (prev_or_next)
			rb_node = prev_or_next;
		else
			return NULL;
	}

	em = rb_entry(rb_node, struct extent_map, rb_node);

	if (strict && !(end > em->start && start < extent_map_end(em)))
		return NULL;

	refcount_inc(&em->refs);
	return em;
}
/*
 * Lookup extent_map that intersects @start + @len range.
 *
 * @tree:	tree to lookup in
 * @start:	byte offset to start the search
 * @len:	length of the lookup range
 *
 * Find and return the first extent_map struct in @tree that intersects the
 * [start, len] range.  There may be additional objects in the tree that
 * intersect, so check the object returned carefully to make sure that no
 * additional lookups are needed.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 len)
{
	return __lookup_extent_mapping(tree, start, len, 1);
}
/*
 * Find a nearby extent map intersecting @start + @len (not an exact search).
 *
 * @tree:	tree to lookup in
 * @start:	byte offset to start the search
 * @len:	length of the lookup range
 *
 * Find and return the first extent_map struct in @tree that intersects the
 * [start, len] range.
 *
 * If one can't be found, any nearby extent may be returned
 */
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 len)
{
	return __lookup_extent_mapping(tree, start, len, 0);
}
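/*
 * Illustrative contrast (not from the original file): with a single map at
 * [8K, 12K) in the tree, lookup_extent_mapping(tree, 0, SZ_4K) returns NULL
 * because a strict intersection is required, while
 * search_extent_mapping(tree, 0, SZ_4K) returns the nearby [8K, 12K) map.
 */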
/*
 * Remove an extent_map from the extent tree.
 *
 * @tree:	extent tree to remove from
 * @em:		extent map being removed
 *
 * Remove @em from @tree.  No reference counts are dropped, and no checks
 * are done to see if the range is in use.
 */
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	lockdep_assert_held_write(&tree->lock);

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	rb_erase_cached(&em->rb_node, &tree->map);
	if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
		list_del_init(&em->list);
	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
		extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
	RB_CLEAR_NODE(&em->rb_node);
}
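/*
 * Usage sketch (illustrative): since remove_extent_mapping() drops no
 * references, a caller that is done with the map pairs it with a
 * free_extent_map() call for the reference the tree was holding, as
 * drop_all_extent_maps_fast() below does:
 *
 *	remove_extent_mapping(tree, em);
 *	free_extent_map(em);
 */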
static void replace_extent_mapping(struct extent_map_tree *tree,
				   struct extent_map *cur,
				   struct extent_map *new,
				   int modified)
{
	lockdep_assert_held_write(&tree->lock);

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
	ASSERT(extent_map_in_tree(cur));
	if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
		list_del_init(&cur->list);
	rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
	RB_CLEAR_NODE(&cur->rb_node);

	setup_extent_mapping(tree, new, modified);
}
static struct extent_map *next_extent_map(const struct extent_map *em)
{
	struct rb_node *next;

	next = rb_next(&em->rb_node);
	if (!next)
		return NULL;
	return container_of(next, struct extent_map, rb_node);
}

static struct extent_map *prev_extent_map(struct extent_map *em)
{
	struct rb_node *prev;

	prev = rb_prev(&em->rb_node);
	if (!prev)
		return NULL;
	return container_of(prev, struct extent_map, rb_node);
}
/*
 * Helper for btrfs_get_extent.  Given an existing extent in the tree (the
 * nearest extent to map_start) and a new extent to insert, deal with the
 * overlap and insert the best-fitting new extent into the tree.
 */
static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
					 struct extent_map *existing,
					 struct extent_map *em,
					 u64 map_start)
{
	struct extent_map *prev;
	struct extent_map *next;
	u64 start;
	u64 end;
	u64 start_diff;

	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));

	if (existing->start > map_start) {
		next = existing;
		prev = prev_extent_map(next);
	} else {
		prev = existing;
		next = next_extent_map(prev);
	}

	start = prev ? extent_map_end(prev) : em->start;
	start = max_t(u64, start, em->start);
	end = next ? next->start : extent_map_end(em);
	end = min_t(u64, end, extent_map_end(em));
	start_diff = start - em->start;
	em->start = start;
	em->len = end - start;
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += start_diff;
		em->block_len = em->len;
	}
	return add_extent_mapping(em_tree, em, 0);
}
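/*
 * Worked example for merge_extent_mapping() above (illustrative, not from
 * the original file): with an existing map at [0, 8K), a new em at
 * [4K, 16K) and map_start == 8K, the em is trimmed to the gap after the
 * existing map: em->start becomes 8K, em->len becomes 8K and, for a regular
 * uncompressed extent, em->block_start advances by the 4K start_diff before
 * the map is inserted.
 */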
/*
 * Add extent mapping into em_tree.
 *
 * @fs_info:  the filesystem
 * @em_tree:  extent tree into which we want to insert the extent mapping
 * @em_in:    extent we are inserting
 * @start:    start of the logical range btrfs_get_extent() is requesting
 * @len:      length of the logical range btrfs_get_extent() is requesting
 *
 * Note that @em_in's range may be different from [start, start+len),
 * but they must overlap.
 *
 * Insert @em_in into @em_tree. In case there is an overlapping range, handle
 * the -EEXIST by either:
 * a) Returning the existing extent in @em_in if @start is within the
 *    existing em.
 * b) Merging the existing extent with @em_in passed in.
 *
 * Return 0 on success, otherwise -EEXIST.
 */
int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
			     struct extent_map_tree *em_tree,
			     struct extent_map **em_in, u64 start, u64 len)
{
	int ret;
	struct extent_map *em = *em_in;

	/*
	 * Tree-checker should have rejected any inline extent with non-zero
	 * file offset. Here just do a sanity check.
	 */
	if (em->block_start == EXTENT_MAP_INLINE)
		ASSERT(em->start == 0);

	ret = add_extent_mapping(em_tree, em, 0);
	/*
	 * It is possible that someone inserted the extent into the tree while
	 * we had the lock dropped.  It is also possible that an overlapping
	 * map exists in the tree.
	 */
	if (ret == -EEXIST) {
		struct extent_map *existing;

		ret = 0;

		existing = search_extent_mapping(em_tree, start, len);

		trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);

		/*
		 * existing will always be non-NULL, since there must be an
		 * extent causing the -EEXIST.
		 */
		if (start >= existing->start &&
		    start < extent_map_end(existing)) {
			free_extent_map(em);
			*em_in = existing;
			ret = 0;
		} else {
			u64 orig_start = em->start;
			u64 orig_len = em->len;

			/*
			 * The existing extent map is the one nearest to
			 * the [start, start + len) range which overlaps.
			 */
			ret = merge_extent_mapping(em_tree, existing,
						   em, start);
			if (ret) {
				free_extent_map(em);
				*em_in = NULL;
				WARN_ONCE(ret,
"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
					  ret, existing->start, existing->len,
					  orig_start, orig_len);
			}
			free_extent_map(existing);
		}
	}

	ASSERT(ret == 0 || ret == -EEXIST);
	return ret;
}
/*
 * Drop all extent maps from a tree in the fastest possible way, rescheduling
 * if needed. This avoids searching the tree, from the root down to the first
 * extent map, before each deletion.
 */
static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
{
	write_lock(&tree->lock);
	while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
		struct extent_map *em;
		struct rb_node *node;

		node = rb_first_cached(&tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(tree, em);
		free_extent_map(em);
		cond_resched_rwlock_write(&tree->lock);
	}
	write_unlock(&tree->lock);
}
/*
 * Drop all extent maps in a given range.
 *
 * @inode:       The target inode.
 * @start:       Start offset of the range.
 * @end:         End offset of the range (inclusive value).
 * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
 *
 * This drops all the extent maps that intersect the given range [@start, @end].
 * Extent maps that partially overlap the range and extend behind or beyond it
 * are split.
 * The caller should have locked an appropriate file range in the inode's io
 * tree before calling this function.
 */
void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
				 bool skip_pinned)
{
	struct extent_map *split;
	struct extent_map *split2;
	struct extent_map *em;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 len = end - start + 1;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		if (start == 0 && !skip_pinned) {
			drop_all_extent_maps_fast(em_tree);
			return;
		}
		len = (u64)-1;
	} else {
		/* Make end offset exclusive for use in the loop below. */
		end++;
	}

	/*
	 * It's ok if we fail to allocate the extent maps, see the comment near
	 * the bottom of the loop below. We only need two spare extent maps in
	 * the worst case, where the first extent map that intersects our range
	 * starts before the range and the last extent map that intersects our
	 * range ends after our range (and they might be the same extent map),
	 * because we need to split those two extent maps at the boundaries.
	 */
	split = alloc_extent_map();
	split2 = alloc_extent_map();

	write_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);

	while (em) {
		/* extent_map_end() returns exclusive value (last byte + 1). */
		const u64 em_end = extent_map_end(em);
		struct extent_map *next_em = NULL;
		u64 gen;
		unsigned long flags;
		bool modified;
		bool compressed;

		if (em_end < end) {
			next_em = next_extent_map(em);
			if (next_em) {
				if (next_em->start < end)
					refcount_inc(&next_em->refs);
				else
					next_em = NULL;
			}
		}

		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			start = em_end;
			goto next;
		}

		flags = em->flags;
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		/*
		 * In case we split the extent map, we want to preserve the
		 * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
		 * it on the new extent maps.
		 */
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);

		/*
		 * The extent map does not cross our target range, so no need to
		 * split it, we can remove it directly.
		 */
		if (em->start >= start && em_end <= end)
			goto remove_em;

		gen = em->generation;
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);

		if (em->start < start) {
			if (!split) {
				split = split2;
				split2 = NULL;
				if (!split)
					goto remove_em;
			}
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
						em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (em_end > end) {
			if (!split) {
				split = split2;
				split2 = NULL;
				if (!split)
					goto remove_em;
			}
			split->start = end;
			split->len = em_end - end;
			split->block_start = em->block_start;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
						    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->orig_start = em->orig_start;
				} else {
					const u64 diff = start + len - em->start;

					split->block_len = split->len;
					split->block_start += diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				int ret;

				ret = add_extent_mapping(em_tree, split,
							 modified);
				/* Logic error, shouldn't happen. */
				ASSERT(ret == 0);
				if (WARN_ON(ret != 0) && modified)
					btrfs_set_inode_full_sync(inode);
			}
			free_extent_map(split);
			split = NULL;
		}
remove_em:
		if (extent_map_in_tree(em)) {
			/*
			 * If the extent map is still in the tree it means that
			 * either of the following is true:
			 *
			 * 1) It fits entirely in our range (doesn't end beyond
			 *    it or starts before it);
			 *
			 * 2) It starts before our range and/or ends after our
			 *    range, and we were not able to allocate the extent
			 *    maps for split operations, @split and @split2.
			 *
			 * If we are at case 2) then we just remove the entire
			 * extent map - this is fine since anyone who needs to
			 * access the subranges outside our range will just
			 * load it again from the subvolume tree's file extent
			 * item. However if the extent map was in the list of
			 * modified extents, then we must mark the inode for a
			 * full fsync, otherwise a fast fsync will miss this
			 * extent if it's new and needs to be logged.
			 */
			if ((em->start < start || em_end > end) && modified) {
				ASSERT(!split);
				btrfs_set_inode_full_sync(inode);
			}
			remove_extent_mapping(em_tree, em);
		}

		/*
		 * Once for the tree reference (we replaced or removed the
		 * extent map from the tree).
		 */
		free_extent_map(em);
next:
		/* Once for us (for our lookup reference). */
		free_extent_map(em);

		em = next_em;
	}

	write_unlock(&em_tree->lock);

	free_extent_map(split);
	free_extent_map(split2);
}
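/*
 * Illustrative example (not from the original file): dropping the range
 * [4K, 8K - 1] from an inode whose tree holds a single map for [0, 16K)
 * splits it in two, leaving maps for [0, 4K) and [8K, 16K) in the tree
 * (provided the two spare extent maps were successfully allocated).
 */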
/*
 * Replace a range in the inode's extent map tree with a new extent map.
 *
 * @inode:      The target inode.
 * @new_em:     The new extent map to add to the inode's extent map tree.
 * @modified:   Indicate if the new extent map should be added to the list of
 *              modified extents (for fast fsync tracking).
 *
 * Drops all the extent maps in the inode's extent map tree that intersect the
 * range of the new extent map and adds the new extent map to the tree.
 * The caller should have locked an appropriate file range in the inode's io
 * tree before calling this function.
 */
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
				   struct extent_map *new_em,
				   bool modified)
{
	const u64 end = new_em->start + new_em->len - 1;
	struct extent_map_tree *tree = &inode->extent_tree;
	int ret;

	ASSERT(!extent_map_in_tree(new_em));

	/*
	 * The caller has locked an appropriate file range in the inode's io
	 * tree, but getting -EEXIST when adding the new extent map can still
	 * happen in case there are extents that partially cover the range, and
	 * this is due to two tasks operating on different parts of the extent.
	 * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
	 * btrfs_get_extent") for an example and details.
	 */
	do {
		btrfs_drop_extent_map_range(inode, new_em->start, end, false);
		write_lock(&tree->lock);
		ret = add_extent_mapping(tree, new_em, modified);
		write_unlock(&tree->lock);
	} while (ret == -EEXIST);

	return ret;
}
/*
 * Split off the first pre bytes from the extent_map at [start, start + len],
 * and set the block_start for it to new_logical.
 *
 * This function is used when an ordered_extent needs to be split.
 */
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
		     u64 new_logical)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	struct extent_map *split_pre = NULL;
	struct extent_map *split_mid = NULL;
	int ret = 0;
	unsigned long flags;

	ASSERT(pre != 0);
	ASSERT(pre < len);

	split_pre = alloc_extent_map();
	if (!split_pre)
		return -ENOMEM;
	split_mid = alloc_extent_map();
	if (!split_mid) {
		ret = -ENOMEM;
		goto out_free_pre;
	}

	lock_extent(&inode->io_tree, start, start + len - 1, NULL);
	write_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (!em) {
		ret = -EIO;
		goto out_unlock;
	}

	ASSERT(em->len == len);
	ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
	ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
	ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
	ASSERT(!list_empty(&em->list));

	flags = em->flags;
	clear_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* First, replace the em with a new extent_map starting from em->start. */
	split_pre->start = em->start;
	split_pre->len = pre;
	split_pre->orig_start = split_pre->start;
	split_pre->block_start = new_logical;
	split_pre->block_len = split_pre->len;
	split_pre->orig_block_len = split_pre->block_len;
	split_pre->ram_bytes = split_pre->len;
	split_pre->flags = flags;
	split_pre->compress_type = em->compress_type;
	split_pre->generation = em->generation;

	replace_extent_mapping(em_tree, em, split_pre, 1);

	/*
	 * Now we only have an extent_map at:
	 *     [em->start, em->start + pre]
	 */

	/* Insert the middle extent_map. */
	split_mid->start = em->start + pre;
	split_mid->len = em->len - pre;
	split_mid->orig_start = split_mid->start;
	split_mid->block_start = em->block_start + pre;
	split_mid->block_len = split_mid->len;
	split_mid->orig_block_len = split_mid->block_len;
	split_mid->ram_bytes = split_mid->len;
	split_mid->flags = flags;
	split_mid->compress_type = em->compress_type;
	split_mid->generation = em->generation;
	add_extent_mapping(em_tree, split_mid, 1);

	/* Once for us */
	free_extent_map(em);
	/* Once for the tree */
	free_extent_map(em);

out_unlock:
	write_unlock(&em_tree->lock);
	unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
	free_extent_map(split_mid);
out_free_pre:
	free_extent_map(split_pre);
	return ret;
}
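/*
 * Illustrative example (not from the original file): for a pinned map at
 * [0, 12K) with pre == 4K, the result is two maps: [0, 4K) pointing at
 * new_logical, and [4K, 12K) keeping the original physical location shifted
 * by pre (em->block_start + pre), matching how the ordered extent was split.
 */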