xas_unlock_irqrestore(&xas, flags);
}
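+/*
+ * Tag all currently dirty extent buffers in the range [@start, @end] with
+ * PAGECACHE_TAG_TOWRITE, so that a WB_SYNC_ALL writeback only touches the
+ * buffers that were dirty when it started.  This mirrors
+ * tag_pages_for_writeback(), but works on the buffer_tree xarray, whose
+ * indices are in units of fs_info->sectorsize.
+ */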
+static void buffer_tree_tag_for_writeback(struct btrfs_fs_info *fs_info,
+ unsigned long start, unsigned long end)
+{
+ XA_STATE(xas, &fs_info->buffer_tree, start);
+ unsigned int tagged = 0;
+ void *eb;
+
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, eb, end, PAGECACHE_TAG_DIRTY) {
+ xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
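+		/*
+		 * Periodically pause the walk, drop the lock and reschedule so
+		 * that tagging a large range does not hog the CPU with irqs
+		 * disabled.
+		 */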
+ if (++tagged % XA_CHECK_SCHED)
+ continue;
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+ cond_resched();
+ xas_lock_irq(&xas);
+ }
+ xas_unlock_irq(&xas);
+}
+
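+/*
+ * A batch of extent buffers collected from the buffer_tree, the metadata
+ * counterpart of struct folio_batch.
+ */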
+struct eb_batch {
+ unsigned int nr;
+ unsigned int cur;
+ struct extent_buffer *ebs[PAGEVEC_SIZE];
+};
+
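+/* Add @eb to @batch.  Returns true while there is room for more entries. */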
+static inline bool eb_batch_add(struct eb_batch *batch, struct extent_buffer *eb)
+{
+ batch->ebs[batch->nr++] = eb;
+ return (batch->nr < PAGEVEC_SIZE);
+}
+
+static inline void eb_batch_init(struct eb_batch *batch)
+{
+ batch->nr = 0;
+ batch->cur = 0;
+}
+
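+/* Return the next extent buffer in @batch, or NULL once it is exhausted. */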
+static inline struct extent_buffer *eb_batch_next(struct eb_batch *batch)
+{
+ if (batch->cur >= batch->nr)
+ return NULL;
+ return batch->ebs[batch->cur++];
+}
+
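+/* Drop the references held on the batched extent buffers and reset @batch. */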
+static inline void eb_batch_release(struct eb_batch *batch)
+{
+ for (unsigned int i = 0; i < batch->nr; i++)
+ free_extent_buffer(batch->ebs[i]);
+ eb_batch_init(batch);
+}
+
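+/*
+ * Find the next extent buffer marked with @mark at or after the current
+ * xa_state index, up to @max, and take a reference on it.  Entries that are
+ * being freed or that changed under us are skipped by retrying the lookup,
+ * following the same pattern as the page cache lookup helpers.
+ */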
+static inline struct extent_buffer *find_get_eb(struct xa_state *xas, unsigned long max,
+ xa_mark_t mark)
+{
+ struct extent_buffer *eb;
+
+retry:
+ eb = xas_find_marked(xas, max, mark);
+
+ if (xas_retry(xas, eb))
+ goto retry;
+
+ if (!eb)
+ return NULL;
+
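+	/* The eb is being freed (refs hit zero), skip it and look again. */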
+ if (!atomic_inc_not_zero(&eb->refs)) {
+ xas_reset(xas);
+ goto retry;
+ }
+
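+	/* The slot changed under us, drop the reference and retry the lookup. */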
+ if (unlikely(eb != xas_reload(xas))) {
+ free_extent_buffer(eb);
+ xas_reset(xas);
+ goto retry;
+ }
+
+ return eb;
+}
+
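+/*
+ * Fill @batch with referenced extent buffers tagged with @tag in the range
+ * [*@start, @end] (indices in fs_info->sectorsize units), advancing *@start so
+ * that the next call continues where this one left off.  Returns the number of
+ * buffers collected; the caller must drop the references via
+ * eb_batch_release().
+ */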
+static unsigned int buffer_tree_get_ebs_tag(struct btrfs_fs_info *fs_info,
+ unsigned long *start,
+ unsigned long end, xa_mark_t tag,
+ struct eb_batch *batch)
+{
+ XA_STATE(xas, &fs_info->buffer_tree, *start);
+ struct extent_buffer *eb;
+
+ rcu_read_lock();
+ while ((eb = find_get_eb(&xas, end, tag)) != NULL) {
+ if (!eb_batch_add(batch, eb)) {
+ *start = ((eb->start + eb->len) >> fs_info->sectorsize_bits);
+ goto out;
+ }
+ }
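+	/*
+	 * The whole range was scanned, move *start past @end.  Be careful not
+	 * to overflow when @end is ULONG_MAX.
+	 */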
+ if (end == ULONG_MAX)
+ *start = ULONG_MAX;
+ else
+ *start = end + 1;
+out:
+ rcu_read_unlock();
+
+ return batch->nr;
+}
+
/*
* The endio specific version which won't touch any unsafe spinlock in endio
* context.
}
/*
- * Submit one subpage btree page.
+ * Wait for all eb writeback in the given range to finish.
*
- * The main difference to submit_eb_page() is:
- * - Page locking
- * For subpage, we don't rely on page locking at all.
- *
- * - Flush write bio
- * We only flush bio if we may be unable to fit current extent buffers into
- * current bio.
- *
- * Return >=0 for the number of submitted extent buffers.
- * Return <0 for fatal error.
+ * @fs_info: The fs_info for this file system.
+ * @start: The offset of the range to start waiting on writeback.
+ * @end: The end of the range, inclusive. This is meant to be used in
+ * conjunction with wait_marked_extents, so this will usually be
+ * the next eb->start - 1.
*/
-static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
+void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
+ u64 end)
{
- struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
- int submitted = 0;
- u64 folio_start = folio_pos(folio);
- int bit_start = 0;
- int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
- const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
+ struct eb_batch batch;
+ unsigned long start_index = (start >> fs_info->sectorsize_bits);
+ unsigned long end_index = (end >> fs_info->sectorsize_bits);
- /* Lock and write each dirty extent buffers in the range */
- while (bit_start < blocks_per_folio) {
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ eb_batch_init(&batch);
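+	/* Collect WRITEBACK tagged ebs in batches and wait on each one. */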
+ while (start_index <= end_index) {
struct extent_buffer *eb;
- unsigned long flags;
- u64 start;
+ unsigned int nr_ebs;
- /*
- * Take private lock to ensure the subpage won't be detached
- * in the meantime.
- */
- spin_lock(&folio->mapping->i_private_lock);
- if (!folio_test_private(folio)) {
- spin_unlock(&folio->mapping->i_private_lock);
+ nr_ebs = buffer_tree_get_ebs_tag(fs_info, &start_index, end_index,
+ PAGECACHE_TAG_WRITEBACK, &batch);
+ if (!nr_ebs)
break;
- }
- spin_lock_irqsave(&subpage->lock, flags);
- if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * blocks_per_folio,
- subpage->bitmaps)) {
- spin_unlock_irqrestore(&subpage->lock, flags);
- spin_unlock(&folio->mapping->i_private_lock);
- bit_start += sectors_per_node;
- continue;
- }
-
- start = folio_start + bit_start * fs_info->sectorsize;
- bit_start += sectors_per_node;
-
- /*
- * Here we just want to grab the eb without touching extra
- * spin locks, so call find_extent_buffer_nolock().
- */
- eb = find_extent_buffer_nolock(fs_info, start);
- spin_unlock_irqrestore(&subpage->lock, flags);
- spin_unlock(&folio->mapping->i_private_lock);
-
- /*
- * The eb has already reached 0 refs thus find_extent_buffer()
- * doesn't return it. We don't need to write back such eb
- * anyway.
- */
- if (!eb)
- continue;
-
- if (lock_extent_buffer_for_io(eb, wbc)) {
- write_one_eb(eb, wbc);
- submitted++;
- }
- free_extent_buffer(eb);
- }
- return submitted;
-}
-
-/*
- * Submit all page(s) of one extent buffer.
- *
- * @page: the page of one extent buffer
- * @eb_context: to determine if we need to submit this page, if current page
- * belongs to this eb, we don't need to submit
- *
- * The caller should pass each page in their bytenr order, and here we use
- * @eb_context to determine if we have submitted pages of one extent buffer.
- *
- * If we have, we just skip until we hit a new page that doesn't belong to
- * current @eb_context.
- *
- * If not, we submit all the page(s) of the extent buffer.
- *
- * Return >0 if we have submitted the extent buffer successfully.
- * Return 0 if we don't need to submit the page, as it's already submitted by
- * previous call.
- * Return <0 for fatal error.
- */
-static int submit_eb_page(struct folio *folio, struct btrfs_eb_write_context *ctx)
-{
- struct writeback_control *wbc = ctx->wbc;
- struct address_space *mapping = folio->mapping;
- struct extent_buffer *eb;
- int ret;
-
- if (!folio_test_private(folio))
- return 0;
-
- if (btrfs_meta_is_subpage(folio_to_fs_info(folio)))
- return submit_eb_subpage(folio, wbc);
-
- spin_lock(&mapping->i_private_lock);
- if (!folio_test_private(folio)) {
- spin_unlock(&mapping->i_private_lock);
- return 0;
- }
-
- eb = folio_get_private(folio);
-
- /*
- * Shouldn't happen and normally this would be a BUG_ON but no point
- * crashing the machine for something we can survive anyway.
- */
- if (WARN_ON(!eb)) {
- spin_unlock(&mapping->i_private_lock);
- return 0;
- }
-
- if (eb == ctx->eb) {
- spin_unlock(&mapping->i_private_lock);
- return 0;
- }
- ret = atomic_inc_not_zero(&eb->refs);
- spin_unlock(&mapping->i_private_lock);
- if (!ret)
- return 0;
- ctx->eb = eb;
-
- ret = btrfs_check_meta_write_pointer(eb->fs_info, ctx);
- if (ret) {
- if (ret == -EBUSY)
- ret = 0;
- free_extent_buffer(eb);
- return ret;
- }
-
- if (!lock_extent_buffer_for_io(eb, wbc)) {
- free_extent_buffer(eb);
- return 0;
- }
- /* Implies write in zoned mode. */
- if (ctx->zoned_bg) {
- /* Mark the last eb in the block group. */
- btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
- ctx->zoned_bg->meta_write_pointer += eb->len;
+ while ((eb = eb_batch_next(&batch)) != NULL)
+ wait_on_extent_buffer_writeback(eb);
+ eb_batch_release(&batch);
+ cond_resched();
}
- write_one_eb(eb, wbc);
- free_extent_buffer(eb);
- return 1;
}
int btree_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
- struct folio_batch fbatch;
- unsigned int nr_folios;
- pgoff_t index;
- pgoff_t end; /* Inclusive */
+ struct eb_batch batch;
+ unsigned int nr_ebs;
+ unsigned long index;
+ unsigned long end;
int scanned = 0;
xa_mark_t tag;
- folio_batch_init(&fbatch);
+ eb_batch_init(&batch);
if (wbc->range_cyclic) {
- index = mapping->writeback_index; /* Start from prev offset */
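+		/*
+		 * writeback_index is stored in page units, convert it to the
+		 * sectorsize based index used by the buffer_tree.
+		 */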
+ index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->sectorsize_bits);
end = -1;
+
/*
* Start from the beginning does not need to cycle over the
* range, mark it as scanned.
*/
scanned = (index == 0);
} else {
- index = wbc->range_start >> PAGE_SHIFT;
- end = wbc->range_end >> PAGE_SHIFT;
+ index = (wbc->range_start >> fs_info->sectorsize_bits);
+ end = (wbc->range_end >> fs_info->sectorsize_bits);
+
scanned = 1;
}
if (wbc->sync_mode == WB_SYNC_ALL)
btrfs_zoned_meta_io_lock(fs_info);
retry:
if (wbc->sync_mode == WB_SYNC_ALL)
- tag_pages_for_writeback(mapping, index, end);
+ buffer_tree_tag_for_writeback(fs_info, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_folios = filemap_get_folios_tag(mapping, &index, end,
- tag, &fbatch))) {
- unsigned i;
+ (nr_ebs = buffer_tree_get_ebs_tag(fs_info, &index, end, tag, &batch))) {
+ struct extent_buffer *eb;
- for (i = 0; i < nr_folios; i++) {
- struct folio *folio = fbatch.folios[i];
+ while ((eb = eb_batch_next(&batch)) != NULL) {
+ ctx.eb = eb;
+
+ ret = btrfs_check_meta_write_pointer(eb->fs_info, &ctx);
+ if (ret) {
+ if (ret == -EBUSY)
+ ret = 0;
- ret = submit_eb_page(folio, &ctx);
- if (ret == 0)
+ if (ret) {
+ done = 1;
+ break;
+ }
+				/* Our ref is dropped by eb_batch_release(). */
continue;
- if (ret < 0) {
- done = 1;
- break;
}
- /*
- * the filesystem may choose to bump up nr_to_write.
- * We have to make sure to honor the new nr_to_write
- * at any time
- */
- nr_to_write_done = wbc->nr_to_write <= 0;
+ if (!lock_extent_buffer_for_io(eb, wbc))
+ continue;
+
+ /* Implies write in zoned mode. */
+ if (ctx.zoned_bg) {
+ /* Mark the last eb in the block group. */
+ btrfs_schedule_zone_finish_bg(ctx.zoned_bg, eb);
+ ctx.zoned_bg->meta_write_pointer += eb->len;
+ }
+ write_one_eb(eb, wbc);
}
- folio_batch_release(&fbatch);
+ nr_to_write_done = (wbc->nr_to_write <= 0);
+ eb_batch_release(&batch);
cond_resched();
}
if (!scanned && !done) {