From ef7e7cbb323f8a39381a2d4bb8392712bf3cf7ae Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 15 Jul 2025 13:21:19 -0700 Subject: [PATCH] fuse: use iomap for writeback Use iomap for dirty folio writeback in ->writepages(). This allows for granular dirty writeback of large folios. Only the dirty portions of the large folio will be written instead of having to write out the entire folio. For example if there is a 1 MB large folio and only 2 bytes in it are dirty, only the page for those dirty bytes will be written out. .dirty_folio needs to be set to iomap_dirty_folio so that the bitmap iomap uses for dirty tracking correctly reflects dirty regions that need to be written back. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/20250715202122.2282532-3-joannelkoong@gmail.com Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/fuse/file.c | 133 ++++++++++++++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 51 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ee6b7c7adba1b..3955eb7175a2c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1833,7 +1833,7 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa) * scope of the fi->lock alleviates xarray lock * contention and noticeably improves performance. */ - folio_end_writeback(ap->folios[i]); + iomap_finish_folio_write(inode, ap->folios[i], 1); dec_wb_stat(&bdi->wb, WB_WRITEBACK); wb_writeout_inc(&bdi->wb); } @@ -2020,19 +2020,20 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc, } static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio, - uint32_t folio_index) + uint32_t folio_index, loff_t offset, unsigned len) { struct inode *inode = folio->mapping->host; struct fuse_args_pages *ap = &wpa->ia.ap; ap->folios[folio_index] = folio; - ap->descs[folio_index].offset = 0; - ap->descs[folio_index].length = folio_size(folio); + ap->descs[folio_index].offset = offset; + ap->descs[folio_index].length = len; inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); } static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio, + size_t offset, struct fuse_file *ff) { struct inode *inode = folio->mapping->host; @@ -2045,7 +2046,7 @@ static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio return NULL; fuse_writepage_add_to_bucket(fc, wpa); - fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0); + fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio) + offset, 0); wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; wpa->inode = inode; wpa->ia.ff = ff; @@ -2071,7 +2072,7 @@ static int fuse_writepage_locked(struct folio *folio) if (!ff) goto err; - wpa = fuse_writepage_args_setup(folio, ff); + wpa = fuse_writepage_args_setup(folio, 0, ff); error = -ENOMEM; if (!wpa) goto err_writepage_args; @@ -2080,7 +2081,7 @@ static int fuse_writepage_locked(struct folio *folio) ap->num_folios = 1; folio_start_writeback(folio); - fuse_writepage_args_page_fill(wpa, folio, 0); + fuse_writepage_args_page_fill(wpa, folio, 0, 0, folio_size(folio)); spin_lock(&fi->lock); list_add_tail(&wpa->queue_entry, &fi->queued_writes); @@ -2101,7 +2102,12 @@ struct fuse_fill_wb_data { struct fuse_file *ff; struct inode *inode; unsigned int max_folios; - unsigned int nr_pages; + /* + * nr_bytes won't overflow since fuse_writepage_need_send() caps + * wb requests to never exceed fc->max_pages (which has an upper bound + * of U16_MAX). + */ + unsigned int nr_bytes; }; static bool fuse_pages_realloc(struct fuse_fill_wb_data *data) @@ -2142,22 +2148,30 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) spin_unlock(&fi->lock); } -static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio, - struct fuse_args_pages *ap, +static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, + unsigned len, struct fuse_args_pages *ap, struct fuse_fill_wb_data *data) { + struct folio *prev_folio; + struct fuse_folio_desc prev_desc; + unsigned bytes = data->nr_bytes + len; + loff_t prev_pos; + WARN_ON(!ap->num_folios); /* Reached max pages */ - if (data->nr_pages + folio_nr_pages(folio) > fc->max_pages) + if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages) return true; /* Reached max write bytes */ - if ((data->nr_pages * PAGE_SIZE) + folio_size(folio) > fc->max_write) + if (bytes > fc->max_write) return true; /* Discontinuity */ - if (folio_next_index(ap->folios[ap->num_folios - 1]) != folio->index) + prev_folio = ap->folios[ap->num_folios - 1]; + prev_desc = ap->descs[ap->num_folios - 1]; + prev_pos = folio_pos(prev_folio) + prev_desc.offset + prev_desc.length; + if (prev_pos != pos) return true; /* Need to grow the pages array? If so, did the expansion fail? */ @@ -2167,85 +2181,102 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio, return false; } -static int fuse_writepages_fill(struct folio *folio, - struct writeback_control *wbc, void *_data) +static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 pos, + unsigned len, u64 end_pos) { - struct fuse_fill_wb_data *data = _data; + struct fuse_fill_wb_data *data = wpc->wb_ctx; struct fuse_writepage_args *wpa = data->wpa; struct fuse_args_pages *ap = &wpa->ia.ap; struct inode *inode = data->inode; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); - int err; + loff_t offset = offset_in_folio(folio, pos); + + WARN_ON_ONCE(!data); + /* len will always be page aligned */ + WARN_ON_ONCE(len & (PAGE_SIZE - 1)); if (!data->ff) { - err = -EIO; data->ff = fuse_write_file_get(fi); if (!data->ff) - goto out_unlock; + return -EIO; } - if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) { + if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) { fuse_writepages_send(data); data->wpa = NULL; - data->nr_pages = 0; + data->nr_bytes = 0; } if (data->wpa == NULL) { - err = -ENOMEM; - wpa = fuse_writepage_args_setup(folio, data->ff); + wpa = fuse_writepage_args_setup(folio, offset, data->ff); if (!wpa) - goto out_unlock; + return -ENOMEM; fuse_file_get(wpa->ia.ff); data->max_folios = 1; ap = &wpa->ia.ap; } - folio_start_writeback(folio); - fuse_writepage_args_page_fill(wpa, folio, ap->num_folios); - data->nr_pages += folio_nr_pages(folio); + iomap_start_folio_write(inode, folio, 1); + fuse_writepage_args_page_fill(wpa, folio, ap->num_folios, + offset, len); + data->nr_bytes += len; - err = 0; ap->num_folios++; if (!data->wpa) data->wpa = wpa; -out_unlock: - folio_unlock(folio); - return err; + return len; +} + +static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc, + int error) +{ + struct fuse_fill_wb_data *data = wpc->wb_ctx; + + WARN_ON_ONCE(!data); + + if (data->wpa) { + WARN_ON(!data->wpa->ia.ap.num_folios); + fuse_writepages_send(data); + } + + if (data->ff) + fuse_file_put(data->ff, false); + + return error; } +static const struct iomap_writeback_ops fuse_writeback_ops = { + .writeback_range = fuse_iomap_writeback_range, + .writeback_submit = fuse_iomap_writeback_submit, +}; + static int fuse_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_fill_wb_data data; - int err; + struct fuse_fill_wb_data data = { + .inode = inode, + }; + struct iomap_writepage_ctx wpc = { + .inode = inode, + .iomap.type = IOMAP_MAPPED, + .wbc = wbc, + .ops = &fuse_writeback_ops, + .wb_ctx = &data, + }; - err = -EIO; if (fuse_is_bad(inode)) - goto out; + return -EIO; if (wbc->sync_mode == WB_SYNC_NONE && fc->num_background >= fc->congestion_threshold) return 0; - data.inode = inode; - data.wpa = NULL; - data.ff = NULL; - data.nr_pages = 0; - - err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); - if (data.wpa) { - WARN_ON(!data.wpa->ia.ap.num_folios); - fuse_writepages_send(&data); - } - if (data.ff) - fuse_file_put(data.ff, false); - -out: - return err; + return iomap_writepages(&wpc); } static int fuse_launder_folio(struct folio *folio) @@ -3105,7 +3136,7 @@ static const struct address_space_operations fuse_file_aops = { .readahead = fuse_readahead, .writepages = fuse_writepages, .launder_folio = fuse_launder_folio, - .dirty_folio = filemap_dirty_folio, + .dirty_folio = iomap_dirty_folio, .release_folio = iomap_release_folio, .migrate_folio = filemap_migrate_folio, .bmap = fuse_bmap, -- 2.47.2