From: Matthew R. Ochs
Date: Tue, 26 May 2026 15:20:21 +0000 (-0700)
Subject: fuse: back uncached readdir buffers with pages
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4dd6f6d3085a84e74b0a1efec3a05ed0b5125dce;p=thirdparty%2Fkernel%2Flinux.git

fuse: back uncached readdir buffers with pages

Commit dabb90391028 ("fuse: increase readdir buffer size") changed
fuse_readdir_uncached() to size its temporary buffer from ctx->count.
This is useful for overlayfs and other in-kernel callers that use
INT_MAX to indicate an unlimited directory read.

The larger buffer is currently supplied as a kvec output argument. For
virtiofs, kvec arguments are copied through req->argbuf, which is
allocated with kmalloc(..., GFP_ATOMIC). A large uncached readdir buffer
can therefore require a multi-megabyte contiguous atomic allocation
before the request is queued.

Avoid the large bounce-buffer allocation by backing uncached readdir
output with pages and setting out_pages. Transports such as virtiofs can
then pass the pages as scatter-gather entries instead of copying the
output through argbuf.

Map the pages with vm_map_ram() only while parsing the returned dirents.
The existing parser can then continue to use a linear kernel mapping.

[SzM: separate allocation of pages into a helper function]

Fixes: dabb90391028 ("fuse: increase readdir buffer size")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew R. 
Ochs Signed-off-by: Miklos Szeredi --- diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index db5ae8ec10305..c38139225a2e3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -12,6 +12,7 @@ #include #include #include +#include static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) { @@ -335,6 +336,43 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, return 0; } +static struct page **fuse_readdir_alloc_buf(struct fuse_args_pages *ap, size_t *bufsize) +{ + unsigned int i, nr_alloc, nr_pages = DIV_ROUND_UP(*bufsize, PAGE_SIZE); + struct page **pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); + + if (!pages) + return NULL; + + nr_alloc = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); + if (!nr_alloc) + goto free_array; + + if (nr_alloc < nr_pages) { + nr_pages = nr_alloc; + *bufsize = (size_t) nr_pages << PAGE_SHIFT; + } + + ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs); + if (!ap->folios) + goto release_pages; + + for (i = 0; i < nr_pages; i++) { + ap->folios[i] = page_folio(pages[i]); + ap->descs[i].length = min_t(size_t, *bufsize - (size_t)i * PAGE_SIZE, PAGE_SIZE); + } + ap->num_folios = nr_pages; + ap->args.out_pages = true; + + return pages; + +release_pages: + release_pages(pages, nr_pages); +free_array: + kfree(pages); + return NULL; +} + static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) { int plus; @@ -343,18 +381,16 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn *fc = fm->fc; struct fuse_io_args ia = {}; - struct fuse_args *args = &ia.ap.args; + struct fuse_args_pages *ap = &ia.ap; void *buf; size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT); u64 attr_version = 0, evict_ctr = 0; bool locked; + struct page **pages = fuse_readdir_alloc_buf(ap, &bufsize); - buf = kvmalloc(bufsize, GFP_KERNEL); - if (!buf) + if (!pages) 
return -ENOMEM; - args->out_args[0].value = buf; - plus = fuse_use_readdirplus(inode, ctx); if (plus) { attr_version = fuse_get_attr_version(fm->fc); @@ -364,24 +400,37 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR); } locked = fuse_lock_inode(inode); - res = fuse_simple_request(fm, args); + res = fuse_simple_request(fm, &ap->args); fuse_unlock_inode(inode, locked); - if (res >= 0) { - if (!res) { - struct fuse_file *ff = file->private_data; - - if (ff->open_flags & FOPEN_CACHE_DIR) - fuse_readdir_cache_end(file, ctx->pos); - } else if (plus) { - res = parse_dirplusfile(buf, res, file, ctx, attr_version, - evict_ctr); - } else { + if (res < 0) + goto out; + + if (!res) { + struct fuse_file *ff = file->private_data; + + if (ff->open_flags & FOPEN_CACHE_DIR) + fuse_readdir_cache_end(file, ctx->pos); + goto out; + } + + buf = vm_map_ram(pages, ap->num_folios, -1); + if (!buf) { + res = -ENOMEM; + } else { + if (plus) + res = parse_dirplusfile(buf, res, file, ctx, attr_version, evict_ctr); + else res = parse_dirfile(buf, res, file, ctx); - } + + vm_unmap_ram(buf, ap->num_folios); } +out: + kfree(ap->folios); + release_pages(pages, ap->num_folios); + kfree(pages); - kvfree(buf); fuse_invalidate_atime(inode); + return res; }