From: Mike Rapoport (Microsoft) Date: Thu, 2 Apr 2026 04:11:49 +0000 (+0300) Subject: shmem, userfaultfd: use a VMA callback to handle UFFDIO_CONTINUE X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=dfc4d771820a171bd701d06252fcf920d0ede25c;p=thirdparty%2Fkernel%2Flinux.git shmem, userfaultfd: use a VMA callback to handle UFFDIO_CONTINUE When userspace resolves a page fault in a shmem VMA with UFFDIO_CONTINUE it needs to get a folio that already exists in the pagecache backing that VMA. Instead of using shmem_get_folio() for that, add a get_folio_noalloc() method to 'struct vm_uffd_ops' that will return a folio if it exists in the VMA's pagecache at given pgoff. Implement get_folio_noalloc() method for shmem and slightly refactor userfaultfd's mfill_get_vma() and mfill_atomic_pte_continue() to support this new API. Link: https://lore.kernel.org/20260402041156.1377214-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: James Houghton Cc: Andrea Arcangeli Cc: Andrei Vagin Cc: Axel Rasmussen Cc: Baolin Wang Cc: David Hildenbrand (Arm) Cc: Harry Yoo Cc: Harry Yoo (Oracle) Cc: Hugh Dickins Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Nikita Kalyazin Cc: Oscar Salvador Cc: Paolo Bonzini Cc: Peter Xu Cc: Sean Christopherson Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: David Carlier Signed-off-by: Andrew Morton --- diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 6d445dbfe8ff..4bda632dae88 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -87,6 +87,13 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); struct vm_uffd_ops { /* Checks if a VMA can support userfaultfd */ bool (*can_userfault)(struct vm_area_struct *vma, vm_flags_t vm_flags); + /* + * Called to resolve UFFDIO_CONTINUE request. + * Should return the folio found at pgoff in the VMA's pagecache if it + * exists or ERR_PTR otherwise. + * The returned folio is locked and with reference held. + */ + struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff); }; /* A combined operation mode + behavior flags. */ diff --git a/mm/shmem.c b/mm/shmem.c index 389b2d76396e..ed07d0c03312 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3289,13 +3289,26 @@ out_unacct_blocks: return ret; } +static struct folio *shmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff) +{ + struct folio *folio; + int err; + + err = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); + if (err) + return ERR_PTR(err); + + return folio; +} + static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags) { return true; } static const struct vm_uffd_ops shmem_uffd_ops = { - .can_userfault = shmem_can_userfault, + .can_userfault = shmem_can_userfault, + .get_folio_noalloc = shmem_get_folio_noalloc, }; #endif /* CONFIG_USERFAULTFD */ diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 3a824e034a09..5b204c3ec986 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -191,6 +191,7 @@ static int mfill_get_vma(struct mfill_state *state) struct userfaultfd_ctx *ctx = state->ctx; uffd_flags_t flags = state->flags; struct vm_area_struct *dst_vma; + const struct vm_uffd_ops *ops; int err; /* @@ -232,10 +233,12 @@ static int mfill_get_vma(struct mfill_state *state) if (is_vm_hugetlb_page(dst_vma)) return 0; - if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) + ops = vma_uffd_ops(dst_vma); + if (!ops) goto out_unlock; - if (!vma_is_shmem(dst_vma) && - uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) + + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) && + !ops->get_folio_noalloc) goto out_unlock; return 0; @@ -575,6 +578,7 @@ out: static int mfill_atomic_pte_continue(struct mfill_state *state) { struct vm_area_struct *dst_vma = state->vma; + const struct vm_uffd_ops *ops = vma_uffd_ops(dst_vma); unsigned long dst_addr = state->dst_addr; pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); struct inode *inode = file_inode(dst_vma->vm_file); @@ -584,17 +588,16 @@ static int mfill_atomic_pte_continue(struct mfill_state *state) struct page *page; int ret; - ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); - /* Our caller expects us to return -EFAULT if we failed to find folio */ - if (ret == -ENOENT) - ret = -EFAULT; - if (ret) - goto out; - if (!folio) { - ret = -EFAULT; - goto out; + if (!ops) { + VM_WARN_ONCE(1, "UFFDIO_CONTINUE for unsupported VMA"); + return -EOPNOTSUPP; } + folio = ops->get_folio_noalloc(inode, pgoff); + /* Our caller expects us to return -EFAULT if we failed to find folio */ + if (IS_ERR_OR_NULL(folio)) + return -EFAULT; + page = folio_file_page(folio, pgoff); if (PageHWPoison(page)) { ret = -EIO; @@ -607,13 +610,12 @@ static int mfill_atomic_pte_continue(struct mfill_state *state) goto out_release; folio_unlock(folio); - ret = 0; -out: - return ret; + return 0; + out_release: folio_unlock(folio); folio_put(folio); - goto out; + return ret; } /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */