KVM: guest_memfd: Add plumbing to host to map guest_memfd pages
Author:     Fuad Tabba <tabba@google.com>
AuthorDate: Tue, 29 Jul 2025 22:54:41 +0000 (15:54 -0700)
Commit:     Paolo Bonzini <pbonzini@redhat.com>
CommitDate: Wed, 27 Aug 2025 08:35:00 +0000 (04:35 -0400)

Introduce the core infrastructure to enable host userspace to mmap()
guest_memfd-backed memory. This is needed for several evolving KVM use
cases:

* Non-CoCo VM backing: Allows VMMs like Firecracker to run guests
  entirely backed by guest_memfd, even for non-CoCo VMs [1]. This
  provides a unified memory management model and simplifies guest memory
  handling.

* Direct map removal for enhanced security: This is an important step
  for direct map removal of guest memory [2]. By allowing host userspace
  to fault in guest_memfd pages directly, we can avoid maintaining host
  kernel direct maps of guest memory. This provides additional hardening
  against Spectre-like transient execution attacks by removing a
  potential attack surface within the kernel.

* Future guest_memfd features: This also lays the groundwork for future
  enhancements to guest_memfd, such as supporting huge pages and
  enabling in-place sharing of guest memory with the host for CoCo
  platforms that permit it [3].

Enable the basic mmap and fault handling logic within guest_memfd, but
hold off on allowing userspace to actually mmap() until the architecture
support is also in place; a sketch of the intended userspace flow
follows the links below.

[1] https://github.com/firecracker-microvm/firecracker/tree/feature/secret-hiding
[2] https://lore.kernel.org/linux-mm/cc1bb8e9bc3e1ab637700a4d3defeec95b55060a.camel@amazon.com
[3] https://lore.kernel.org/all/c1c9591d-218a-495c-957b-ba356c8f8e09@redhat.com/T/#u
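
For orientation, a minimal sketch of the userspace flow this series is
building toward, using the existing KVM_CREATE_GUEST_MEMFD ioctl. This
is an illustration, not part of the patch: with only this patch applied,
kvm_gmem_supports_mmap() still returns false, so the mmap() below fails
with ENODEV until architecture support (and a flag to request mappable
memory) lands.

  #include <stdio.h>
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <linux/kvm.h>

  /* Create a guest_memfd on an existing VM fd and map it into the host. */
  static void *map_guest_memfd(int vm_fd, uint64_t size)
  {
          struct kvm_create_guest_memfd gmem = {
                  .size  = size,
                  .flags = 0,     /* a future mmap-enabling flag goes here */
          };
          void *mem;
          int fd;

          fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
          if (fd < 0) {
                  perror("KVM_CREATE_GUEST_MEMFD");
                  return MAP_FAILED;
          }

          /* Must be MAP_SHARED; kvm_gmem_mmap() rejects private mappings. */
          mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
          if (mem == MAP_FAILED)
                  perror("mmap");  /* ENODEV until arch support is enabled */

          return mem;
  }

Faults on such a mapping are then served by
kvm_gmem_fault_user_mapping(), added below.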

Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Shivank Garg <shivankg@amd.com>
Acked-by: David Hildenbrand <david@redhat.com>
Co-developed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250729225455.670324-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/guest_memfd.c

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 604490b1cb19c7492f0a4241a0a0011c24fcc6bc..33fba801b205d9e7b0311ca34312e2fa40227674 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13521,6 +13521,16 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
 
+#ifdef CONFIG_KVM_GUEST_MEMFD
+/*
+ * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory
+ * (the private vs. shared tracking needs to be moved into guest_memfd).
+ */
+bool kvm_arch_supports_gmem_mmap(struct kvm *kvm)
+{
+       return !kvm_arch_has_private_mem(kvm);
+}
+
 #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
 {
@@ -13534,6 +13544,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
        kvm_x86_call(gmem_invalidate)(start, end);
 }
 #endif
+#endif
 
 int kvm_spec_ctrl_test_value(u64 value)
 {
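
The x86 hook above overrides the __weak default added in
virt/kvm/guest_memfd.c below, so an architecture that defines its own
version wins at link time. As a hypothetical illustration (not in this
patch), an architecture that cannot yet support host mappings would pin
the hook to false instead:

  bool kvm_arch_supports_gmem_mmap(struct kvm *kvm)
  {
          /* Hypothetical opt-out, mirroring the x86 pattern above. */
          return false;
  }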
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4d1c44622056c1179f9d0d1ec3f3c331162e56b7..26bad600f9fa32714b505de41079fa0067d26fff 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -726,6 +726,10 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
 }
 #endif
 
+#ifdef CONFIG_KVM_GUEST_MEMFD
+bool kvm_arch_supports_gmem_mmap(struct kvm *kvm);
+#endif
+
 #ifndef kvm_arch_has_readonly_mem
 static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
 {
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index a99e11b8b77f0845c59a9d12a4b842f41a8d1f6a..67e7cd7210ef63884921181a3123cbdf2b7144ca 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -312,7 +312,72 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
        return gfn - slot->base_gfn + slot->gmem.pgoff;
 }
 
+static bool kvm_gmem_supports_mmap(struct inode *inode)
+{
+       return false;
+}
+
+static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vmf->vma->vm_file);
+       struct folio *folio;
+       vm_fault_t ret = VM_FAULT_LOCKED;
+
+       if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+               return VM_FAULT_SIGBUS;
+
+       folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+       if (IS_ERR(folio)) {
+               int err = PTR_ERR(folio);
+
+               if (err == -EAGAIN)
+                       return VM_FAULT_RETRY;
+
+               return vmf_error(err);
+       }
+
+       if (WARN_ON_ONCE(folio_test_large(folio))) {
+               ret = VM_FAULT_SIGBUS;
+               goto out_folio;
+       }
+
+       if (!folio_test_uptodate(folio)) {
+               clear_highpage(folio_page(folio, 0));
+               kvm_gmem_mark_prepared(folio);
+       }
+
+       vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+       if (ret != VM_FAULT_LOCKED) {
+               folio_unlock(folio);
+               folio_put(folio);
+       }
+
+       return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+       .fault = kvm_gmem_fault_user_mapping,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       if (!kvm_gmem_supports_mmap(file_inode(file)))
+               return -ENODEV;
+
+       if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+           (VM_SHARED | VM_MAYSHARE)) {
+               return -EINVAL;
+       }
+
+       vma->vm_ops = &kvm_gmem_vm_ops;
+
+       return 0;
+}
+
 static struct file_operations kvm_gmem_fops = {
+       .mmap           = kvm_gmem_mmap,
        .open           = generic_file_open,
        .release        = kvm_gmem_release,
        .fallocate      = kvm_gmem_fallocate,
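
To make the fault-handler semantics above concrete: the first touch of
each page goes through kvm_gmem_fault_user_mapping(), which clears the
folio and marks it prepared, while any access at or beyond i_size gets
SIGBUS. A hypothetical test-style snippet (not part of the patch,
assuming mem was mapped MAP_SHARED from a guest_memfd of the given
size):

  #include <assert.h>
  #include <string.h>

  static void touch_gmem(unsigned char *mem, size_t size, size_t page_sz)
  {
          size_t off;

          /* First touch faults in a freshly cleared page: reads are 0. */
          for (off = 0; off < size; off += page_sz)
                  assert(mem[off] == 0);

          /* The mapping is MAP_SHARED, so writes land in the folio. */
          memset(mem, 0xaa, size);
          assert(mem[0] == 0xaa && mem[size - 1] == 0xaa);

          /* Touching mem[size] would fault past i_size -> SIGBUS. */
  }
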
@@ -391,6 +456,11 @@ static const struct inode_operations kvm_gmem_iops = {
        .setattr        = kvm_gmem_setattr,
 };
 
+bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
+{
+       return true;
+}
+
 static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 {
        const char *anon_name = "[kvm-gmem]";