KVM: arm64: Block cacheable PFNMAP mapping
author     Ankit Agrawal <ankita@nvidia.com>
           Sat, 5 Jul 2025 07:17:14 +0000 (07:17 +0000)
committer  Oliver Upton <oliver.upton@linux.dev>
           Mon, 7 Jul 2025 23:43:27 +0000 (16:43 -0700)
Fixes a security bug caused by mismatched memory attributes between
the S1 and S2 mappings.

Currently, it is possible for a region to be cacheable in the userspace
VMA but mapped non-cacheable in S2. This creates a potential issue: the
VMM may sanitize memory between VMs using cacheable stores, ensuring it
is zeroed. However, if KVM subsequently assigns this memory to a VM as
uncached, the zeroing may still sit in the CPU caches while the VM's
non-cacheable accesses bypass them, so the VM could end up reading
stale, non-zeroed data from a previous VM directly from memory, leading
to unintended data exposure. This is a security risk.

Block such attribute mismatches by returning -EINVAL when userspace
tries to map a PFNMAP region cacheable. Only the NORMAL_NC and DEVICE_*
memory types are allowed.
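
For illustration only (not part of this patch), a minimal userspace
sketch of the now-rejected pattern. The device path is a placeholder
for any driver whose mmap() installs a cacheable VM_PFNMAP mapping, and
the sizes and guest physical address are arbitrary; with this change
the memslot registration fails with -EINVAL up front (and a lazily
faulted region fails likewise at stage-2 fault time) instead of KVM
silently installing a mismatched non-cacheable stage-2 mapping:

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <linux/kvm.h>

  int main(void)
  {
          int kvm = open("/dev/kvm", O_RDWR);
          int vm = ioctl(kvm, KVM_CREATE_VM, 0);

          /* Placeholder device: its mmap() is assumed to set up a
           * cacheable VM_PFNMAP mapping (MT_NORMAL in vm_page_prot). */
          int dev = open("/dev/cacheable-pfnmap", O_RDWR);
          void *mem = mmap(NULL, 0x10000, PROT_READ | PROT_WRITE,
                           MAP_SHARED, dev, 0);

          struct kvm_userspace_memory_region region = {
                  .slot            = 0,
                  .guest_phys_addr = 0x10000000,
                  .memory_size     = 0x10000,
                  .userspace_addr  = (unsigned long)mem,
          };

          /* After this patch, fails with EINVAL for a cacheable
           * PFNMAP VMA. */
          if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region) < 0)
                  perror("KVM_SET_USER_MEMORY_REGION");

          return 0;
  }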

CC: Oliver Upton <oliver.upton@linux.dev>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Sean Christopherson <seanjc@google.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Tested-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20250705071717.5062-4-ankita@nvidia.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
arch/arm64/kvm/mmu.c

index 5fe24f30999d3f891444c1984f4e3dad82d6206e..708a635e38bc7a990d5b72bbca424de1065ddf4f 100644
@@ -1465,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
        return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
+static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
+{
+       switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
+       case MT_NORMAL_NC:
+       case MT_DEVICE_nGnRnE:
+       case MT_DEVICE_nGnRE:
+               return false;
+       default:
+               return true;
+       }
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_s2_trans *nested,
                          struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1472,7 +1484,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 {
        int ret = 0;
        bool write_fault, writable, force_pte = false;
-       bool exec_fault, mte_allowed;
+       bool exec_fault, mte_allowed, is_vma_cacheable;
        bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
        unsigned long mmu_seq;
        phys_addr_t ipa = fault_ipa;
@@ -1617,6 +1629,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
        vm_flags = vma->vm_flags;
 
+       is_vma_cacheable = kvm_vma_is_cacheable(vma);
+
        /* Don't use the VMA after the unlock -- it may have vanished */
        vma = NULL;
 
@@ -1660,6 +1674,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                writable = false;
        }
 
+       /*
+        * Prevent non-cacheable mappings in the stage-2 if a region of memory
+        * is cacheable in the primary MMU and the kernel lacks a cacheable
+        * alias. KVM cannot guarantee coherency between the guest/host aliases
+        * without the ability to perform CMOs.
+        */
+       if (is_vma_cacheable && s2_force_noncacheable)
+               return -EINVAL;
+
        if (exec_fault && s2_force_noncacheable)
                return -ENOEXEC;
 
@@ -2219,6 +2242,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                ret = -EINVAL;
                                break;
                        }
+
+                       /* Cacheable PFNMAP is not allowed */
+                       if (kvm_vma_is_cacheable(vma)) {
+                               ret = -EINVAL;
+                               break;
+                       }
                }
                hva = min(reg_end, vma->vm_end);
        } while (hva < reg_end);
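
For background on kvm_vma_is_cacheable(): on arm64 the memory type of a
userspace mapping is encoded as a MAIR attribute index inside
vma->vm_page_prot, which is what FIELD_GET(PTE_ATTRINDX_MASK, ...)
extracts above. A kernel-context sketch (illustration only, not part of
the patch; the function name is hypothetical) of how the common pgprot
helpers map onto the indices the helper checks:

  #include <linux/mm.h>

  static void __maybe_unused pgprot_examples(void)
  {
          /* Default mapping: MT_NORMAL (cacheable), so
           * kvm_vma_is_cacheable() returns true, i.e. now rejected
           * for PFNMAP regions handed to KVM. */
          pgprot_t prot = vm_get_page_prot(VM_READ | VM_WRITE);

          /* MT_DEVICE_nGnRnE: kvm_vma_is_cacheable() == false. */
          prot = pgprot_noncached(prot);

          /* MT_NORMAL_NC: kvm_vma_is_cacheable() == false. */
          prot = pgprot_writecombine(prot);

          (void)prot;
  }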