git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
s390/uv: Split large folios in gmap_make_secure()
author David Hildenbrand <david@redhat.com>
Wed, 8 May 2024 18:29:48 +0000 (20:29 +0200)
committer Alexander Gordeev <agordeev@linux.ibm.com>
Wed, 5 Jun 2024 15:17:24 +0000 (17:17 +0200)
While s390x makes sure to never have PMD-mapped THP in processes that use
KVM -- by remapping them using PTEs in
thp_split_walk_pmd_entry()->split_huge_pmd() -- there is still the
possibility of having PTE-mapped THPs (large folios) mapped into guest
memory.

This would happen if user space allocates memory before calling
KVM_CREATE_VM (which would call s390_enable_sie()). With upstream QEMU,
this currently doesn't happen, because guest memory is set up and
conditionally preallocated after KVM_CREATE_VM.

Could it happen with shmem/file-backed memory when another process
allocated memory in the pagecache? Likely, although currently not a
common setup.

Trying to split any PTE-mapped large folios sounds like the right and
future-proof thing to do here. So let's call split_folio() and handle the
return values accordingly.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Link: https://lore.kernel.org/r/20240508182955.358628-4-david@redhat.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
arch/s390/kernel/uv.c

index 25fe28d189df088eb1863580db5c8edd0c3fc789..3c6d86e3e8285b6c73c905872c2bc1c9d7b876f4 100644 (file)
@@ -338,11 +338,10 @@ again:
                goto out;
        if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
                folio = page_folio(pte_page(*ptep));
-               rc = -EINVAL;
-               if (folio_test_large(folio))
-                       goto unlock;
                rc = -EAGAIN;
-               if (folio_trylock(folio)) {
+               if (folio_test_large(folio)) {
+                       rc = -E2BIG;
+               } else if (folio_trylock(folio)) {
                        if (should_export_before_import(uvcb, gmap->mm))
                                uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
                        rc = make_folio_secure(folio, uvcb);
@@ -353,15 +352,35 @@ again:
                 * Once we drop the PTL, the folio may get unmapped and
                 * freed immediately. We need a temporary reference.
                 */
-               if (rc == -EAGAIN)
+               if (rc == -EAGAIN || rc == -E2BIG)
                        folio_get(folio);
        }
-unlock:
        pte_unmap_unlock(ptep, ptelock);
 out:
        mmap_read_unlock(gmap->mm);
 
        switch (rc) {
+       case -E2BIG:
+               folio_lock(folio);
+               rc = split_folio(folio);
+               folio_unlock(folio);
+               folio_put(folio);
+
+               switch (rc) {
+               case 0:
+                       /* Splitting succeeded, try again immediately. */
+                       goto again;
+               case -EAGAIN:
+                       /* Additional folio references. */
+                       if (drain_lru(&drain_lru_called))
+                               goto again;
+                       return -EAGAIN;
+               case -EBUSY:
+                       /* Unexpected race. */
+                       return -EAGAIN;
+               }
+               WARN_ON_ONCE(1);
+               return -ENXIO;
        case -EAGAIN:
                /*
                 * If we are here because the UVC returned busy or partial