mshv: Fix huge page handling in memory region traversal
author    Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
          Wed, 3 Dec 2025 21:40:56 +0000 (21:40 +0000)
committer Wei Liu <wei.liu@kernel.org>
          Fri, 5 Dec 2025 23:20:25 +0000 (23:20 +0000)
The previous code assumed that if a region's first page was huge, the
entire region consisted of huge pages and stored this in a large_pages
flag. This premise is incorrect not only for movable regions (where
pages can be split and merged on invalidate callbacks or page faults),
but even for pinned regions: THPs can be split and merged during
allocation, so a large, pinned region may contain a mix of huge and
regular pages.

This change removes the large_pages flag and replaces region-wide
assumptions with per-chunk inspection of the actual page size when
mapping, unmapping, sharing, and unsharing. This makes huge page
handling correct for mixed-page regions and avoids relying on stale
metadata that can easily become invalid as memory is remapped.
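
For illustration, the per-chunk scan at the heart of the change reduces
to the following sketch (next_chunk_len() is a hypothetical name used
here for exposition; folio_order(), page_folio(), and struct page are
the kernel interfaces the patch itself uses):

    /*
     * Return how many contiguous present pages, starting at
     * pages[offset], share the first page's folio order, so the
     * caller can issue one hypercall per uniformly sized chunk.
     */
    static u64 next_chunk_len(struct page **pages, u64 offset, u64 count)
    {
            unsigned int order = folio_order(page_folio(pages[offset]));
            u64 stride = 1ULL << order;     /* 1 for 4K, 512 for 2M */
            u64 n;

            for (n = stride; n < count; n += stride) {
                    struct page *page = pages[offset + n];

                    if (!page || folio_order(page_folio(page)) != order)
                            break;
            }
            return n;
    }

The actual mshv_region_process_chunk() additionally rejects folio
orders other than 0 and HPAGE_PMD_ORDER, since the hypervisor only
supports 4K and 2M page sizes.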

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Reviewed-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>
Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
drivers/hv/mshv_regions.c
drivers/hv/mshv_root.h

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index 35b866670840ed20900307db0b2b076d0c3086ed..40126c12ab335d6db9be145b9c9f134c83bb5549 100644
 
 #include "mshv_root.h"
 
+/**
+ * mshv_region_process_chunk - Processes a contiguous chunk of memory pages
+ *                             in a region.
+ * @region     : Pointer to the memory region structure.
+ * @flags      : Flags to pass to the handler.
+ * @page_offset: Offset into the region's pages array to start processing.
+ * @page_count : Number of pages to process.
+ * @handler    : Callback function to handle the chunk.
+ *
+ * This function scans the region's pages starting from @page_offset,
+ * checking for contiguous present pages of the same size (normal or huge).
+ * It invokes @handler for the chunk of contiguous pages found. Returns the
+ * number of pages handled, or a negative error code if the first page is
+ * not present or the handler fails.
+ *
+ * Note: The @handler callback must be able to handle both normal and huge
+ * pages.
+ *
+ * Return: Number of pages handled, or negative error code.
+ */
+static long mshv_region_process_chunk(struct mshv_mem_region *region,
+                                     u32 flags,
+                                     u64 page_offset, u64 page_count,
+                                     int (*handler)(struct mshv_mem_region *region,
+                                                    u32 flags,
+                                                    u64 page_offset,
+                                                    u64 page_count))
+{
+       u64 count, stride;
+       unsigned int page_order;
+       struct page *page;
+       int ret;
+
+       page = region->pages[page_offset];
+       if (!page)
+               return -EINVAL;
+
+       page_order = folio_order(page_folio(page));
+       /* The hypervisor only supports 4K and 2M page sizes */
+       if (page_order && page_order != HPAGE_PMD_ORDER)
+               return -EINVAL;
+
+       stride = 1 << page_order;
+
+       /* Start at stride since the first page is validated */
+       for (count = stride; count < page_count; count += stride) {
+               page = region->pages[page_offset + count];
+
+               /* Break if current page is not present */
+               if (!page)
+                       break;
+
+               /* Break if page size changes */
+               if (page_order != folio_order(page_folio(page)))
+                       break;
+       }
+
+       ret = handler(region, flags, page_offset, count);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+/**
+ * mshv_region_process_range - Processes a range of memory pages in a
+ *                             region.
+ * @region     : Pointer to the memory region structure.
+ * @flags      : Flags to pass to the handler.
+ * @page_offset: Offset into the region's pages array to start processing.
+ * @page_count : Number of pages to process.
+ * @handler    : Callback function to handle each chunk of contiguous
+ *               pages.
+ *
+ * Iterates over the specified range of pages in @region, skipping
+ * non-present pages. For each contiguous chunk of present pages, invokes
+ * @handler via mshv_region_process_chunk.
+ *
+ * Note: The @handler callback must be able to handle both normal and huge
+ * pages.
+ *
+ * Returns 0 on success, or a negative error code on failure.
+ */
+static int mshv_region_process_range(struct mshv_mem_region *region,
+                                    u32 flags,
+                                    u64 page_offset, u64 page_count,
+                                    int (*handler)(struct mshv_mem_region *region,
+                                                   u32 flags,
+                                                   u64 page_offset,
+                                                   u64 page_count))
+{
+       long ret;
+
+       if (page_offset + page_count > region->nr_pages)
+               return -EINVAL;
+
+       while (page_count) {
+               /* Skip non-present pages */
+               if (!region->pages[page_offset]) {
+                       page_offset++;
+                       page_count--;
+                       continue;
+               }
+
+               ret = mshv_region_process_chunk(region, flags,
+                                               page_offset,
+                                               page_count,
+                                               handler);
+               if (ret < 0)
+                       return ret;
+
+               page_offset += ret;
+               page_count -= ret;
+       }
+
+       return 0;
+}
+
 struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
                                           u64 uaddr, u32 flags,
                                           bool is_mmio)
@@ -33,55 +151,86 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
        if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE))
                region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE;
 
-       /* Note: large_pages flag populated when we pin the pages */
        if (!is_mmio)
                region->flags.range_pinned = true;
 
        return region;
 }
 
+static int mshv_region_chunk_share(struct mshv_mem_region *region,
+                                  u32 flags,
+                                  u64 page_offset, u64 page_count)
+{
+       struct page *page = region->pages[page_offset];
+
+       if (PageHuge(page) || PageTransCompound(page))
+               flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
+
+       return hv_call_modify_spa_host_access(region->partition->pt_id,
+                                             region->pages + page_offset,
+                                             page_count,
+                                             HV_MAP_GPA_READABLE |
+                                             HV_MAP_GPA_WRITABLE,
+                                             flags, true);
+}
+
 int mshv_region_share(struct mshv_mem_region *region)
 {
        u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED;
 
-       if (region->flags.large_pages)
+       return mshv_region_process_range(region, flags,
+                                        0, region->nr_pages,
+                                        mshv_region_chunk_share);
+}
+
+static int mshv_region_chunk_unshare(struct mshv_mem_region *region,
+                                    u32 flags,
+                                    u64 page_offset, u64 page_count)
+{
+       struct page *page = region->pages[page_offset];
+
+       if (PageHuge(page) || PageTransCompound(page))
                flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
 
        return hv_call_modify_spa_host_access(region->partition->pt_id,
-                       region->pages, region->nr_pages,
-                       HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE,
-                       flags, true);
+                                             region->pages + page_offset,
+                                             page_count, 0,
+                                             flags, false);
 }
 
 int mshv_region_unshare(struct mshv_mem_region *region)
 {
        u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE;
 
-       if (region->flags.large_pages)
-               flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
-
-       return hv_call_modify_spa_host_access(region->partition->pt_id,
-                       region->pages, region->nr_pages,
-                       0,
-                       flags, false);
+       return mshv_region_process_range(region, flags,
+                                        0, region->nr_pages,
+                                        mshv_region_chunk_unshare);
 }
 
-static int mshv_region_remap_pages(struct mshv_mem_region *region,
-                                  u32 map_flags,
+static int mshv_region_chunk_remap(struct mshv_mem_region *region,
+                                  u32 flags,
                                   u64 page_offset, u64 page_count)
 {
-       if (page_offset + page_count > region->nr_pages)
-               return -EINVAL;
+       struct page *page = region->pages[page_offset];
 
-       if (region->flags.large_pages)
-               map_flags |= HV_MAP_GPA_LARGE_PAGE;
+       if (PageHuge(page) || PageTransCompound(page))
+               flags |= HV_MAP_GPA_LARGE_PAGE;
 
        return hv_call_map_gpa_pages(region->partition->pt_id,
                                     region->start_gfn + page_offset,
-                                    page_count, map_flags,
+                                    page_count, flags,
                                     region->pages + page_offset);
 }
 
+static int mshv_region_remap_pages(struct mshv_mem_region *region,
+                                  u32 map_flags,
+                                  u64 page_offset, u64 page_count)
+{
+       return mshv_region_process_range(region, map_flags,
+                                        page_offset, page_count,
+                                        mshv_region_chunk_remap);
+}
+
 int mshv_region_map(struct mshv_mem_region *region)
 {
        u32 map_flags = region->hv_map_flags;
@@ -134,9 +283,6 @@ int mshv_region_pin(struct mshv_mem_region *region)
                        goto release_pages;
        }
 
-       if (PageHuge(region->pages[0]))
-               region->flags.large_pages = true;
-
        return 0;
 
 release_pages:
@@ -144,10 +290,30 @@ release_pages:
        return ret;
 }
 
+static int mshv_region_chunk_unmap(struct mshv_mem_region *region,
+                                  u32 flags,
+                                  u64 page_offset, u64 page_count)
+{
+       struct page *page = region->pages[page_offset];
+
+       if (PageHuge(page) || PageTransCompound(page))
+               flags |= HV_UNMAP_GPA_LARGE_PAGE;
+
+       return hv_call_unmap_gpa_pages(region->partition->pt_id,
+                                      region->start_gfn + page_offset,
+                                      page_count, flags);
+}
+
+static int mshv_region_unmap(struct mshv_mem_region *region)
+{
+       return mshv_region_process_range(region, 0,
+                                        0, region->nr_pages,
+                                        mshv_region_chunk_unmap);
+}
+
 void mshv_region_destroy(struct mshv_mem_region *region)
 {
        struct mshv_partition *partition = region->partition;
-       u32 unmap_flags = 0;
        int ret;
 
        hlist_del(&region->hnode);
@@ -162,12 +328,7 @@ void mshv_region_destroy(struct mshv_mem_region *region)
                }
        }
 
-       if (region->flags.large_pages)
-               unmap_flags |= HV_UNMAP_GPA_LARGE_PAGE;
-
-       /* ignore unmap failures and continue as process may be exiting */
-       hv_call_unmap_gpa_pages(partition->pt_id, region->start_gfn,
-                               region->nr_pages, unmap_flags);
+       mshv_region_unmap(region);
 
        mshv_region_invalidate(region);
 
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
index 0366f416c2f0da60dfd1f08d897455670f79e2d1..ff3374f13691da700954264c12838fc7c31b369e 100644
@@ -77,9 +77,8 @@ struct mshv_mem_region {
        u64 start_uaddr;
        u32 hv_map_flags;
        struct {
-               u64 large_pages:  1; /* 2MiB */
                u64 range_pinned: 1;
-               u64 reserved:    62;
+               u64 reserved:    63;
        } flags;
        struct mshv_partition *partition;
        struct page *pages[];