mm: shmem: add mTHP support for anonymous shmem

author Baolin Wang <baolin.wang@linux.alibaba.com>

Tue, 11 Jun 2024 10:11:08 +0000 (18:11 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Thu, 4 Jul 2024 02:30:04 +0000 (19:30 -0700)
author Baolin Wang <baolin.wang@linux.alibaba.com>
Tue, 11 Jun 2024 10:11:08 +0000 (18:11 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 4 Jul 2024 02:30:04 +0000 (19:30 -0700)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h

index 6a1edd75296880fa89c306879814a34edefa08e8..53b7a137f4603246e6406e6d37f98cd8672fcbc2 100644 (file)
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -560,6 +560,16 @@ static inline bool thp_migration_supported(void)
  {
         return false;
  }
+
+static inline int highest_order(unsigned long orders)
+{
+       return 0;       /* stub: ignores @orders and always reports order 0 */
+}
+
+static inline int next_order(unsigned long *orders, int prev)
+{
+       return 0;       /* stub: ignores @prev and leaves *orders untouched */
+}
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  
  static inline int split_folio_to_list_to_order(struct folio *folio,
diff --git a/mm/shmem.c b/mm/shmem.c

index d80608f9d6affbbfb1d109ee39236b5cf712a13e..8225b2f230ebb6a0bd93801df96d0fe0500e61ba 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1610,6 +1610,107 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
         return result;
  }
  
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static unsigned long shmem_allowable_huge_orders(struct inode *inode,
+                               struct vm_area_struct *vma, pgoff_t index,
+                               bool global_huge)
+{
+       unsigned long mask = READ_ONCE(huge_shmem_orders_always);
+       unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
+       unsigned long vm_flags = vma->vm_flags;
+       /*
+        * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that
+        * are enabled for this vma.
+        */
+       unsigned long orders = BIT(PMD_ORDER + 1) - 1;
+       loff_t i_size;
+       int order;
+
+       if ((vm_flags & VM_NOHUGEPAGE) ||
+           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+               return 0;
+
+       /* If the hardware/firmware marked hugepage support disabled. */
+       if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
+               return 0;
+
+       /*
+        * Following the 'deny' semantics of the top level, force the huge
+        * option off from all mounts.
+        */
+       if (shmem_huge == SHMEM_HUGE_DENY)
+               return 0;
+
+       /*
+        * Only allow inherit orders if the top-level value is 'force', which
+        * means non-PMD sized THP can not override 'huge' mount option now.
+        */
+       if (shmem_huge == SHMEM_HUGE_FORCE)
+               return READ_ONCE(huge_shmem_orders_inherit);
+
+       /* Allow mTHP that will be fully within i_size. */
+       order = highest_order(within_size_orders);
+       while (within_size_orders) {
+               /* Align to 1 << order pages; keep 'index' for later orders. */
+               i_size = round_up(i_size_read(inode), PAGE_SIZE);
+               if (i_size >> PAGE_SHIFT >= round_up(index + 1, 1UL << order)) {
+                       mask |= within_size_orders;
+                       break;
+               }
+
+               order = next_order(&within_size_orders, order);
+       }
+
+       if (vm_flags & VM_HUGEPAGE)
+               mask |= READ_ONCE(huge_shmem_orders_madvise);
+
+       if (global_huge)
+               mask |= READ_ONCE(huge_shmem_orders_inherit);
+
+       return orders & mask;
+}
+
+static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
+                                          struct address_space *mapping, pgoff_t index,
+                                          unsigned long orders)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       pgoff_t aligned_index;
+       int order;
+
+       orders = thp_vma_suitable_orders(vma, vmf->address, orders);
+       if (!orders)
+               return 0;
+
+       /* Find the highest order that can add into the page cache */
+       order = highest_order(orders);
+       while (orders) {
+               /* xa_find() rewrites its index on a hit: probe with a copy. */
+               aligned_index = round_down(index, 1UL << order);
+               if (!xa_find(&mapping->i_pages, &aligned_index,
+                            aligned_index + (1UL << order) - 1, XA_PRESENT))
+                       break;
+               order = next_order(&orders, order);
+       }
+
+       return orders;
+}
+#else
+static unsigned long shmem_allowable_huge_orders(struct inode *inode,
+                               struct vm_area_struct *vma, pgoff_t index,
+                               bool global_huge)
+{
+       return 0;       /* !CONFIG_TRANSPARENT_HUGEPAGE: empty order mask */
+}
+
+static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
+                                          struct address_space *mapping, pgoff_t index,
+                                          unsigned long orders)
+{
+       return 0;       /* !CONFIG_TRANSPARENT_HUGEPAGE: empty order mask */
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
  static struct folio *shmem_alloc_folio(gfp_t gfp, int order,
                 struct shmem_inode_info *info, pgoff_t index)
  {
@@ -1624,38 +1725,55 @@ static struct folio *shmem_alloc_folio(gfp_t gfp, int order,
         return folio;
  }
  
-static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
-               struct inode *inode, pgoff_t index,
-               struct mm_struct *fault_mm, bool huge)
+static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
+               gfp_t gfp, struct inode *inode, pgoff_t index,
+               struct mm_struct *fault_mm, unsigned long orders)
  {
         struct address_space *mapping = inode->i_mapping;
         struct shmem_inode_info *info = SHMEM_I(inode);
-       struct folio *folio;
+       struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
+       unsigned long suitable_orders = 0;
+       struct folio *folio = NULL;
         long pages;
-       int error;
+       int error, order;
  
         if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
-               huge = false;
+               orders = 0;
  
-       if (huge) {
-               pages = HPAGE_PMD_NR;
-               index = round_down(index, HPAGE_PMD_NR);
+       if (orders > 0) {
+               if (vma && vma_is_anon_shmem(vma)) {
+                       suitable_orders = shmem_suitable_orders(inode, vmf,
+                                                       mapping, index, orders);
+               } else if (orders & BIT(HPAGE_PMD_ORDER)) {
+                       pages = HPAGE_PMD_NR;
+                       suitable_orders = BIT(HPAGE_PMD_ORDER);
+                       index = round_down(index, HPAGE_PMD_NR);
  
-               /*
-                * Check for conflict before waiting on a huge allocation.
-                * Conflict might be that a huge page has just been allocated
-                * and added to page cache by a racing thread, or that there
-                * is already at least one small page in the huge extent.
-                * Be careful to retry when appropriate, but not forever!
-                * Elsewhere -EEXIST would be the right code, but not here.
-                */
-               if (xa_find(&mapping->i_pages, &index,
-                               index + HPAGE_PMD_NR - 1, XA_PRESENT))
-                       return ERR_PTR(-E2BIG);
+                       /*
+                        * Check for conflict before waiting on a huge allocation.
+                        * Conflict might be that a huge page has just been allocated
+                        * and added to page cache by a racing thread, or that there
+                        * is already at least one small page in the huge extent.
+                        * Be careful to retry when appropriate, but not forever!
+                        * Elsewhere -EEXIST would be the right code, but not here.
+                        */
+                       if (xa_find(&mapping->i_pages, &index,
+                                   index + HPAGE_PMD_NR - 1, XA_PRESENT))
+                               return ERR_PTR(-E2BIG);
+               }
  
-               folio = shmem_alloc_folio(gfp, HPAGE_PMD_ORDER, info, index);
-               if (!folio && pages == HPAGE_PMD_NR)
-                       count_vm_event(THP_FILE_FALLBACK);
+               order = highest_order(suitable_orders);
+               while (suitable_orders) {
+                       pages = 1UL << order;
+                       index = round_down(index, pages);
+                       folio = shmem_alloc_folio(gfp, order, info, index);
+                       if (folio)
+                               goto allocated;
+
+                       if (pages == HPAGE_PMD_NR)
+                               count_vm_event(THP_FILE_FALLBACK);
+                       order = next_order(&suitable_orders, order);
+               }
         } else {
                 pages = 1;
                 folio = shmem_alloc_folio(gfp, 0, info, index);
@@ -1663,6 +1781,7 @@ static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
         if (!folio)
                 return ERR_PTR(-ENOMEM);
  
+allocated:
         __folio_set_locked(folio);
         __folio_set_swapbacked(folio);
  
@@ -1957,7 +2076,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
         struct mm_struct *fault_mm;
         struct folio *folio;
         int error;
-       bool alloced;
+       bool alloced, huge;
+       unsigned long orders = 0;
  
         if (WARN_ON_ONCE(!shmem_mapping(inode->i_mapping)))
                 return -EINVAL;
@@ -2029,14 +2149,21 @@ repeat:
                 return 0;
         }
  
-       if (shmem_is_huge(inode, index, false, fault_mm,
-                         vma ? vma->vm_flags : 0)) {
+       huge = shmem_is_huge(inode, index, false, fault_mm,
+                            vma ? vma->vm_flags : 0);
+       /* Find hugepage orders that are allowed for anonymous shmem. */
+       if (vma && vma_is_anon_shmem(vma))
+               orders = shmem_allowable_huge_orders(inode, vma, index, huge);
+       else if (huge)
+               orders = BIT(HPAGE_PMD_ORDER);
+
+       if (orders > 0) {
                 gfp_t huge_gfp;
  
                 huge_gfp = vma_thp_gfp_mask(vma);
                 huge_gfp = limit_gfp_mask(huge_gfp, gfp);
-               folio = shmem_alloc_and_add_folio(huge_gfp,
-                               inode, index, fault_mm, true);
+               folio = shmem_alloc_and_add_folio(vmf, huge_gfp,
+                               inode, index, fault_mm, orders);
                 if (!IS_ERR(folio)) {
                         if (folio_test_pmd_mappable(folio))
                                 count_vm_event(THP_FILE_ALLOC);
@@ -2046,7 +2173,7 @@ repeat:
                         goto repeat;
         }
  
-       folio = shmem_alloc_and_add_folio(gfp, inode, index, fault_mm, false);
+       folio = shmem_alloc_and_add_folio(vmf, gfp, inode, index, fault_mm, 0);
         if (IS_ERR(folio)) {
                 error = PTR_ERR(folio);
                 if (error == -EEXIST)
@@ -2057,7 +2184,7 @@ repeat:
  
  alloced:
         alloced = true;
-       if (folio_test_pmd_mappable(folio) &&
+       if (folio_test_large(folio) &&
             DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
                                         folio_next_index(folio) - 1) {
                 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
author	Baolin Wang <baolin.wang@linux.alibaba.com>
	Tue, 11 Jun 2024 10:11:08 +0000 (18:11 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Thu, 4 Jul 2024 02:30:04 +0000 (19:30 -0700)
include/linux/huge_mm.h		patch \| blob \| blame \| history
mm/shmem.c		patch \| blob \| blame \| history