]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
mm: hugetlb: fix incorrect fallback for subpool
author Wupeng Ma <mawupeng1@huawei.com>
Thu, 10 Apr 2025 06:26:33 +0000 (14:26 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 22 May 2025 12:31:55 +0000 (14:31 +0200)
commit a833a693a490ecff8ba377654c6d4d333718b6b1 upstream.

During our testing with hugetlb subpool enabled, we observe that
hstate->resv_huge_pages may underflow into negative values.  Root cause
analysis reveals a race condition in subpool reservation fallback handling
as follows:

hugetlb_reserve_pages()
    /* Attempt subpool reservation */
    gbl_reserve = hugepage_subpool_get_pages(spool, chg);

    /* Global reservation may fail after subpool allocation */
    if (hugetlb_acct_memory(h, gbl_reserve) < 0)
        goto out_put_pages;

out_put_pages:
    /* This incorrectly restores reservation to subpool */
    hugepage_subpool_put_pages(spool, chg);

When hugetlb_acct_memory() fails after subpool allocation, the current
implementation over-commits subpool reservations by returning the full
'chg' value instead of the actual allocated 'gbl_reserve' amount.  This
discrepancy propagates to global reservations during subsequent releases,
eventually causing resv_huge_pages underflow.

This problem can be triggered easily with the following steps:
1. reserve hugepages for hugetlb allocation
2. mount hugetlbfs with min_size to enable hugetlb subpool
3. allocate hugepages with two tasks (make sure the second will fail due
   to an insufficient amount of hugepages)
4. wait for a few seconds and repeat step 3, which will make
   hstate->resv_huge_pages go below zero.

To fix this problem, return the correct amount of pages to the subpool
during the fallback after hugepage_subpool_get_pages() is called.

Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com
Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools")
Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
Tested-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Ma Wupeng <mawupeng1@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
mm/hugetlb.c

index 44b8feb83402b3552486c4280863bba25cf0f233..8acd95964ad15d75fa97dfb77801c494727a4caa 100644 (file)
@@ -2987,7 +2987,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
        struct hugepage_subpool *spool = subpool_vma(vma);
        struct hstate *h = hstate_vma(vma);
        struct folio *folio;
-       long retval, gbl_chg;
+       long retval, gbl_chg, gbl_reserve;
        map_chg_state map_chg;
        int ret, idx;
        struct hugetlb_cgroup *h_cg = NULL;
@@ -3140,8 +3140,16 @@ out_uncharge_cgroup_reservation:
                hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
                                                    h_cg);
 out_subpool_put:
-       if (map_chg)
-               hugepage_subpool_put_pages(spool, 1);
+       /*
+        * put page to subpool iff the quota of subpool's rsv_hpages is used
+        * during hugepage_subpool_get_pages.
+        */
+       if (map_chg && !gbl_chg) {
+               gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+               hugetlb_acct_memory(h, -gbl_reserve);
+       }
+
+
 out_end_reservation:
        if (map_chg != MAP_CHG_ENFORCED)
                vma_end_reservation(h, vma, addr);
@@ -6949,7 +6957,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
                                        struct vm_area_struct *vma,
                                        vm_flags_t vm_flags)
 {
-       long chg = -1, add = -1;
+       long chg = -1, add = -1, spool_resv, gbl_resv;
        struct hstate *h = hstate_inode(inode);
        struct hugepage_subpool *spool = subpool_inode(inode);
        struct resv_map *resv_map;
@@ -7084,8 +7092,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
        return true;
 
 out_put_pages:
-       /* put back original number of pages, chg */
-       (void)hugepage_subpool_put_pages(spool, chg);
+       spool_resv = chg - gbl_reserve;
+       if (spool_resv) {
+               /* put sub pool's reservation back, chg - gbl_reserve */
+               gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+               /*
+                * subpool's reserved pages can not be put back due to race,
+                * return to hstate.
+                */
+               hugetlb_acct_memory(h, -gbl_resv);
+       }
 out_uncharge_cgroup:
        hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
                                            chg * pages_per_huge_page(h), h_cg);