Commit 7c34048e (GKH) — stable tree patch
1 | From e125fe405abedc1dc8a5b2229b80ee91c1434015 Mon Sep 17 00:00:00 2001 |
2 | From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> | |
3 | Date: Fri, 5 Oct 2018 15:51:41 -0700 | |
4 | Subject: mm, thp: fix mlocking THP page with migration enabled | |
5 | ||
6 | From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | |
7 | ||
8 | commit e125fe405abedc1dc8a5b2229b80ee91c1434015 upstream. | |
9 | ||
10 | A transparent huge page is represented by a single entry on an LRU list. | |
11 | Therefore, we can only make unevictable an entire compound page, not | |
12 | individual subpages. | |
13 | ||
14 | If a user tries to mlock() part of a huge page, we want the rest of the | |
15 | page to be reclaimable. | |
16 | ||
17 | We handle this by keeping PTE-mapped huge pages on normal LRU lists: the | |
18 | PMD on border of VM_LOCKED VMA will be split into PTE table. | |
19 | ||
20 | Introduction of THP migration breaks[1] the rules around mlocking THP | |
21 | pages. If we had a single PMD mapping of the page in mlocked VMA, the | |
22 | page will get mlocked, regardless of PTE mappings of the page. | |
23 | ||
24 | For tmpfs/shmem it's easy to fix by checking PageDoubleMap() in | |
25 | remove_migration_pmd(). | |
26 | ||
27 | Anon THP pages can only be shared between processes via fork(). Mlocked | |
28 | page can only be shared if parent mlocked it before forking, otherwise CoW | |
29 | will be triggered on mlock(). | |
30 | ||
31 | For Anon-THP, we can fix the issue by munlocking the page on removing PTE | |
32 | migration entry for the page. PTEs for the page will always come after | |
33 | mlocked PMD: rmap walks VMAs from oldest to newest. | |
34 | ||
35 | Test-case: | |
36 | ||
37 | #include <unistd.h> | |
38 | #include <sys/mman.h> | |
39 | #include <sys/wait.h> | |
40 | #include <linux/mempolicy.h> | |
41 | #include <numaif.h> | |
42 | ||
43 | int main(void) | |
44 | { | |
45 | unsigned long nodemask = 4; | |
46 | void *addr; | |
47 | ||
48 | addr = mmap((void *)0x20000000UL, 2UL << 20, PROT_READ | PROT_WRITE, | |
49 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0); | |
50 | ||
51 | if (fork()) { | |
52 | wait(NULL); | |
53 | return 0; | |
54 | } | |
55 | ||
56 | mlock(addr, 4UL << 10); | |
57 | mbind(addr, 2UL << 20, MPOL_PREFERRED | MPOL_F_RELATIVE_NODES, | |
58 | &nodemask, 4, MPOL_MF_MOVE); | |
59 | ||
60 | return 0; | |
61 | } | |
62 | ||
63 | [1] https://lkml.kernel.org/r/CAOMGZ=G52R-30rZvhGxEbkTw7rLLwBGadVYeo--iizcD3upL3A@mail.gmail.com | |
64 | ||
65 | Link: http://lkml.kernel.org/r/20180917133816.43995-1-kirill.shutemov@linux.intel.com | |
66 | Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path") | |
67 | Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | |
68 | Reported-by: Vegard Nossum <vegard.nossum@oracle.com> | |
69 | Reviewed-by: Zi Yan <zi.yan@cs.rutgers.edu> | |
70 | Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | |
71 | Cc: Vlastimil Babka <vbabka@suse.cz> | |
72 | Cc: Andrea Arcangeli <aarcange@redhat.com> | |
73 | Cc: <stable@vger.kernel.org> [4.14+] | |
74 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
75 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
76 | ||
77 | --- | |
78 | mm/huge_memory.c | 2 +- | |
79 | mm/migrate.c | 3 +++ | |
80 | 2 files changed, 4 insertions(+), 1 deletion(-) | |
81 | ||
82 | --- a/mm/huge_memory.c | |
83 | +++ b/mm/huge_memory.c | |
84 | @@ -2929,7 +2929,7 @@ void remove_migration_pmd(struct page_vm | |
85 | else | |
86 | page_add_file_rmap(new, true); | |
87 | set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); | |
88 | - if (vma->vm_flags & VM_LOCKED) | |
89 | + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) | |
90 | mlock_vma_page(new); | |
91 | update_mmu_cache_pmd(vma, address, pvmw->pmd); | |
92 | } | |
93 | --- a/mm/migrate.c | |
94 | +++ b/mm/migrate.c | |
95 | @@ -275,6 +275,9 @@ static bool remove_migration_pte(struct | |
96 | if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) | |
97 | mlock_vma_page(new); | |
98 | ||
99 | + if (PageTransHuge(page) && PageMlocked(page)) | |
100 | + clear_page_mlock(page); | |
101 | + | |
102 | /* No need to invalidate - it was non-present before */ | |
103 | update_mmu_cache(vma, pvmw.address, pvmw.pte); | |
104 | } |