Commit 1fd55d47 (GKH)
1 | From a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 Mon Sep 17 00:00:00 2001 |
2 | From: Yang Shi <yang.shi@linux.alibaba.com> | |
3 | Date: Thu, 28 Mar 2019 20:43:55 -0700 | |
4 | Subject: mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified | |
5 | ||
6 | From: Yang Shi <yang.shi@linux.alibaba.com> | |
7 | ||
8 | commit a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 upstream. | |
9 | ||
10 | When MPOL_MF_STRICT was specified and an existing page was already on a | |
11 | node that does not follow the policy, mbind() should return -EIO. But | |
12 | commit 6f4576e3687b ("mempolicy: apply page table walker on | |
13 | queue_pages_range()") broke the rule. | |
14 | ||
15 | And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support | |
16 | thp migration") didn't return the correct value for THP mbind() too. | |
17 | ||
18 | If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it | |
19 | reaches queue_pages_to_pte_range() or queue_pages_pmd() to check if an | |
20 | existing page was already on a node that does not follow the policy. | |
21 | And, non-migratable vma may be used, return -EIO too if MPOL_MF_MOVE or | |
22 | MPOL_MF_MOVE_ALL was specified. | |
23 | ||
24 | Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c | |
25 | ||
26 | [akpm@linux-foundation.org: tweak code comment] | |
27 | Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.alibaba.com | |
28 | Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") | |
29 | Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com> | |
30 | Signed-off-by: Oscar Salvador <osalvador@suse.de> | |
31 | Reported-by: Cyril Hrubis <chrubis@suse.cz> | |
32 | Suggested-by: Kirill A. Shutemov <kirill@shutemov.name> | |
33 | Acked-by: Rafael Aquini <aquini@redhat.com> | |
34 | Reviewed-by: Oscar Salvador <osalvador@suse.de> | |
35 | Acked-by: David Rientjes <rientjes@google.com> | |
36 | Cc: Vlastimil Babka <vbabka@suse.cz> | |
37 | Cc: <stable@vger.kernel.org> | |
38 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
39 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
40 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
41 | ||
42 | ||
43 | --- | |
44 | mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++------- | |
45 | 1 file changed, 33 insertions(+), 7 deletions(-) | |
46 | ||
47 | --- a/mm/mempolicy.c | |
48 | +++ b/mm/mempolicy.c | |
49 | @@ -427,6 +427,13 @@ static inline bool queue_pages_required( | |
50 | return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); | |
51 | } | |
52 | ||
53 | +/* | |
54 | + * queue_pages_pmd() has three possible return values: | |
55 | + * 1 - pages are placed on the right node or queued successfully. | |
56 | + * 0 - THP was split. | |
57 | + * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing | |
58 | + * page was already on a node that does not follow the policy. | |
59 | + */ | |
60 | static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, | |
61 | unsigned long end, struct mm_walk *walk) | |
62 | { | |
63 | @@ -436,7 +443,7 @@ static int queue_pages_pmd(pmd_t *pmd, s | |
64 | unsigned long flags; | |
65 | ||
66 | if (unlikely(is_pmd_migration_entry(*pmd))) { | |
67 | - ret = 1; | |
68 | + ret = -EIO; | |
69 | goto unlock; | |
70 | } | |
71 | page = pmd_page(*pmd); | |
72 | @@ -462,8 +469,15 @@ static int queue_pages_pmd(pmd_t *pmd, s | |
73 | ret = 1; | |
74 | flags = qp->flags; | |
75 | /* go to thp migration */ | |
76 | - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | |
77 | + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | |
78 | + if (!vma_migratable(walk->vma)) { | |
79 | + ret = -EIO; | |
80 | + goto unlock; | |
81 | + } | |
82 | + | |
83 | migrate_page_add(page, qp->pagelist, flags); | |
84 | + } else | |
85 | + ret = -EIO; | |
86 | unlock: | |
87 | spin_unlock(ptl); | |
88 | out: | |
89 | @@ -488,8 +502,10 @@ static int queue_pages_pte_range(pmd_t * | |
90 | ptl = pmd_trans_huge_lock(pmd, vma); | |
91 | if (ptl) { | |
92 | ret = queue_pages_pmd(pmd, ptl, addr, end, walk); | |
93 | - if (ret) | |
94 | + if (ret > 0) | |
95 | return 0; | |
96 | + else if (ret < 0) | |
97 | + return ret; | |
98 | } | |
99 | ||
100 | if (pmd_trans_unstable(pmd)) | |
101 | @@ -526,11 +542,16 @@ retry: | |
102 | goto retry; | |
103 | } | |
104 | ||
105 | - migrate_page_add(page, qp->pagelist, flags); | |
106 | + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | |
107 | + if (!vma_migratable(vma)) | |
108 | + break; | |
109 | + migrate_page_add(page, qp->pagelist, flags); | |
110 | + } else | |
111 | + break; | |
112 | } | |
113 | pte_unmap_unlock(pte - 1, ptl); | |
114 | cond_resched(); | |
115 | - return 0; | |
116 | + return addr != end ? -EIO : 0; | |
117 | } | |
118 | ||
119 | static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, | |
120 | @@ -600,7 +621,12 @@ static int queue_pages_test_walk(unsigne | |
121 | unsigned long endvma = vma->vm_end; | |
122 | unsigned long flags = qp->flags; | |
123 | ||
124 | - if (!vma_migratable(vma)) | |
125 | + /* | |
126 | + * Need check MPOL_MF_STRICT to return -EIO if possible | |
127 | + * regardless of vma_migratable | |
128 | + */ | |
129 | + if (!vma_migratable(vma) && | |
130 | + !(flags & MPOL_MF_STRICT)) | |
131 | return 1; | |
132 | ||
133 | if (endvma > end) | |
134 | @@ -627,7 +653,7 @@ static int queue_pages_test_walk(unsigne | |
135 | } | |
136 | ||
137 | /* queue pages from current vma */ | |
138 | - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | |
139 | + if (flags & MPOL_MF_VALID) | |
140 | return 0; | |
141 | return 1; | |
142 | } |