]>
Commit | Line | Data |
---|---|---|
3a6c8c10 GKH |
1 | From 687cb0884a714ff484d038e9190edc874edcf146 Mon Sep 17 00:00:00 2001 |
2 | From: Wang Nan <wangnan0@huawei.com> | |
3 | Date: Wed, 29 Nov 2017 16:09:58 -0800 | |
4 | Subject: mm, oom_reaper: gather each vma to prevent leaking TLB entry | |
5 | ||
6 | From: Wang Nan <wangnan0@huawei.com> | |
7 | ||
8 | commit 687cb0884a714ff484d038e9190edc874edcf146 upstream. | |
9 | ||
10 | tlb_gather_mmu(&tlb, mm, 0, -1) means gathering the whole virtual memory | |
11 | space. In this case, tlb->fullmm is true. Some archs like arm64 | |
12 | don't flush the TLB when tlb->fullmm is true: | |
13 | ||
14 | commit 5a7862e83000 ("arm64: tlbflush: avoid flushing when fullmm == 1"). | |
15 | ||
16 | This causes stale TLB entries to leak. | |
17 | ||
18 | Will clarifies his patch: | |
19 | "Basically, we tag each address space with an ASID (PCID on x86) which | |
20 | is resident in the TLB. This means we can elide TLB invalidation when | |
21 | pulling down a full mm because we won't ever assign that ASID to | |
22 | another mm without doing TLB invalidation elsewhere (which actually | |
23 | just nukes the whole TLB). | |
24 | ||
25 | I think that means that we could potentially not fault on a kernel | |
26 | uaccess, because we could hit in the TLB" | |
27 | ||
28 | There could be a window between complete_signal() sending an IPI to other | |
29 | cores and all threads sharing this mm actually being kicked off those cores. | |
30 | In this window, the oom reaper may call tlb_flush_mmu_tlbonly() to | |
31 | flush the TLB and then free pages. However, due to the above problem, the TLB | |
32 | entries are not really flushed on arm64. Other threads can still | |
33 | access these pages through stale TLB entries. Moreover, a copy_to_user() can | |
34 | also write to these pages without generating a page fault, causing | |
35 | use-after-free bugs. | |
36 | ||
37 | This patch gathers each vma instead of gathering the full vm space. In this | |
38 | case tlb->fullmm is not true. The behavior of the oom reaper becomes similar | |
39 | to munmapping before do_exit, which should be safe for all archs. | |
40 | ||
41 | Link: http://lkml.kernel.org/r/20171107095453.179940-1-wangnan0@huawei.com | |
42 | Fixes: aac453635549 ("mm, oom: introduce oom reaper") | |
43 | Signed-off-by: Wang Nan <wangnan0@huawei.com> | |
44 | Acked-by: Michal Hocko <mhocko@suse.com> | |
45 | Acked-by: David Rientjes <rientjes@google.com> | |
46 | Cc: Minchan Kim <minchan@kernel.org> | |
47 | Cc: Will Deacon <will.deacon@arm.com> | |
48 | Cc: Bob Liu <liubo95@huawei.com> | |
49 | Cc: Ingo Molnar <mingo@kernel.org> | |
50 | Cc: Roman Gushchin <guro@fb.com> | |
51 | Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> | |
52 | Cc: Andrea Arcangeli <aarcange@redhat.com> | |
53 | Cc: <stable@vger.kernel.org> | |
54 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
55 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
56 | [backported to 4.9 stable tree] | |
57 | Signed-off-by: Michal Hocko <mhocko@suse.com> | |
58 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
59 | --- | |
60 | mm/oom_kill.c | 7 ++++--- | |
61 | 1 file changed, 4 insertions(+), 3 deletions(-) | |
62 | ||
63 | --- a/mm/oom_kill.c | |
64 | +++ b/mm/oom_kill.c | |
65 | @@ -524,7 +524,6 @@ static bool __oom_reap_task_mm(struct ta | |
66 | */ | |
67 | set_bit(MMF_UNSTABLE, &mm->flags); | |
68 | ||
69 | - tlb_gather_mmu(&tlb, mm, 0, -1); | |
70 | for (vma = mm->mmap ; vma; vma = vma->vm_next) { | |
71 | if (is_vm_hugetlb_page(vma)) | |
72 | continue; | |
73 | @@ -546,11 +545,13 @@ static bool __oom_reap_task_mm(struct ta | |
74 | * we do not want to block exit_mmap by keeping mm ref | |
75 | * count elevated without a good reason. | |
76 | */ | |
77 | - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) | |
78 | + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { | |
79 | + tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); | |
80 | unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, | |
81 | &details); | |
82 | + tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); | |
83 | + } | |
84 | } | |
85 | - tlb_finish_mmu(&tlb, 0, -1); | |
86 | pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", | |
87 | task_pid_nr(tsk), tsk->comm, | |
88 | K(get_mm_counter(mm, MM_ANONPAGES)), |