]>
Commit | Line | Data |
---|---|---|
ec71c73b GKH |
1 | From fc8efd2ddfed3f343c11b693e87140ff358d7ff5 Mon Sep 17 00:00:00 2001 |
2 | From: Jan Stancek <jstancek@redhat.com> | |
3 | Date: Tue, 5 Mar 2019 15:50:08 -0800 | |
4 | Subject: mm/memory.c: do_fault: avoid usage of stale vm_area_struct | |
5 | ||
6 | From: Jan Stancek <jstancek@redhat.com> | |
7 | ||
8 | commit fc8efd2ddfed3f343c11b693e87140ff358d7ff5 upstream. | |
9 | ||
10 | LTP testcase mtest06 [1] can trigger a crash on s390x running 5.0.0-rc8. | |
11 | This is a stress test, where one thread mmaps/writes/munmaps a memory area | |
12 | and another thread is trying to read from it: | |
13 | ||
14 | CPU: 0 PID: 2611 Comm: mmap1 Not tainted 5.0.0-rc8+ #51 | |
15 | Hardware name: IBM 2964 N63 400 (z/VM 6.4.0) | |
16 | Krnl PSW : 0404e00180000000 00000000001ac8d8 (__lock_acquire+0x7/0x7a8) | |
17 | Call Trace: | |
18 | ([<0000000000000000>] (null)) | |
19 | [<00000000001adae4>] lock_acquire+0xec/0x258 | |
20 | [<000000000080d1ac>] _raw_spin_lock_bh+0x5c/0x98 | |
21 | [<000000000012a780>] page_table_free+0x48/0x1a8 | |
22 | [<00000000002f6e54>] do_fault+0xdc/0x670 | |
23 | [<00000000002fadae>] __handle_mm_fault+0x416/0x5f0 | |
24 | [<00000000002fb138>] handle_mm_fault+0x1b0/0x320 | |
25 | [<00000000001248cc>] do_dat_exception+0x19c/0x2c8 | |
26 | [<000000000080e5ee>] pgm_check_handler+0x19e/0x200 | |
27 | ||
28 | page_table_free() is called with a NULL mm parameter, but because "0" is a | |
29 | valid address on s390 (see S390_lowcore), it keeps going until it | |
30 | eventually crashes in lockdep's lock_acquire. This crash has been | |
31 | reproducible since at least 4.14. | |
32 | ||
33 | The problem is that "vmf->vma" used in do_fault() can become stale. Because | |
34 | mmap_sem may be released, other threads can come in, call munmap() and | |
35 | cause "vma" to be returned to the kmem cache, then zeroed/re-initialized and | |
36 | re-used: | |
37 | ||
38 | handle_mm_fault | | |
39 | __handle_mm_fault | | |
40 | do_fault | | |
41 | vma = vmf->vma | | |
42 | do_read_fault | | |
43 | __do_fault | | |
44 | vma->vm_ops->fault(vmf); | | |
45 | mmap_sem is released | | |
46 | | | |
47 | | do_munmap() | |
48 | | remove_vma_list() | |
49 | | remove_vma() | |
50 | | vm_area_free() | |
51 | | # vma is released | |
52 | | ... | |
53 | | # same vma is allocated | |
54 | | # from kmem cache | |
55 | | do_mmap() | |
56 | | vm_area_alloc() | |
57 | | memset(vma, 0, ...) | |
58 | | | |
59 | pte_free(vma->vm_mm, ...); | | |
60 | page_table_free | | |
61 | spin_lock_bh(&mm->context.lock);| | |
62 | <crash> | | |
63 | ||
64 | Cache mm_struct to avoid using potentially stale "vma". | |
65 | ||
66 | [1] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/mtest06/mmap1.c | |
67 | ||
68 | Link: http://lkml.kernel.org/r/5b3fdf19e2a5be460a384b936f5b56e13733f1b8.1551595137.git.jstancek@redhat.com | |
69 | Signed-off-by: Jan Stancek <jstancek@redhat.com> | |
70 | Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> | |
71 | Reviewed-by: Matthew Wilcox <willy@infradead.org> | |
72 | Acked-by: Rafael Aquini <aquini@redhat.com> | |
73 | Reviewed-by: Minchan Kim <minchan@kernel.org> | |
74 | Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | |
75 | Cc: Rik van Riel <riel@surriel.com> | |
76 | Cc: Michal Hocko <mhocko@suse.com> | |
77 | Cc: Huang Ying <ying.huang@intel.com> | |
78 | Cc: Souptick Joarder <jrdr.linux@gmail.com> | |
79 | Cc: Jerome Glisse <jglisse@redhat.com> | |
80 | Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> | |
81 | Cc: David Hildenbrand <david@redhat.com> | |
82 | Cc: Andrea Arcangeli <aarcange@redhat.com> | |
83 | Cc: David Rientjes <rientjes@google.com> | |
84 | Cc: Mel Gorman <mgorman@techsingularity.net> | |
85 | Cc: <stable@vger.kernel.org> | |
86 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
87 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
88 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
89 | ||
90 | --- | |
91 | mm/memory.c | 5 ++++- | |
92 | 1 file changed, 4 insertions(+), 1 deletion(-) | |
93 | ||
94 | --- a/mm/memory.c | |
95 | +++ b/mm/memory.c | |
96 | @@ -3762,10 +3762,13 @@ static vm_fault_t do_shared_fault(struct | |
97 | * but allow concurrent faults). | |
98 | * The mmap_sem may have been released depending on flags and our | |
99 | * return value. See filemap_fault() and __lock_page_or_retry(). | |
100 | + * If mmap_sem is released, vma may become invalid (for example | |
101 | + * by other thread calling munmap()). | |
102 | */ | |
103 | static vm_fault_t do_fault(struct vm_fault *vmf) | |
104 | { | |
105 | struct vm_area_struct *vma = vmf->vma; | |
106 | + struct mm_struct *vm_mm = vma->vm_mm; | |
107 | vm_fault_t ret; | |
108 | ||
109 | /* | |
110 | @@ -3806,7 +3809,7 @@ static vm_fault_t do_fault(struct vm_fau | |
111 | ||
112 | /* preallocated pagetable is unused: free it */ | |
113 | if (vmf->prealloc_pte) { | |
114 | - pte_free(vma->vm_mm, vmf->prealloc_pte); | |
115 | + pte_free(vm_mm, vmf->prealloc_pte); | |
116 | vmf->prealloc_pte = NULL; | |
117 | } | |
118 | return ret; |