]>
Commit | Line | Data |
---|---|---|
d074880a GKH |
1 | From 134fca9063ad4851de767d1768180e5dede9a881 Mon Sep 17 00:00:00 2001 |
2 | From: Jiri Kosina <jkosina@suse.cz> | |
3 | Date: Tue, 14 May 2019 15:41:38 -0700 | |
4 | Subject: mm/mincore.c: make mincore() more conservative | |
5 | ||
6 | From: Jiri Kosina <jkosina@suse.cz> | |
7 | ||
8 | commit 134fca9063ad4851de767d1768180e5dede9a881 upstream. | |
9 | ||
10 | The semantics of what mincore() considers to be resident is not | |
11 | completely clear, but Linux has always (since 2.3.52, which is when | |
12 | mincore() was initially done) treated it as "page is available in page | |
13 | cache". | |
14 | ||
15 | That's potentially a problem, as that [in]directly exposes | |
16 | meta-information about pagecache / memory mapping state even about | |
17 | memory not strictly belonging to the process executing the syscall, | |
18 | opening possibilities for sidechannel attacks. | |
19 | ||
20 | Change the semantics of mincore() so that it only reveals pagecache | |
21 | information for non-anonymous mappings that belong to files that the | |
22 | calling process could (if it tried to) successfully open for writing; | |
23 | otherwise we'd be including shared non-exclusive mappings, which | |
24 | ||
25 | - is the sidechannel | |
26 | ||
27 | - is not the usecase for mincore(), as that's primarily used for data, | |
28 | not (shared) text | |
29 | ||
30 | [jkosina@suse.cz: v2] | |
31 | Link: http://lkml.kernel.org/r/20190312141708.6652-2-vbabka@suse.cz | |
32 | [mhocko@suse.com: restructure can_do_mincore() conditions] | |
33 | Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1903062342020.19912@cbobk.fhfr.pm | |
34 | Signed-off-by: Jiri Kosina <jkosina@suse.cz> | |
35 | Signed-off-by: Vlastimil Babka <vbabka@suse.cz> | |
36 | Acked-by: Josh Snyder <joshs@netflix.com> | |
37 | Acked-by: Michal Hocko <mhocko@suse.com> | |
38 | Originally-by: Linus Torvalds <torvalds@linux-foundation.org> | |
39 | Originally-by: Dominique Martinet <asmadeus@codewreck.org> | |
40 | Cc: Andy Lutomirski <luto@amacapital.net> | |
41 | Cc: Dave Chinner <david@fromorbit.com> | |
42 | Cc: Kevin Easton <kevin@guarana.org> | |
43 | Cc: Matthew Wilcox <willy@infradead.org> | |
44 | Cc: Cyril Hrubis <chrubis@suse.cz> | |
45 | Cc: Tejun Heo <tj@kernel.org> | |
46 | Cc: Kirill A. Shutemov <kirill@shutemov.name> | |
47 | Cc: Daniel Gruss <daniel@gruss.cc> | |
48 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
49 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
50 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
51 | ||
52 | --- | |
53 | mm/mincore.c | 23 ++++++++++++++++++++++- | |
54 | 1 file changed, 22 insertions(+), 1 deletion(-) | |
55 | ||
56 | --- a/mm/mincore.c | |
57 | +++ b/mm/mincore.c | |
58 | @@ -169,6 +169,22 @@ out: | |
59 | return 0; | |
60 | } | |
61 | ||
62 | +static inline bool can_do_mincore(struct vm_area_struct *vma) | |
63 | +{ | |
64 | + if (vma_is_anonymous(vma)) | |
65 | + return true; | |
66 | + if (!vma->vm_file) | |
67 | + return false; | |
68 | + /* | |
69 | + * Reveal pagecache information only for non-anonymous mappings that | |
70 | + * correspond to the files the calling process could (if tried) open | |
71 | + * for writing; otherwise we'd be including shared non-exclusive | |
72 | + * mappings, which opens a side channel. | |
73 | + */ | |
74 | + return inode_owner_or_capable(file_inode(vma->vm_file)) || | |
75 | + inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; | |
76 | +} | |
77 | + | |
78 | /* | |
79 | * Do a chunk of "sys_mincore()". We've already checked | |
80 | * all the arguments, we hold the mmap semaphore: we should | |
81 | @@ -189,8 +205,13 @@ static long do_mincore(unsigned long add | |
82 | vma = find_vma(current->mm, addr); | |
83 | if (!vma || addr < vma->vm_start) | |
84 | return -ENOMEM; | |
85 | - mincore_walk.mm = vma->vm_mm; | |
86 | end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); | |
87 | + if (!can_do_mincore(vma)) { | |
88 | + unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); | |
89 | + memset(vec, 1, pages); | |
90 | + return pages; | |
91 | + } | |
92 | + mincore_walk.mm = vma->vm_mm; | |
93 | err = walk_page_range(addr, end, &mincore_walk); | |
94 | if (err < 0) | |
95 | return err; |