]>
Commit | Line | Data |
---|---|---|
e72951d8 GKH |
1 | From b2f73922d119686323f14fbbe46587f863852328 Mon Sep 17 00:00:00 2001 |
2 | From: Ingo Molnar <mingo@kernel.org> | |
3 | Date: Wed, 30 Sep 2015 15:59:17 +0200 | |
4 | Subject: fs/proc, core/debug: Don't expose absolute kernel addresses via wchan | |
5 | ||
6 | From: Ingo Molnar <mingo@kernel.org> | |
7 | ||
8 | commit b2f73922d119686323f14fbbe46587f863852328 upstream. | |
9 | ||
10 | So the /proc/PID/stat 'wchan' field (the 35th field, which contains | |
11 | the absolute kernel address of the kernel function a task is blocked in) | |
12 | leaks absolute kernel addresses to unprivileged user-space: | |
13 | ||
14 | seq_put_decimal_ull(m, ' ', wchan); | |
15 | ||
16 | The absolute address might also leak via /proc/PID/wchan, if | |
17 | KALLSYMS is turned off or if the symbol lookup fails for some reason: | |
18 | ||
19 | static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, | |
20 | struct pid *pid, struct task_struct *task) | |
21 | { | |
22 | unsigned long wchan; | |
23 | char symname[KSYM_NAME_LEN]; | |
24 | ||
25 | wchan = get_wchan(task); | |
26 | ||
27 | if (lookup_symbol_name(wchan, symname) < 0) { | |
28 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | |
29 | return 0; | |
30 | seq_printf(m, "%lu", wchan); | |
31 | } else { | |
32 | seq_printf(m, "%s", symname); | |
33 | } | |
34 | ||
35 | return 0; | |
36 | } | |
37 | ||
38 | This isn't ideal, because for example it trivially leaks the KASLR offset | |
39 | to any local attacker: | |
40 | ||
41 | fomalhaut:~> printf "%016lx\n" $(cat /proc/$$/stat | cut -d' ' -f35) | |
42 | ffffffff8123b380 | |
43 | ||
44 | Most real-life uses of wchan are symbolic: | |
45 | ||
46 | ps -eo pid:10,tid:10,wchan:30,comm | |
47 | ||
48 | and procps uses /proc/PID/wchan, not the absolute address in /proc/PID/stat: | |
49 | ||
50 | triton:~/tip> strace -f ps -eo pid:10,tid:10,wchan:30,comm 2>&1 | grep wchan | tail -1 | |
51 | open("/proc/30833/wchan", O_RDONLY) = 6 | |
52 | ||
53 | There's one compatibility quirk here: procps relies on whether the | |
54 | absolute value is non-zero - and we can provide that functionality | |
55 | by outputting "0" or "1" depending on whether the task is blocked | |
56 | (whether there's a wchan address). | |
57 | ||
58 | These days there appears to be very little legitimate reason | |
59 | user-space would be interested in the absolute address. The | |
60 | absolute address is mostly historic: from the days when we | |
61 | didn't have kallsyms and user-space procps had to do the | |
62 | decoding itself via the System.map. | |
63 | ||
64 | So this patch sets all numeric output to "0" or "1" and keeps only | |
65 | symbolic output, in /proc/PID/wchan. | |
66 | ||
67 | ( The absolute sleep address can generally still be profiled via | |
68 | perf, by tasks with sufficient privileges. ) | |
69 | ||
70 | Reviewed-by: Thomas Gleixner <tglx@linutronix.de> | |
71 | Acked-by: Kees Cook <keescook@chromium.org> | |
72 | Acked-by: Linus Torvalds <torvalds@linux-foundation.org> | |
73 | Cc: Al Viro <viro@zeniv.linux.org.uk> | |
74 | Cc: Alexander Potapenko <glider@google.com> | |
75 | Cc: Andrey Konovalov <andreyknvl@google.com> | |
76 | Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com> | |
77 | Cc: Andy Lutomirski <luto@amacapital.net> | |
78 | Cc: Andy Lutomirski <luto@kernel.org> | |
79 | Cc: Borislav Petkov <bp@alien8.de> | |
80 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
81 | Cc: Dmitry Vyukov <dvyukov@google.com> | |
82 | Cc: Kostya Serebryany <kcc@google.com> | |
83 | Cc: Mike Galbraith <efault@gmx.de> | |
84 | Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> | |
85 | Cc: Peter Zijlstra <peterz@infradead.org> | |
86 | Cc: Sasha Levin <sasha.levin@oracle.com> | |
87 | Cc: kasan-dev <kasan-dev@googlegroups.com> | |
88 | Cc: linux-kernel@vger.kernel.org | |
89 | Link: http://lkml.kernel.org/r/20150930135917.GA3285@gmail.com | |
90 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
91 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
92 | ||
93 | --- | |
94 | Documentation/filesystems/proc.txt | 5 +++-- | |
95 | fs/proc/array.c | 16 ++++++++++++++-- | |
96 | fs/proc/base.c | 9 +++------ | |
97 | 3 files changed, 20 insertions(+), 10 deletions(-) | |
98 | ||
99 | --- a/Documentation/filesystems/proc.txt | |
100 | +++ b/Documentation/filesystems/proc.txt | |
101 | @@ -140,7 +140,8 @@ Table 1-1: Process specific entries in / | |
102 | stat Process status | |
103 | statm Process memory status information | |
104 | status Process status in human readable form | |
105 | - wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan | |
106 | + wchan Present with CONFIG_KALLSYMS=y: it shows the kernel function | |
107 | + symbol the task is blocked in - or "0" if not blocked. | |
108 | pagemap Page table | |
109 | stack Report full stack trace, enable via CONFIG_STACKTRACE | |
110 | smaps a extension based on maps, showing the memory consumption of | |
111 | @@ -310,7 +311,7 @@ Table 1-4: Contents of the stat files (a | |
112 | blocked bitmap of blocked signals | |
113 | sigign bitmap of ignored signals | |
114 | sigcatch bitmap of caught signals | |
115 | - wchan address where process went to sleep | |
116 | + 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead) | |
117 | 0 (place holder) | |
118 | 0 (place holder) | |
119 | exit_signal signal to send to parent thread on exit | |
120 | --- a/fs/proc/array.c | |
121 | +++ b/fs/proc/array.c | |
122 | @@ -372,7 +372,7 @@ int proc_pid_status(struct seq_file *m, | |
123 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |
124 | struct pid *pid, struct task_struct *task, int whole) | |
125 | { | |
126 | - unsigned long vsize, eip, esp, wchan = ~0UL; | |
127 | + unsigned long vsize, eip, esp, wchan = 0; | |
128 | int priority, nice; | |
129 | int tty_pgrp = -1, tty_nr = 0; | |
130 | sigset_t sigign, sigcatch; | |
131 | @@ -504,7 +504,19 @@ static int do_task_stat(struct seq_file | |
132 | seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); | |
133 | seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); | |
134 | seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); | |
135 | - seq_put_decimal_ull(m, ' ', wchan); | |
136 | + | |
137 | + /* | |
138 | + * We used to output the absolute kernel address, but that's an | |
139 | + * information leak - so instead we show a 0/1 flag here, to signal | |
140 | + * to user-space whether there's a wchan field in /proc/PID/wchan. | |
141 | + * | |
142 | + * This works with older implementations of procps as well. | |
143 | + */ | |
144 | + if (wchan) | |
145 | + seq_puts(m, " 1"); | |
146 | + else | |
147 | + seq_puts(m, " 0"); | |
148 | + | |
149 | seq_put_decimal_ull(m, ' ', 0); | |
150 | seq_put_decimal_ull(m, ' ', 0); | |
151 | seq_put_decimal_ll(m, ' ', task->exit_signal); | |
152 | --- a/fs/proc/base.c | |
153 | +++ b/fs/proc/base.c | |
154 | @@ -430,13 +430,10 @@ static int proc_pid_wchan(struct seq_fil | |
155 | ||
156 | wchan = get_wchan(task); | |
157 | ||
158 | - if (lookup_symbol_name(wchan, symname) < 0) { | |
159 | - if (!ptrace_may_access(task, PTRACE_MODE_READ)) | |
160 | - return 0; | |
161 | - seq_printf(m, "%lu", wchan); | |
162 | - } else { | |
163 | + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) | |
164 | seq_printf(m, "%s", symname); | |
165 | - } | |
166 | + else | |
167 | + seq_putc(m, '0'); | |
168 | ||
169 | return 0; | |
170 | } |