]>
Commit | Line | Data |
---|---|---|
afbe37e7 GKH |
1 | From bf959931ddb88c4e4366e96dd22e68fa0db9527c Mon Sep 17 00:00:00 2001 |
2 | From: Oleg Nesterov <oleg@redhat.com> | |
3 | Date: Mon, 23 May 2016 16:23:50 -0700 | |
4 | Subject: wait/ptrace: assume __WALL if the child is traced | |
5 | ||
6 | From: Oleg Nesterov <oleg@redhat.com> | |
7 | ||
8 | commit bf959931ddb88c4e4366e96dd22e68fa0db9527c upstream. | |
9 | ||
10 | The following program (a simplified version of one generated by syzkaller) | |
11 | ||
12 | #include <pthread.h> | |
13 | #include <unistd.h> | |
14 | #include <sys/ptrace.h> | |
15 | #include <stdio.h> | |
16 | #include <signal.h> | |
17 | ||
18 | void *thread_func(void *arg) | |
19 | { | |
20 | ptrace(PTRACE_TRACEME, 0,0,0); | |
21 | return 0; | |
22 | } | |
23 | ||
24 | int main(void) | |
25 | { | |
26 | pthread_t thread; | |
27 | ||
28 | if (fork()) | |
29 | return 0; | |
30 | ||
31 | while (getppid() != 1) | |
32 | ; | |
33 | ||
34 | pthread_create(&thread, NULL, thread_func, NULL); | |
35 | pthread_join(thread, NULL); | |
36 | return 0; | |
37 | } | |
38 | ||
39 | creates an unreapable zombie if /sbin/init doesn't use __WALL. | |
40 | ||
41 | This is not a kernel bug, at least in the sense that everything works as | |
42 | expected: debugger should reap a traced sub-thread before it can reap the | |
43 | leader, but without __WALL/__WCLONE do_wait() ignores sub-threads. | |
44 | ||
45 | Unfortunately, it seems that /sbin/init in most (all?) distributions | |
46 | doesn't use it and we have to change the kernel to avoid the problem. | |
47 | Note also that most inits use sys_waitid(), which doesn't allow __WALL, so | |
48 | the necessary user-space fix is not that trivial. | |
49 | ||
50 | This patch just adds the "ptrace" check into eligible_child(). To some | |
51 | degree this matches the "tsk->ptrace" in exit_notify(): ->exit_signal is | |
52 | mostly ignored when the tracee reports to the debugger. The same goes for | |
53 | WSTOPPED: the tracer doesn't need to set this flag to wait for the stopped tracee. | |
54 | ||
55 | This obviously means a user-visible change: __WCLONE and __WALL no | |
56 | longer have any meaning for the debugger. And I can only hope that this won't | |
57 | break something, but at least strace/gdb won't suffer. | |
58 | ||
59 | We could make a more conservative change. Say, we can take __WCLONE into | |
60 | account, or !thread_group_leader(). But it would be nice to not | |
61 | complicate these historical/confusing checks. | |
62 | ||
63 | Signed-off-by: Oleg Nesterov <oleg@redhat.com> | |
64 | Reported-by: Dmitry Vyukov <dvyukov@google.com> | |
65 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
66 | Cc: Jan Kratochvil <jan.kratochvil@redhat.com> | |
67 | Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> | |
68 | Cc: Pedro Alves <palves@redhat.com> | |
69 | Cc: Roland McGrath <roland@hack.frob.com> | |
70 | Cc: <syzkaller@googlegroups.com> | |
71 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
72 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
73 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
74 | ||
75 | --- | |
76 | kernel/exit.c | 29 ++++++++++++++++++++--------- | |
77 | 1 file changed, 20 insertions(+), 9 deletions(-) | |
78 | ||
79 | --- a/kernel/exit.c | |
80 | +++ b/kernel/exit.c | |
81 | @@ -918,17 +918,28 @@ static int eligible_pid(struct wait_opts | |
82 | task_pid_type(p, wo->wo_type) == wo->wo_pid; | |
83 | } | |
84 | ||
85 | -static int eligible_child(struct wait_opts *wo, struct task_struct *p) | |
86 | +static int | |
87 | +eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) | |
88 | { | |
89 | if (!eligible_pid(wo, p)) | |
90 | return 0; | |
91 | - /* Wait for all children (clone and not) if __WALL is set; | |
92 | - * otherwise, wait for clone children *only* if __WCLONE is | |
93 | - * set; otherwise, wait for non-clone children *only*. (Note: | |
94 | - * A "clone" child here is one that reports to its parent | |
95 | - * using a signal other than SIGCHLD.) */ | |
96 | - if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) | |
97 | - && !(wo->wo_flags & __WALL)) | |
98 | + | |
99 | + /* | |
100 | + * Wait for all children (clone and not) if __WALL is set or | |
101 | + * if it is traced by us. | |
102 | + */ | |
103 | + if (ptrace || (wo->wo_flags & __WALL)) | |
104 | + return 1; | |
105 | + | |
106 | + /* | |
107 | + * Otherwise, wait for clone children *only* if __WCLONE is set; | |
108 | + * otherwise, wait for non-clone children *only*. | |
109 | + * | |
110 | + * Note: a "clone" child here is one that reports to its parent | |
111 | + * using a signal other than SIGCHLD, or a non-leader thread which | |
112 | + * we can only see if it is traced by us. | |
113 | + */ | |
114 | + if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) | |
115 | return 0; | |
116 | ||
117 | return 1; | |
118 | @@ -1301,7 +1312,7 @@ static int wait_consider_task(struct wai | |
119 | if (unlikely(exit_state == EXIT_DEAD)) | |
120 | return 0; | |
121 | ||
122 | - ret = eligible_child(wo, p); | |
123 | + ret = eligible_child(wo, ptrace, p); | |
124 | if (!ret) | |
125 | return ret; | |
126 |