Commit: 2cb7cef9
1 | From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> |
2 | Subject: LTTng instrumentation - scheduler | |
3 | ||
4 | Original patch header: | |
5 | LTTng instrumentation - scheduler | |
6 | ||
7 | Instrument the scheduler activity (sched_switch, migration, wakeups, wait for a | |
8 | task, signal delivery) and process/thread creation/destruction (fork, exit, | |
9 | kthread stop). Actually, kthread creation is not instrumented in this patch | |
10 | because it is architecture dependent. It allows connecting tracers such as | |
11 | ftrace, which detect scheduling latencies and good/bad scheduler decisions. Tools | |
12 | like LTTng can export this scheduler information along with instrumentation of | |
13 | the rest of the kernel activity to perform post-mortem analysis on the scheduler | |
14 | activity. | |
15 | ||
16 | About the performance impact of tracepoints (which is comparable to markers), | |
17 | even without immediate values optimizations, tests done by Hideo Aoki on ia64 | |
18 | show no regression. His test case was using hackbench on a kernel where | |
19 | scheduler instrumentation (about 5 events in core scheduler code) was added. | |
20 | See the "Tracepoints" patch header for performance result detail. | |
21 | ||
22 | Changelog : | |
23 | - Change instrumentation location and parameter to match ftrace instrumentation, | |
24 | previously done with kernel markers. | |
25 | ||
26 | Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | |
27 | CC: 'Peter Zijlstra' <peterz@infradead.org> | |
28 | CC: 'Steven Rostedt' <rostedt@goodmis.org> | |
29 | CC: Thomas Gleixner <tglx@linutronix.de> | |
30 | CC: Masami Hiramatsu <mhiramat@redhat.com> | |
31 | CC: "Frank Ch. Eigler" <fche@redhat.com> | |
32 | CC: 'Ingo Molnar' <mingo@elte.hu> | |
33 | CC: 'Hideo AOKI' <haoki@redhat.com> | |
34 | CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com> | |
35 | CC: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> | |
36 | ||
37 | Acked-by: Jan Blunck <jblunck@suse.de> | |
38 | --- | |
39 | --- | |
40 | include/trace/sched.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ | |
41 | kernel/exit.c | 10 +++++++++- | |
42 | kernel/fork.c | 3 +++ | |
43 | kernel/kthread.c | 5 +++++ | |
44 | kernel/sched.c | 17 ++++++----------- | |
45 | kernel/signal.c | 3 +++ | |
46 | 6 files changed, 71 insertions(+), 12 deletions(-) | |
47 | ||
48 | --- /dev/null | |
49 | +++ b/include/trace/sched.h | |
50 | @@ -0,0 +1,45 @@ | |
51 | +#ifndef _TRACE_SCHED_H | |
52 | +#define _TRACE_SCHED_H | |
53 | + | |
54 | +#include <linux/sched.h> | |
55 | +#include <linux/tracepoint.h> | |
56 | + | |
57 | +DEFINE_TRACE(sched_kthread_stop, | |
58 | + TPPROTO(struct task_struct *t), | |
59 | + TPARGS(t)); | |
60 | +DEFINE_TRACE(sched_kthread_stop_ret, | |
61 | + TPPROTO(int ret), | |
62 | + TPARGS(ret)); | |
63 | +DEFINE_TRACE(sched_wait_task, | |
64 | + TPPROTO(struct rq *rq, struct task_struct *p), | |
65 | + TPARGS(rq, p)); | |
66 | +DEFINE_TRACE(sched_wakeup, | |
67 | + TPPROTO(struct rq *rq, struct task_struct *p), | |
68 | + TPARGS(rq, p)); | |
69 | +DEFINE_TRACE(sched_wakeup_new, | |
70 | + TPPROTO(struct rq *rq, struct task_struct *p), | |
71 | + TPARGS(rq, p)); | |
72 | +DEFINE_TRACE(sched_switch, | |
73 | + TPPROTO(struct rq *rq, struct task_struct *prev, | |
74 | + struct task_struct *next), | |
75 | + TPARGS(rq, prev, next)); | |
76 | +DEFINE_TRACE(sched_migrate_task, | |
77 | + TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), | |
78 | + TPARGS(rq, p, dest_cpu)); | |
79 | +DEFINE_TRACE(sched_process_free, | |
80 | + TPPROTO(struct task_struct *p), | |
81 | + TPARGS(p)); | |
82 | +DEFINE_TRACE(sched_process_exit, | |
83 | + TPPROTO(struct task_struct *p), | |
84 | + TPARGS(p)); | |
85 | +DEFINE_TRACE(sched_process_wait, | |
86 | + TPPROTO(struct pid *pid), | |
87 | + TPARGS(pid)); | |
88 | +DEFINE_TRACE(sched_process_fork, | |
89 | + TPPROTO(struct task_struct *parent, struct task_struct *child), | |
90 | + TPARGS(parent, child)); | |
91 | +DEFINE_TRACE(sched_signal_send, | |
92 | + TPPROTO(int sig, struct task_struct *p), | |
93 | + TPARGS(sig, p)); | |
94 | + | |
95 | +#endif | |
96 | --- a/kernel/exit.c | |
97 | +++ b/kernel/exit.c | |
98 | @@ -50,6 +50,7 @@ | |
99 | #include <linux/blkdev.h> | |
100 | #include <linux/task_io_accounting_ops.h> | |
101 | #include <linux/tracehook.h> | |
102 | +#include <trace/sched.h> | |
103 | ||
104 | #include <asm/uaccess.h> | |
105 | #include <asm/unistd.h> | |
106 | @@ -152,7 +153,10 @@ static void __exit_signal(struct task_st | |
107 | ||
108 | static void delayed_put_task_struct(struct rcu_head *rhp) | |
109 | { | |
110 | - put_task_struct(container_of(rhp, struct task_struct, rcu)); | |
111 | + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | |
112 | + | |
113 | + trace_sched_process_free(tsk); | |
114 | + put_task_struct(tsk); | |
115 | } | |
116 | ||
117 | ||
118 | @@ -1076,6 +1080,8 @@ NORET_TYPE void do_exit(long code) | |
119 | ||
120 | if (group_dead) | |
121 | acct_process(); | |
122 | + trace_sched_process_exit(tsk); | |
123 | + | |
124 | exit_sem(tsk); | |
125 | exit_files(tsk); | |
126 | exit_fs(tsk); | |
127 | @@ -1679,6 +1685,8 @@ static long do_wait(enum pid_type type, | |
128 | struct task_struct *tsk; | |
129 | int retval; | |
130 | ||
131 | + trace_sched_process_wait(pid); | |
132 | + | |
133 | add_wait_queue(¤t->signal->wait_chldexit,&wait); | |
134 | repeat: | |
135 | /* | |
136 | --- a/kernel/fork.c | |
137 | +++ b/kernel/fork.c | |
138 | @@ -58,6 +58,7 @@ | |
139 | #include <linux/tty.h> | |
140 | #include <linux/proc_fs.h> | |
141 | #include <linux/blkdev.h> | |
142 | +#include <trace/sched.h> | |
143 | ||
144 | #include <asm/pgtable.h> | |
145 | #include <asm/pgalloc.h> | |
146 | @@ -1363,6 +1364,8 @@ long do_fork(unsigned long clone_flags, | |
147 | if (!IS_ERR(p)) { | |
148 | struct completion vfork; | |
149 | ||
150 | + trace_sched_process_fork(current, p); | |
151 | + | |
152 | nr = task_pid_vnr(p); | |
153 | ||
154 | if (clone_flags & CLONE_PARENT_SETTID) | |
155 | --- a/kernel/kthread.c | |
156 | +++ b/kernel/kthread.c | |
157 | @@ -13,6 +13,7 @@ | |
158 | #include <linux/file.h> | |
159 | #include <linux/module.h> | |
160 | #include <linux/mutex.h> | |
161 | +#include <trace/sched.h> | |
162 | ||
163 | #define KTHREAD_NICE_LEVEL (-5) | |
164 | ||
165 | @@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k) | |
166 | /* It could exit after stop_info.k set, but before wake_up_process. */ | |
167 | get_task_struct(k); | |
168 | ||
169 | + trace_sched_kthread_stop(k); | |
170 | + | |
171 | /* Must init completion *before* thread sees kthread_stop_info.k */ | |
172 | init_completion(&kthread_stop_info.done); | |
173 | smp_wmb(); | |
174 | @@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k) | |
175 | ret = kthread_stop_info.err; | |
176 | mutex_unlock(&kthread_stop_lock); | |
177 | ||
178 | + trace_sched_kthread_stop_ret(ret); | |
179 | + | |
180 | return ret; | |
181 | } | |
182 | EXPORT_SYMBOL(kthread_stop); | |
183 | --- a/kernel/sched.c | |
184 | +++ b/kernel/sched.c | |
185 | @@ -71,6 +71,7 @@ | |
186 | #include <linux/debugfs.h> | |
187 | #include <linux/ctype.h> | |
188 | #include <linux/ftrace.h> | |
189 | +#include <trace/sched.h> | |
190 | #include <linux/perfmon_kern.h> | |
191 | ||
192 | #include <asm/tlb.h> | |
193 | @@ -1914,6 +1915,7 @@ unsigned long wait_task_inactive(struct | |
194 | * just go back and repeat. | |
195 | */ | |
196 | rq = task_rq_lock(p, &flags); | |
197 | + trace_sched_wait_task(rq, p); | |
198 | running = task_running(rq, p); | |
199 | on_rq = p->se.on_rq; | |
200 | ncsw = 0; | |
201 | @@ -2306,9 +2308,7 @@ out_activate: | |
202 | success = 1; | |
203 | ||
204 | out_running: | |
205 | - trace_mark(kernel_sched_wakeup, | |
206 | - "pid %d state %ld ## rq %p task %p rq->curr %p", | |
207 | - p->pid, p->state, rq, p, rq->curr); | |
208 | + trace_sched_wakeup(rq, p); | |
209 | check_preempt_curr(rq, p); | |
210 | ||
211 | p->state = TASK_RUNNING; | |
212 | @@ -2441,9 +2441,7 @@ void wake_up_new_task(struct task_struct | |
213 | p->sched_class->task_new(rq, p); | |
214 | inc_nr_running(rq); | |
215 | } | |
216 | - trace_mark(kernel_sched_wakeup_new, | |
217 | - "pid %d state %ld ## rq %p task %p rq->curr %p", | |
218 | - p->pid, p->state, rq, p, rq->curr); | |
219 | + trace_sched_wakeup_new(rq, p); | |
220 | check_preempt_curr(rq, p); | |
221 | #ifdef CONFIG_SMP | |
222 | if (p->sched_class->task_wake_up) | |
223 | @@ -2616,11 +2614,7 @@ context_switch(struct rq *rq, struct tas | |
224 | struct mm_struct *mm, *oldmm; | |
225 | ||
226 | prepare_task_switch(rq, prev, next); | |
227 | - trace_mark(kernel_sched_schedule, | |
228 | - "prev_pid %d next_pid %d prev_state %ld " | |
229 | - "## rq %p prev %p next %p", | |
230 | - prev->pid, next->pid, prev->state, | |
231 | - rq, prev, next); | |
232 | + trace_sched_switch(rq, prev, next); | |
233 | mm = next->mm; | |
234 | oldmm = prev->active_mm; | |
235 | /* | |
236 | @@ -2860,6 +2854,7 @@ static void sched_migrate_task(struct ta | |
237 | || unlikely(!cpu_active(dest_cpu))) | |
238 | goto out; | |
239 | ||
240 | + trace_sched_migrate_task(rq, p, dest_cpu); | |
241 | /* force the process onto the specified CPU */ | |
242 | if (migrate_task(p, dest_cpu, &req)) { | |
243 | /* Need to wait for migration thread (might exit: take ref). */ | |
244 | --- a/kernel/signal.c | |
245 | +++ b/kernel/signal.c | |
246 | @@ -27,6 +27,7 @@ | |
247 | #include <linux/freezer.h> | |
248 | #include <linux/pid_namespace.h> | |
249 | #include <linux/nsproxy.h> | |
250 | +#include <trace/sched.h> | |
251 | ||
252 | #include <asm/param.h> | |
253 | #include <asm/uaccess.h> | |
254 | @@ -803,6 +804,8 @@ static int send_signal(int sig, struct s | |
255 | struct sigpending *pending; | |
256 | struct sigqueue *q; | |
257 | ||
258 | + trace_sched_signal_send(sig, t); | |
259 | + | |
260 | assert_spin_locked(&t->sighand->siglock); | |
261 | if (!prepare_signal(sig, t)) | |
262 | return 0; |