1 From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
2 Subject: LTTng instrumentation - scheduler
5 LTTng instrumentation - scheduler
7 Instrument the scheduler activity (sched_switch, migration, wakeups, wait for a
8 task, signal delivery) and process/thread creation/destruction (fork, exit,
9 kthread stop). Currently, kthread creation is not instrumented in this patch
10 because it is architecture dependent. This instrumentation allows connecting
11 tracers such as ftrace, which detects scheduling latencies and good/bad scheduler decisions. Tools
12 like LTTng can export this scheduler information along with instrumentation of
13 the rest of the kernel activity to perform post-mortem analysis on the scheduler.
16 About the performance impact of tracepoints (which is comparable to markers),
17 even without immediate values optimizations, tests done by Hideo Aoki on ia64
18 show no regression. His test case was using hackbench on a kernel where
19 scheduler instrumentation (about 5 events in the core scheduler code) was added.
20 See the "Tracepoints" patch header for performance result detail.
23 - Change instrumentation locations and parameters to match the ftrace instrumentation
24 previously done with kernel markers.
26 Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
27 CC: 'Peter Zijlstra' <peterz@infradead.org>
28 CC: 'Steven Rostedt' <rostedt@goodmis.org>
29 CC: Thomas Gleixner <tglx@linutronix.de>
30 CC: Masami Hiramatsu <mhiramat@redhat.com>
31 CC: "Frank Ch. Eigler" <fche@redhat.com>
32 CC: 'Ingo Molnar' <mingo@elte.hu>
33 CC: 'Hideo AOKI' <haoki@redhat.com>
34 CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
35 CC: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
37 Acked-by: Jan Blunck <jblunck@suse.de>
40 include/trace/sched.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
41 kernel/exit.c | 10 +++++++++-
43 kernel/kthread.c | 5 +++++
44 kernel/sched.c | 17 ++++++-----------
45 kernel/signal.c | 3 +++
46 6 files changed, 71 insertions(+), 12 deletions(-)
49 +++ b/include/trace/sched.h
51 +#ifndef _TRACE_SCHED_H
52 +#define _TRACE_SCHED_H
54 +#include <linux/sched.h>
55 +#include <linux/tracepoint.h>
57 +DEFINE_TRACE(sched_kthread_stop,
58 + TPPROTO(struct task_struct *t),
60 +DEFINE_TRACE(sched_kthread_stop_ret,
63 +DEFINE_TRACE(sched_wait_task,
64 + TPPROTO(struct rq *rq, struct task_struct *p),
66 +DEFINE_TRACE(sched_wakeup,
67 + TPPROTO(struct rq *rq, struct task_struct *p),
69 +DEFINE_TRACE(sched_wakeup_new,
70 + TPPROTO(struct rq *rq, struct task_struct *p),
72 +DEFINE_TRACE(sched_switch,
73 + TPPROTO(struct rq *rq, struct task_struct *prev,
74 + struct task_struct *next),
75 + TPARGS(rq, prev, next));
76 +DEFINE_TRACE(sched_migrate_task,
77 + TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
78 + TPARGS(rq, p, dest_cpu));
79 +DEFINE_TRACE(sched_process_free,
80 + TPPROTO(struct task_struct *p),
82 +DEFINE_TRACE(sched_process_exit,
83 + TPPROTO(struct task_struct *p),
85 +DEFINE_TRACE(sched_process_wait,
86 + TPPROTO(struct pid *pid),
88 +DEFINE_TRACE(sched_process_fork,
89 + TPPROTO(struct task_struct *parent, struct task_struct *child),
90 + TPARGS(parent, child));
91 +DEFINE_TRACE(sched_signal_send,
92 + TPPROTO(int sig, struct task_struct *p),
99 #include <linux/blkdev.h>
100 #include <linux/task_io_accounting_ops.h>
101 #include <linux/tracehook.h>
102 +#include <trace/sched.h>
104 #include <asm/uaccess.h>
105 #include <asm/unistd.h>
106 @@ -152,7 +153,10 @@ static void __exit_signal(struct task_st
108 static void delayed_put_task_struct(struct rcu_head *rhp)
110 - put_task_struct(container_of(rhp, struct task_struct, rcu));
111 + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
113 + trace_sched_process_free(tsk);
114 + put_task_struct(tsk);
118 @@ -1076,6 +1080,8 @@ NORET_TYPE void do_exit(long code)
122 + trace_sched_process_exit(tsk);
127 @@ -1679,6 +1685,8 @@ static long do_wait(enum pid_type type,
128 struct task_struct *tsk;
131 + trace_sched_process_wait(pid);
133 add_wait_queue(&current->signal->wait_chldexit,&wait);
139 #include <linux/tty.h>
140 #include <linux/proc_fs.h>
141 #include <linux/blkdev.h>
142 +#include <trace/sched.h>
144 #include <asm/pgtable.h>
145 #include <asm/pgalloc.h>
146 @@ -1363,6 +1364,8 @@ long do_fork(unsigned long clone_flags,
148 struct completion vfork;
150 + trace_sched_process_fork(current, p);
152 nr = task_pid_vnr(p);
154 if (clone_flags & CLONE_PARENT_SETTID)
155 --- a/kernel/kthread.c
156 +++ b/kernel/kthread.c
158 #include <linux/file.h>
159 #include <linux/module.h>
160 #include <linux/mutex.h>
161 +#include <trace/sched.h>
163 #define KTHREAD_NICE_LEVEL (-5)
165 @@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k)
166 /* It could exit after stop_info.k set, but before wake_up_process. */
169 + trace_sched_kthread_stop(k);
171 /* Must init completion *before* thread sees kthread_stop_info.k */
172 init_completion(&kthread_stop_info.done);
174 @@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k)
175 ret = kthread_stop_info.err;
176 mutex_unlock(&kthread_stop_lock);
178 + trace_sched_kthread_stop_ret(ret);
182 EXPORT_SYMBOL(kthread_stop);
186 #include <linux/debugfs.h>
187 #include <linux/ctype.h>
188 #include <linux/ftrace.h>
189 +#include <trace/sched.h>
190 #include <linux/perfmon_kern.h>
193 @@ -1914,6 +1915,7 @@ unsigned long wait_task_inactive(struct
194 * just go back and repeat.
196 rq = task_rq_lock(p, &flags);
197 + trace_sched_wait_task(rq, p);
198 running = task_running(rq, p);
201 @@ -2306,9 +2308,7 @@ out_activate:
205 - trace_mark(kernel_sched_wakeup,
206 - "pid %d state %ld ## rq %p task %p rq->curr %p",
207 - p->pid, p->state, rq, p, rq->curr);
208 + trace_sched_wakeup(rq, p);
209 check_preempt_curr(rq, p);
211 p->state = TASK_RUNNING;
212 @@ -2441,9 +2441,7 @@ void wake_up_new_task(struct task_struct
213 p->sched_class->task_new(rq, p);
216 - trace_mark(kernel_sched_wakeup_new,
217 - "pid %d state %ld ## rq %p task %p rq->curr %p",
218 - p->pid, p->state, rq, p, rq->curr);
219 + trace_sched_wakeup_new(rq, p);
220 check_preempt_curr(rq, p);
222 if (p->sched_class->task_wake_up)
223 @@ -2616,11 +2614,7 @@ context_switch(struct rq *rq, struct tas
224 struct mm_struct *mm, *oldmm;
226 prepare_task_switch(rq, prev, next);
227 - trace_mark(kernel_sched_schedule,
228 - "prev_pid %d next_pid %d prev_state %ld "
229 - "## rq %p prev %p next %p",
230 - prev->pid, next->pid, prev->state,
232 + trace_sched_switch(rq, prev, next);
234 oldmm = prev->active_mm;
236 @@ -2860,6 +2854,7 @@ static void sched_migrate_task(struct ta
237 || unlikely(!cpu_active(dest_cpu)))
240 + trace_sched_migrate_task(rq, p, dest_cpu);
241 /* force the process onto the specified CPU */
242 if (migrate_task(p, dest_cpu, &req)) {
243 /* Need to wait for migration thread (might exit: take ref). */
244 --- a/kernel/signal.c
245 +++ b/kernel/signal.c
247 #include <linux/freezer.h>
248 #include <linux/pid_namespace.h>
249 #include <linux/nsproxy.h>
250 +#include <trace/sched.h>
252 #include <asm/param.h>
253 #include <asm/uaccess.h>
254 @@ -803,6 +804,8 @@ static int send_signal(int sig, struct s
255 struct sigpending *pending;
258 + trace_sched_signal_send(sig, t);
260 assert_spin_locked(&t->sighand->siglock);
261 if (!prepare_signal(sig, t))