1 From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
2 Subject: LTTng instrumentation - fs
5 LTTng instrumentation - fs
7 Core filesystem tracepoints.
28 Instrument the core FS events, extracting the information when it is available.
29 e.g. the instrumentation of "reads" is inserted _after_ the read, when
30 the information is ready. This would not be possible if the instrumentation
31 were placed elsewhere (at the beginning of the system call, for instance).
33 Those tracepoints are used by LTTng.
35 About the performance impact of tracepoints (which is comparable to markers),
36 even without immediate values optimizations, tests done by Hideo Aoki on ia64
37 show no regression. His test case was using hackbench on a kernel where
38 scheduler instrumentation (about 5 events in core scheduler code) was added.
39 See the "Tracepoints" patch header for detailed performance results.
41 Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
42 CC: Alexander Viro <viro@zeniv.linux.org.uk>
43 CC: Masami Hiramatsu <mhiramat@redhat.com>
44 CC: 'Peter Zijlstra' <peterz@infradead.org>
45 CC: "Frank Ch. Eigler" <fche@redhat.com>
46 CC: 'Ingo Molnar' <mingo@elte.hu>
47 CC: 'Hideo AOKI' <haoki@redhat.com>
48 CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
49 CC: 'Steven Rostedt' <rostedt@goodmis.org>
50 CC: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
52 Acked-by: Jan Blunck <jblunck@suse.de>
60 fs/read_write.c | 19 +++++++++++++--
62 include/trace/fs.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++
63 8 files changed, 98 insertions(+), 2 deletions(-)
68 #include <linux/bitops.h>
69 #include <linux/mpage.h>
70 #include <linux/bit_spinlock.h>
71 +#include <trace/fs.h>
73 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
75 @@ -89,7 +90,9 @@ void unlock_buffer(struct buffer_head *b
77 void __wait_on_buffer(struct buffer_head * bh)
79 + trace_fs_buffer_wait_start(bh);
80 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
81 + trace_fs_buffer_wait_end(bh);
88 #include <linux/poll.h>
90 #include <linux/eventpoll.h>
91 +#include <trace/fs.h>
93 #include <asm/uaccess.h>
94 #include <asm/mmu_context.h>
95 @@ -1411,6 +1412,7 @@ int compat_do_execve(char * filename,
97 retval = search_binary_handler(bprm, regs);
99 + trace_fs_exec(filename);
101 security_bprm_free(bprm);
102 acct_update_integrals(current);
106 #include <linux/cn_proc.h>
107 #include <linux/audit.h>
108 #include <linux/tracehook.h>
109 +#include <trace/fs.h>
111 #include <asm/uaccess.h>
112 #include <asm/mmu_context.h>
113 @@ -1353,6 +1354,7 @@ int do_execve(char * filename,
114 current->flags &= ~PF_KTHREAD;
115 retval = search_binary_handler(bprm,regs);
117 + trace_fs_exec(filename);
119 security_bprm_free(bprm);
120 acct_update_integrals(current);
124 #include <linux/security.h>
125 #include <linux/module.h>
126 #include <linux/uaccess.h>
127 +#include <trace/fs.h>
129 #include <asm/ioctls.h>
131 @@ -205,6 +206,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd,
135 + trace_fs_ioctl(fd, cmd, arg);
137 error = security_file_ioctl(filp, cmd, arg);
143 #include <linux/rcupdate.h>
144 #include <linux/audit.h>
145 #include <linux/falloc.h>
146 +#include <trace/fs.h>
148 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
150 @@ -1044,6 +1045,7 @@ long do_sys_open(int dfd, const char __u
151 fsnotify_open(f->f_path.dentry);
154 + trace_fs_open(fd, tmp);
158 @@ -1133,6 +1135,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
162 + trace_fs_close(fd);
163 rcu_assign_pointer(fdt->fd[fd], NULL);
164 FD_CLR(fd, fdt->close_on_exec);
165 __put_unused_fd(files, fd);
166 --- a/fs/read_write.c
167 +++ b/fs/read_write.c
169 #include <linux/syscalls.h>
170 #include <linux/pagemap.h>
171 #include <linux/splice.h>
172 +#include <trace/fs.h>
173 #include "read_write.h"
175 #include <asm/uaccess.h>
176 @@ -130,6 +131,9 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd,
177 if (res != (loff_t)retval)
178 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
181 + trace_fs_lseek(fd, offset, origin);
183 fput_light(file, fput_needed);
186 @@ -157,6 +161,8 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd
187 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
190 + trace_fs_llseek(fd, offset, origin);
192 retval = (int)offset;
195 @@ -344,6 +350,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd,
197 loff_t pos = file_pos_read(file);
198 ret = vfs_read(file, buf, count, &pos);
199 + trace_fs_read(fd, buf, count, ret);
200 file_pos_write(file, pos);
201 fput_light(file, fput_needed);
203 @@ -362,6 +369,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd,
205 loff_t pos = file_pos_read(file);
206 ret = vfs_write(file, buf, count, &pos);
207 + trace_fs_write(fd, buf, count, ret);
208 file_pos_write(file, pos);
209 fput_light(file, fput_needed);
211 @@ -382,8 +390,11 @@ SYSCALL_DEFINE(pread64)(unsigned int fd,
212 file = fget_light(fd, &fput_needed);
215 - if (file->f_mode & FMODE_PREAD)
216 + if (file->f_mode & FMODE_PREAD) {
217 ret = vfs_read(file, buf, count, &pos);
218 + trace_fs_pread64(fd, buf, count, pos, ret);
221 fput_light(file, fput_needed);
224 @@ -411,8 +422,10 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd
225 file = fget_light(fd, &fput_needed);
228 - if (file->f_mode & FMODE_PWRITE)
229 + if (file->f_mode & FMODE_PWRITE) {
230 ret = vfs_write(file, buf, count, &pos);
231 + trace_fs_pwrite64(fd, buf, count, pos, ret);
233 fput_light(file, fput_needed);
236 @@ -665,6 +678,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd
238 loff_t pos = file_pos_read(file);
239 ret = vfs_readv(file, vec, vlen, &pos);
240 + trace_fs_readv(fd, vec, vlen, ret);
241 file_pos_write(file, pos);
242 fput_light(file, fput_needed);
244 @@ -686,6 +700,7 @@ SYSCALL_DEFINE3(writev, unsigned long, f
246 loff_t pos = file_pos_read(file);
247 ret = vfs_writev(file, vec, vlen, &pos);
248 + trace_fs_writev(fd, vec, vlen, ret);
249 file_pos_write(file, pos);
250 fput_light(file, fput_needed);
255 #include <linux/fdtable.h>
256 #include <linux/fs.h>
257 #include <linux/rcupdate.h>
258 +#include <trace/fs.h>
260 #include <asm/uaccess.h>
262 @@ -232,6 +233,7 @@ int do_select(int n, fd_set_bits *fds, s
263 file = fget_light(i, &fput_needed);
266 + trace_fs_select(i, *timeout);
267 mask = DEFAULT_POLLMASK;
268 if (f_op && f_op->poll)
269 mask = (*f_op->poll)(file, retval ? NULL : wait);
270 @@ -561,6 +563,7 @@ static inline unsigned int do_pollfd(str
271 file = fget_light(fd, &fput_needed);
275 mask = DEFAULT_POLLMASK;
276 if (file->f_op && file->f_op->poll)
277 mask = file->f_op->poll(file, pwait);
279 +++ b/include/trace/fs.h
284 +#include <linux/buffer_head.h>
285 +#include <linux/tracepoint.h>
287 +DEFINE_TRACE(fs_buffer_wait_start,
288 + TPPROTO(struct buffer_head *bh),
290 +DEFINE_TRACE(fs_buffer_wait_end,
291 + TPPROTO(struct buffer_head *bh),
293 +DEFINE_TRACE(fs_exec,
294 + TPPROTO(char *filename),
296 +DEFINE_TRACE(fs_ioctl,
297 + TPPROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
298 + TPARGS(fd, cmd, arg));
299 +DEFINE_TRACE(fs_open,
300 + TPPROTO(int fd, char *filename),
301 + TPARGS(fd, filename));
302 +DEFINE_TRACE(fs_close,
303 + TPPROTO(unsigned int fd),
305 +DEFINE_TRACE(fs_lseek,
306 + TPPROTO(unsigned int fd, long offset, unsigned int origin),
307 + TPARGS(fd, offset, origin));
308 +DEFINE_TRACE(fs_llseek,
309 + TPPROTO(unsigned int fd, loff_t offset, unsigned int origin),
310 + TPARGS(fd, offset, origin));
313 + * Probes must be aware that __user * may be modified by concurrent userspace
314 + * or kernel threads.
316 +DEFINE_TRACE(fs_read,
317 + TPPROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
318 + TPARGS(fd, buf, count, ret));
319 +DEFINE_TRACE(fs_write,
320 + TPPROTO(unsigned int fd, const char __user *buf, size_t count,
322 + TPARGS(fd, buf, count, ret));
323 +DEFINE_TRACE(fs_pread64,
324 + TPPROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
326 + TPARGS(fd, buf, count, pos, ret));
327 +DEFINE_TRACE(fs_pwrite64,
328 + TPPROTO(unsigned int fd, const char __user *buf, size_t count,
329 + loff_t pos, ssize_t ret),
330 + TPARGS(fd, buf, count, pos, ret));
331 +DEFINE_TRACE(fs_readv,
332 + TPPROTO(unsigned long fd, const struct iovec __user *vec,
333 + unsigned long vlen, ssize_t ret),
334 + TPARGS(fd, vec, vlen, ret));
335 +DEFINE_TRACE(fs_writev,
336 + TPPROTO(unsigned long fd, const struct iovec __user *vec,
337 + unsigned long vlen, ssize_t ret),
338 + TPARGS(fd, vec, vlen, ret));
339 +DEFINE_TRACE(fs_select,
340 + TPPROTO(int fd, s64 timeout),
341 + TPARGS(fd, timeout));
342 +DEFINE_TRACE(fs_poll,