]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> |
2 | Subject: LTTng instrumentation - fs | |
3 | ||
4 | Original patch header: | |
5 | LTTng instrumentation - fs | |
6 | ||
7 | Core filesystem tracepoints. | |
8 | ||
9 | Tracepoints added : | |
10 | ||
11 | fs_buffer_wait_end | |
12 | fs_buffer_wait_start | |
13 | fs_close | |
14 | fs_exec | |
15 | fs_ioctl | |
16 | fs_llseek | |
17 | fs_lseek | |
18 | fs_open | |
19 | fs_poll | |
20 | fs_pread64 | |
21 | fs_pwrite64 | |
22 | fs_read | |
23 | fs_readv | |
24 | fs_select | |
25 | fs_write | |
26 | fs_writev | |
27 | ||
28 | Instrument the core FS events, extracting the information when it is available. | |
29 | e.g. the instrumentation of "reads" is inserted _after_ the read, when | |
30 | the information is ready. This would not be possible if the instrumentation | |
31 | were placed elsewhere (at the beginning of the system call, for instance). | |
32 | ||
33 | Those tracepoints are used by LTTng. | |
34 | ||
35 | About the performance impact of tracepoints (which is comparable to markers), | |
36 | even without immediate values optimizations, tests done by Hideo Aoki on ia64 | |
37 | show no regression. His test case was using hackbench on a kernel where | |
38 | scheduler instrumentation (about 5 events in scheduler code) was added. | |
39 | See the "Tracepoints" patch header for detailed performance results. | |
40 | ||
41 | Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | |
42 | CC: Alexander Viro <viro@zeniv.linux.org.uk> | |
43 | CC: Masami Hiramatsu <mhiramat@redhat.com> | |
44 | CC: 'Peter Zijlstra' <peterz@infradead.org> | |
45 | CC: "Frank Ch. Eigler" <fche@redhat.com> | |
46 | CC: 'Ingo Molnar' <mingo@elte.hu> | |
47 | CC: 'Hideo AOKI' <haoki@redhat.com> | |
48 | CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com> | |
49 | CC: 'Steven Rostedt' <rostedt@goodmis.org> | |
50 | CC: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> | |
51 | ||
52 | Acked-by: Jan Blunck <jblunck@suse.de> | |
53 | --- | |
54 | --- | |
55 | fs/buffer.c | 3 ++ | |
56 | fs/compat.c | 2 + | |
57 | fs/exec.c | 2 + | |
58 | fs/ioctl.c | 3 ++ | |
59 | fs/open.c | 3 ++ | |
60 | fs/read_write.c | 19 +++++++++++++-- | |
61 | fs/select.c | 3 ++ | |
62 | include/trace/fs.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
63 | 8 files changed, 98 insertions(+), 2 deletions(-) | |
64 | ||
65 | --- a/fs/buffer.c | |
66 | +++ b/fs/buffer.c | |
67 | @@ -41,6 +41,7 @@ | |
68 | #include <linux/bitops.h> | |
69 | #include <linux/mpage.h> | |
70 | #include <linux/bit_spinlock.h> | |
71 | +#include <trace/fs.h> | |
72 | ||
73 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); | |
74 | ||
75 | @@ -89,7 +90,9 @@ void unlock_buffer(struct buffer_head *b | |
76 | */ | |
77 | void __wait_on_buffer(struct buffer_head * bh) | |
78 | { | |
79 | + trace_fs_buffer_wait_start(bh); | |
80 | wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); | |
81 | + trace_fs_buffer_wait_end(bh); | |
82 | } | |
83 | ||
84 | static void | |
85 | --- a/fs/compat.c | |
86 | +++ b/fs/compat.c | |
87 | @@ -51,6 +51,7 @@ | |
88 | #include <linux/poll.h> | |
89 | #include <linux/mm.h> | |
90 | #include <linux/eventpoll.h> | |
91 | +#include <trace/fs.h> | |
92 | ||
93 | #include <asm/uaccess.h> | |
94 | #include <asm/mmu_context.h> | |
95 | @@ -1411,6 +1412,7 @@ int compat_do_execve(char * filename, | |
96 | ||
97 | retval = search_binary_handler(bprm, regs); | |
98 | if (retval >= 0) { | |
99 | + trace_fs_exec(filename); | |
100 | /* execve success */ | |
101 | security_bprm_free(bprm); | |
102 | acct_update_integrals(current); | |
103 | --- a/fs/exec.c | |
104 | +++ b/fs/exec.c | |
105 | @@ -50,6 +50,7 @@ | |
106 | #include <linux/cn_proc.h> | |
107 | #include <linux/audit.h> | |
108 | #include <linux/tracehook.h> | |
109 | +#include <trace/fs.h> | |
110 | ||
111 | #include <asm/uaccess.h> | |
112 | #include <asm/mmu_context.h> | |
113 | @@ -1353,6 +1354,7 @@ int do_execve(char * filename, | |
114 | current->flags &= ~PF_KTHREAD; | |
115 | retval = search_binary_handler(bprm,regs); | |
116 | if (retval >= 0) { | |
117 | + trace_fs_exec(filename); | |
118 | /* execve success */ | |
119 | security_bprm_free(bprm); | |
120 | acct_update_integrals(current); | |
121 | --- a/fs/ioctl.c | |
122 | +++ b/fs/ioctl.c | |
123 | @@ -13,6 +13,7 @@ | |
124 | #include <linux/security.h> | |
125 | #include <linux/module.h> | |
126 | #include <linux/uaccess.h> | |
127 | +#include <trace/fs.h> | |
128 | ||
129 | #include <asm/ioctls.h> | |
130 | ||
131 | @@ -205,6 +206,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, | |
132 | if (!filp) | |
133 | goto out; | |
134 | ||
135 | + trace_fs_ioctl(fd, cmd, arg); | |
136 | + | |
137 | error = security_file_ioctl(filp, cmd, arg); | |
138 | if (error) | |
139 | goto out_fput; | |
140 | --- a/fs/open.c | |
141 | +++ b/fs/open.c | |
142 | @@ -29,6 +29,7 @@ | |
143 | #include <linux/rcupdate.h> | |
144 | #include <linux/audit.h> | |
145 | #include <linux/falloc.h> | |
146 | +#include <trace/fs.h> | |
147 | ||
148 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |
149 | { | |
150 | @@ -1044,6 +1045,7 @@ long do_sys_open(int dfd, const char __u | |
151 | fsnotify_open(f->f_path.dentry); | |
152 | fd_install(fd, f); | |
153 | } | |
154 | + trace_fs_open(fd, tmp); | |
155 | } | |
156 | putname(tmp); | |
157 | } | |
158 | @@ -1133,6 +1135,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd) | |
159 | filp = fdt->fd[fd]; | |
160 | if (!filp) | |
161 | goto out_unlock; | |
162 | + trace_fs_close(fd); | |
163 | rcu_assign_pointer(fdt->fd[fd], NULL); | |
164 | FD_CLR(fd, fdt->close_on_exec); | |
165 | __put_unused_fd(files, fd); | |
166 | --- a/fs/read_write.c | |
167 | +++ b/fs/read_write.c | |
168 | @@ -16,6 +16,7 @@ | |
169 | #include <linux/syscalls.h> | |
170 | #include <linux/pagemap.h> | |
171 | #include <linux/splice.h> | |
172 | +#include <trace/fs.h> | |
173 | #include "read_write.h" | |
174 | ||
175 | #include <asm/uaccess.h> | |
176 | @@ -130,6 +131,9 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, | |
177 | if (res != (loff_t)retval) | |
178 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ | |
179 | } | |
180 | + | |
181 | + trace_fs_lseek(fd, offset, origin); | |
182 | + | |
183 | fput_light(file, fput_needed); | |
184 | bad: | |
185 | return retval; | |
186 | @@ -157,6 +161,8 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd | |
187 | offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, | |
188 | origin); | |
189 | ||
190 | + trace_fs_llseek(fd, offset, origin); | |
191 | + | |
192 | retval = (int)offset; | |
193 | if (offset >= 0) { | |
194 | retval = -EFAULT; | |
195 | @@ -344,6 +350,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, | |
196 | if (file) { | |
197 | loff_t pos = file_pos_read(file); | |
198 | ret = vfs_read(file, buf, count, &pos); | |
199 | + trace_fs_read(fd, buf, count, ret); | |
200 | file_pos_write(file, pos); | |
201 | fput_light(file, fput_needed); | |
202 | } | |
203 | @@ -362,6 +369,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, | |
204 | if (file) { | |
205 | loff_t pos = file_pos_read(file); | |
206 | ret = vfs_write(file, buf, count, &pos); | |
207 | + trace_fs_write(fd, buf, count, ret); | |
208 | file_pos_write(file, pos); | |
209 | fput_light(file, fput_needed); | |
210 | } | |
211 | @@ -382,8 +390,11 @@ SYSCALL_DEFINE(pread64)(unsigned int fd, | |
212 | file = fget_light(fd, &fput_needed); | |
213 | if (file) { | |
214 | ret = -ESPIPE; | |
215 | - if (file->f_mode & FMODE_PREAD) | |
216 | + if (file->f_mode & FMODE_PREAD) { | |
217 | ret = vfs_read(file, buf, count, &pos); | |
218 | + trace_fs_pread64(fd, buf, count, pos, ret); | |
219 | + } | |
220 | + | |
221 | fput_light(file, fput_needed); | |
222 | } | |
223 | ||
224 | @@ -411,8 +422,10 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd | |
225 | file = fget_light(fd, &fput_needed); | |
226 | if (file) { | |
227 | ret = -ESPIPE; | |
228 | - if (file->f_mode & FMODE_PWRITE) | |
229 | + if (file->f_mode & FMODE_PWRITE) { | |
230 | ret = vfs_write(file, buf, count, &pos); | |
231 | + trace_fs_pwrite64(fd, buf, count, pos, ret); | |
232 | + } | |
233 | fput_light(file, fput_needed); | |
234 | } | |
235 | ||
236 | @@ -665,6 +678,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd | |
237 | if (file) { | |
238 | loff_t pos = file_pos_read(file); | |
239 | ret = vfs_readv(file, vec, vlen, &pos); | |
240 | + trace_fs_readv(fd, vec, vlen, ret); | |
241 | file_pos_write(file, pos); | |
242 | fput_light(file, fput_needed); | |
243 | } | |
244 | @@ -686,6 +700,7 @@ SYSCALL_DEFINE3(writev, unsigned long, f | |
245 | if (file) { | |
246 | loff_t pos = file_pos_read(file); | |
247 | ret = vfs_writev(file, vec, vlen, &pos); | |
248 | + trace_fs_writev(fd, vec, vlen, ret); | |
249 | file_pos_write(file, pos); | |
250 | fput_light(file, fput_needed); | |
251 | } | |
252 | --- a/fs/select.c | |
253 | +++ b/fs/select.c | |
254 | @@ -24,6 +24,7 @@ | |
255 | #include <linux/fdtable.h> | |
256 | #include <linux/fs.h> | |
257 | #include <linux/rcupdate.h> | |
258 | +#include <trace/fs.h> | |
259 | ||
260 | #include <asm/uaccess.h> | |
261 | ||
262 | @@ -232,6 +233,7 @@ int do_select(int n, fd_set_bits *fds, s | |
263 | file = fget_light(i, &fput_needed); | |
264 | if (file) { | |
265 | f_op = file->f_op; | |
266 | + trace_fs_select(i, *timeout); | |
267 | mask = DEFAULT_POLLMASK; | |
268 | if (f_op && f_op->poll) | |
269 | mask = (*f_op->poll)(file, retval ? NULL : wait); | |
270 | @@ -561,6 +563,7 @@ static inline unsigned int do_pollfd(str | |
271 | file = fget_light(fd, &fput_needed); | |
272 | mask = POLLNVAL; | |
273 | if (file != NULL) { | |
274 | + trace_fs_poll(fd); | |
275 | mask = DEFAULT_POLLMASK; | |
276 | if (file->f_op && file->f_op->poll) | |
277 | mask = file->f_op->poll(file, pwait); | |
278 | --- /dev/null | |
279 | +++ b/include/trace/fs.h | |
280 | @@ -0,0 +1,65 @@ | |
281 | +#ifndef _TRACE_FS_H | |
282 | +#define _TRACE_FS_H | |
283 | + | |
284 | +#include <linux/buffer_head.h> | |
285 | +#include <linux/tracepoint.h> | |
286 | + | |
287 | +DEFINE_TRACE(fs_buffer_wait_start, | |
288 | + TPPROTO(struct buffer_head *bh), | |
289 | + TPARGS(bh)); | |
290 | +DEFINE_TRACE(fs_buffer_wait_end, | |
291 | + TPPROTO(struct buffer_head *bh), | |
292 | + TPARGS(bh)); | |
293 | +DEFINE_TRACE(fs_exec, | |
294 | + TPPROTO(char *filename), | |
295 | + TPARGS(filename)); | |
296 | +DEFINE_TRACE(fs_ioctl, | |
297 | + TPPROTO(unsigned int fd, unsigned int cmd, unsigned long arg), | |
298 | + TPARGS(fd, cmd, arg)); | |
299 | +DEFINE_TRACE(fs_open, | |
300 | + TPPROTO(int fd, char *filename), | |
301 | + TPARGS(fd, filename)); | |
302 | +DEFINE_TRACE(fs_close, | |
303 | + TPPROTO(unsigned int fd), | |
304 | + TPARGS(fd)); | |
305 | +DEFINE_TRACE(fs_lseek, | |
306 | + TPPROTO(unsigned int fd, long offset, unsigned int origin), | |
307 | + TPARGS(fd, offset, origin)); | |
308 | +DEFINE_TRACE(fs_llseek, | |
309 | + TPPROTO(unsigned int fd, loff_t offset, unsigned int origin), | |
310 | + TPARGS(fd, offset, origin)); | |
311 | + | |
312 | +/* | |
313 | + * Probes must be aware that __user * may be modified by concurrent userspace | |
314 | + * or kernel threads. | |
315 | + */ | |
316 | +DEFINE_TRACE(fs_read, | |
317 | + TPPROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret), | |
318 | + TPARGS(fd, buf, count, ret)); | |
319 | +DEFINE_TRACE(fs_write, | |
320 | + TPPROTO(unsigned int fd, const char __user *buf, size_t count, | |
321 | + ssize_t ret), | |
322 | + TPARGS(fd, buf, count, ret)); | |
323 | +DEFINE_TRACE(fs_pread64, | |
324 | + TPPROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos, | |
325 | + ssize_t ret), | |
326 | + TPARGS(fd, buf, count, pos, ret)); | |
327 | +DEFINE_TRACE(fs_pwrite64, | |
328 | + TPPROTO(unsigned int fd, const char __user *buf, size_t count, | |
329 | + loff_t pos, ssize_t ret), | |
330 | + TPARGS(fd, buf, count, pos, ret)); | |
331 | +DEFINE_TRACE(fs_readv, | |
332 | + TPPROTO(unsigned long fd, const struct iovec __user *vec, | |
333 | + unsigned long vlen, ssize_t ret), | |
334 | + TPARGS(fd, vec, vlen, ret)); | |
335 | +DEFINE_TRACE(fs_writev, | |
336 | + TPPROTO(unsigned long fd, const struct iovec __user *vec, | |
337 | + unsigned long vlen, ssize_t ret), | |
338 | + TPARGS(fd, vec, vlen, ret)); | |
339 | +DEFINE_TRACE(fs_select, | |
340 | + TPPROTO(int fd, s64 timeout), | |
341 | + TPARGS(fd, timeout)); | |
342 | +DEFINE_TRACE(fs_poll, | |
343 | + TPPROTO(int fd), | |
344 | + TPARGS(fd)); | |
345 | +#endif |