1 From: Nick Piggin <npiggin@suse.de>
2 Subject: fs: introduce mnt_clone_write
4 Patch-upstream: no (could be submitted)
6 This patch speeds up lmbench lat_mmap test by about another 2% after the
17 (50 runs of each, stddev gives a reasonable confidence)
19 It does this by introducing mnt_clone_write, which avoids some heavyweight
20 operations of mnt_want_write if called on a vfsmount which we know already
21 has a write count; and mnt_want_write_file, which can call mnt_clone_write
22 if the file is open for write.
24 After these two patches, mnt_want_write and mnt_drop_write go from 7% on
25 the profile down to 1.3% (including mnt_clone_write).
30 fs/namespace.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
33 include/linux/mount.h | 5 +++
34 6 files changed, 74 insertions(+), 6 deletions(-)
36 Index: linux-2.6.27/fs/file_table.c
37 ===================================================================
38 --- linux-2.6.27.orig/fs/file_table.c
39 +++ linux-2.6.27/fs/file_table.c
40 @@ -210,7 +210,7 @@ int init_file(struct file *file, struct
42 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
43 file_take_write(file);
44 - error = mnt_want_write(mnt);
45 + error = mnt_clone_write_2(mnt);
49 Index: linux-2.6.27/fs/inode.c
50 ===================================================================
51 --- linux-2.6.27.orig/fs/inode.c
52 +++ linux-2.6.27/fs/inode.c
53 @@ -1256,7 +1256,7 @@ void file_update_time(struct file *file)
54 if (IS_NOCMTIME(inode))
57 - err = mnt_want_write(file->f_path.mnt);
58 + err = mnt_want_write_file(file->f_path.mnt, file);
62 Index: linux-2.6.27/fs/namespace.c
63 ===================================================================
64 --- linux-2.6.27.orig/fs/namespace.c
65 +++ linux-2.6.27/fs/namespace.c
66 @@ -264,6 +264,69 @@ out:
67 EXPORT_SYMBOL_GPL(mnt_want_write);
70 + * mnt_clone_write - get write access to a mount
71 + * @mnt: the mount on which to take a write
73 + * This is effectively like mnt_want_write, except
74 + * it must only be used to take an extra write reference
75 + * on a mountpoint that we already know has a write reference
76 + * on it. This allows some optimisation.
78 + * The caller should really check __mnt_is_readonly before calling
79 + * mnt_clone_write. See mnt_clone_write_2.
81 + * After finished, mnt_drop_write must be called as usual to
82 + * drop the reference.
84 +void mnt_clone_write(struct vfsmount *mnt)
87 + inc_mnt_writers(mnt);
90 +EXPORT_SYMBOL_GPL(mnt_clone_write);
93 + * mnt_clone_write_2 - get write access to a mount
94 + * @mnt: the mount on which to take a write
96 + * Same as mnt_clone_write, but it performs the __mnt_is_readonly
97 + * check itself, and returns -error on failure. This is the preferred
98 + * function. This is here to preserve kABI compatibility.
100 + * After finished, mnt_drop_write must be called as usual to
101 + * drop the reference.
103 +int mnt_clone_write_2(struct vfsmount *mnt)
105 + /* superblock may be r/o */
106 + if (__mnt_is_readonly(mnt))
109 + inc_mnt_writers(mnt);
113 +EXPORT_SYMBOL_GPL(mnt_clone_write_2);
116 + * mnt_want_write_file - get write access to a file's mount
117 + * @file: the file on whose mount to take a write
119 + * This is like mnt_want_write, but it takes a file and can
120 + * do some optimisations if the file is open for write already
122 +int mnt_want_write_file(struct vfsmount *mnt, struct file *file)
124 + struct inode *inode = file->f_dentry->d_inode;
125 + if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
126 + return mnt_want_write(mnt);
128 + return mnt_clone_write_2(mnt);
130 +EXPORT_SYMBOL_GPL(mnt_want_write_file);
133 * mnt_drop_write - give up write access to a mount
134 * @mnt: the mount on which to give up write access
136 Index: linux-2.6.27/fs/open.c
137 ===================================================================
138 --- linux-2.6.27.orig/fs/open.c
139 +++ linux-2.6.27/fs/open.c
140 @@ -616,7 +616,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd
142 audit_inode(NULL, dentry);
144 - err = mnt_want_write(file->f_path.mnt);
145 + err = mnt_want_write_file(file->f_path.mnt, file);
148 mutex_lock(&inode->i_mutex);
149 @@ -765,7 +765,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd
153 - error = mnt_want_write(file->f_path.mnt);
154 + error = mnt_want_write_file(file->f_path.mnt, file);
157 dentry = file->f_path.dentry;
158 Index: linux-2.6.27/fs/xattr.c
159 ===================================================================
160 --- linux-2.6.27.orig/fs/xattr.c
161 +++ linux-2.6.27/fs/xattr.c
162 @@ -301,7 +301,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons
164 dentry = f->f_path.dentry;
165 audit_inode(NULL, dentry);
166 - error = mnt_want_write(f->f_path.mnt);
167 + error = mnt_want_write_file(f->f_path.mnt, f);
169 error = setxattr(dentry, name, value, size, flags);
170 mnt_drop_write(f->f_path.mnt);
171 @@ -528,7 +528,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, c
173 dentry = f->f_path.dentry;
174 audit_inode(NULL, dentry);
175 - error = mnt_want_write(f->f_path.mnt);
176 + error = mnt_want_write_file(f->f_path.mnt, f);
178 error = removexattr(dentry, name);
179 mnt_drop_write(f->f_path.mnt);
180 Index: linux-2.6.27/include/linux/mount.h
181 ===================================================================
182 --- linux-2.6.27.orig/include/linux/mount.h
183 +++ linux-2.6.27/include/linux/mount.h
184 @@ -98,7 +98,12 @@ static inline struct vfsmount *mntget(st
190 extern int mnt_want_write(struct vfsmount *mnt);
191 +extern int mnt_want_write_file(struct vfsmount *mnt, struct file *file);
192 +extern void mnt_clone_write(struct vfsmount *mnt);
193 +extern int mnt_clone_write_2(struct vfsmount *mnt);
194 extern void mnt_drop_write(struct vfsmount *mnt);
195 extern void mntput_no_expire(struct vfsmount *mnt);
196 extern void mnt_pin(struct vfsmount *mnt);