]>
Commit | Line | Data |
---|---|---|
c8e5329d GKH |
1 | From 5e3cc1ee1405a7eb3487ed24f786dec01b4cbe1f Mon Sep 17 00:00:00 2001 |
2 | From: Hou Tao <houtao1@huawei.com> | |
3 | Date: Thu, 24 Jan 2019 14:35:13 +0800 | |
4 | Subject: 9p: use inode->i_lock to protect i_size_write() under 32-bit | |
5 | ||
6 | From: Hou Tao <houtao1@huawei.com> | |
7 | ||
8 | commit 5e3cc1ee1405a7eb3487ed24f786dec01b4cbe1f upstream. | |
9 | ||
10 | Use inode->i_lock to protect i_size_write(), else i_size_read() in | |
11 | generic_fillattr() may loop infinitely in read_seqcount_begin() when | |
12 | multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl() | |
13 | simultaneously under 32-bit SMP environment, and a soft lockup will be | |
14 | triggered as show below: | |
15 | ||
16 | watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217] | |
17 | Modules linked in: | |
18 | CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4 | |
19 | Hardware name: Generic DT based system | |
20 | PC is at generic_fillattr+0x104/0x108 | |
21 | LR is at 0xec497f00 | |
22 | pc : [<802b8898>] lr : [<ec497f00>] psr: 200c0013 | |
23 | sp : ec497e20 ip : ed608030 fp : ec497e3c | |
24 | r10: 00000000 r9 : ec497f00 r8 : ed608030 | |
25 | r7 : ec497ebc r6 : ec497f00 r5 : ee5c1550 r4 : ee005780 | |
26 | r3 : 0000052d r2 : 00000000 r1 : ec497f00 r0 : ed608030 | |
27 | Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none | |
28 | Control: 10c5387d Table: ac48006a DAC: 00000051 | |
29 | CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4 | |
30 | Hardware name: Generic DT based system | |
31 | Backtrace: | |
32 | [<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24) | |
33 | [<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc) | |
34 | [<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20) | |
35 | [<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8) | |
36 | [<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380) | |
37 | [<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0) | |
38 | [<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64) | |
39 | [<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c) | |
40 | [<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc) | |
41 | [<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48) | |
42 | [<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240) | |
43 | [<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44) | |
44 | [<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4) | |
45 | [<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88) | |
46 | [<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98) | |
47 | [<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4) | |
48 | [<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c) | |
49 | [<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48) | |
50 | [<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec) | |
51 | [<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78) | |
52 | [<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28) | |
53 | ||
54 | [dominique.martinet@cea.fr: updated comment to not refer to a function | |
55 | in another subsystem] | |
56 | Link: http://lkml.kernel.org/r/20190124063514.8571-2-houtao1@huawei.com | |
57 | Cc: stable@vger.kernel.org | |
58 | Fixes: 7549ae3e81cc ("9p: Use the i_size_[read, write]() macros instead of using inode->i_size directly.") | |
59 | Reported-by: Xing Gaopeng <xingaopeng@huawei.com> | |
60 | Signed-off-by: Hou Tao <houtao1@huawei.com> | |
61 | Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr> | |
62 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
63 | ||
64 | --- | |
65 | fs/9p/v9fs_vfs.h | 23 +++++++++++++++++++++-- | |
66 | fs/9p/vfs_file.c | 6 +++++- | |
67 | fs/9p/vfs_inode.c | 23 +++++++++++------------ | |
68 | fs/9p/vfs_inode_dotl.c | 27 ++++++++++++++------------- | |
69 | fs/9p/vfs_super.c | 4 ++-- | |
70 | 5 files changed, 53 insertions(+), 30 deletions(-) | |
71 | ||
72 | --- a/fs/9p/v9fs_vfs.h | |
73 | +++ b/fs/9p/v9fs_vfs.h | |
74 | @@ -40,6 +40,9 @@ | |
75 | */ | |
76 | #define P9_LOCK_TIMEOUT (30*HZ) | |
77 | ||
78 | +/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */ | |
79 | +#define V9FS_STAT2INODE_KEEP_ISIZE 1 | |
80 | + | |
81 | extern struct file_system_type v9fs_fs_type; | |
82 | extern const struct address_space_operations v9fs_addr_operations; | |
83 | extern const struct file_operations v9fs_file_operations; | |
84 | @@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_ | |
85 | struct inode *inode, umode_t mode, dev_t); | |
86 | void v9fs_evict_inode(struct inode *inode); | |
87 | ino_t v9fs_qid2ino(struct p9_qid *qid); | |
88 | -void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); | |
89 | -void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); | |
90 | +void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, | |
91 | + struct super_block *sb, unsigned int flags); | |
92 | +void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, | |
93 | + unsigned int flags); | |
94 | int v9fs_dir_release(struct inode *inode, struct file *filp); | |
95 | int v9fs_file_open(struct inode *inode, struct file *file); | |
96 | void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); | |
97 | @@ -83,4 +88,18 @@ static inline void v9fs_invalidate_inode | |
98 | } | |
99 | ||
100 | int v9fs_open_to_dotl_flags(int flags); | |
101 | + | |
102 | +static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size) | |
103 | +{ | |
104 | + /* | |
105 | + * 32-bit need the lock, concurrent updates could break the | |
106 | + * sequences and make i_size_read() loop forever. | |
107 | + * 64-bit updates are atomic and can skip the locking. | |
108 | + */ | |
109 | + if (sizeof(i_size) > sizeof(long)) | |
110 | + spin_lock(&inode->i_lock); | |
111 | + i_size_write(inode, i_size); | |
112 | + if (sizeof(i_size) > sizeof(long)) | |
113 | + spin_unlock(&inode->i_lock); | |
114 | +} | |
115 | #endif | |
116 | --- a/fs/9p/vfs_file.c | |
117 | +++ b/fs/9p/vfs_file.c | |
118 | @@ -446,7 +446,11 @@ v9fs_file_write_iter(struct kiocb *iocb, | |
119 | i_size = i_size_read(inode); | |
120 | if (iocb->ki_pos > i_size) { | |
121 | inode_add_bytes(inode, iocb->ki_pos - i_size); | |
122 | - i_size_write(inode, iocb->ki_pos); | |
123 | + /* | |
124 | + * Need to serialize against i_size_write() in | |
125 | + * v9fs_stat2inode() | |
126 | + */ | |
127 | + v9fs_i_size_write(inode, iocb->ki_pos); | |
128 | } | |
129 | return retval; | |
130 | } | |
131 | --- a/fs/9p/vfs_inode.c | |
132 | +++ b/fs/9p/vfs_inode.c | |
133 | @@ -538,7 +538,7 @@ static struct inode *v9fs_qid_iget(struc | |
134 | if (retval) | |
135 | goto error; | |
136 | ||
137 | - v9fs_stat2inode(st, inode, sb); | |
138 | + v9fs_stat2inode(st, inode, sb, 0); | |
139 | v9fs_cache_inode_get_cookie(inode); | |
140 | unlock_new_inode(inode); | |
141 | return inode; | |
142 | @@ -1092,7 +1092,7 @@ v9fs_vfs_getattr(const struct path *path | |
143 | if (IS_ERR(st)) | |
144 | return PTR_ERR(st); | |
145 | ||
146 | - v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb); | |
147 | + v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); | |
148 | generic_fillattr(d_inode(dentry), stat); | |
149 | ||
150 | p9stat_free(st); | |
151 | @@ -1170,12 +1170,13 @@ static int v9fs_vfs_setattr(struct dentr | |
152 | * @stat: Plan 9 metadata (mistat) structure | |
153 | * @inode: inode to populate | |
154 | * @sb: superblock of filesystem | |
155 | + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) | |
156 | * | |
157 | */ | |
158 | ||
159 | void | |
160 | v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, | |
161 | - struct super_block *sb) | |
162 | + struct super_block *sb, unsigned int flags) | |
163 | { | |
164 | umode_t mode; | |
165 | char ext[32]; | |
166 | @@ -1216,10 +1217,11 @@ v9fs_stat2inode(struct p9_wstat *stat, s | |
167 | mode = p9mode2perm(v9ses, stat); | |
168 | mode |= inode->i_mode & ~S_IALLUGO; | |
169 | inode->i_mode = mode; | |
170 | - i_size_write(inode, stat->length); | |
171 | ||
172 | + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) | |
173 | + v9fs_i_size_write(inode, stat->length); | |
174 | /* not real number of blocks, but 512 byte ones ... */ | |
175 | - inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; | |
176 | + inode->i_blocks = (stat->length + 512 - 1) >> 9; | |
177 | v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; | |
178 | } | |
179 | ||
180 | @@ -1416,9 +1418,9 @@ int v9fs_refresh_inode(struct p9_fid *fi | |
181 | { | |
182 | int umode; | |
183 | dev_t rdev; | |
184 | - loff_t i_size; | |
185 | struct p9_wstat *st; | |
186 | struct v9fs_session_info *v9ses; | |
187 | + unsigned int flags; | |
188 | ||
189 | v9ses = v9fs_inode2v9ses(inode); | |
190 | st = p9_client_stat(fid); | |
191 | @@ -1431,16 +1433,13 @@ int v9fs_refresh_inode(struct p9_fid *fi | |
192 | if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) | |
193 | goto out; | |
194 | ||
195 | - spin_lock(&inode->i_lock); | |
196 | /* | |
197 | * We don't want to refresh inode->i_size, | |
198 | * because we may have cached data | |
199 | */ | |
200 | - i_size = inode->i_size; | |
201 | - v9fs_stat2inode(st, inode, inode->i_sb); | |
202 | - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) | |
203 | - inode->i_size = i_size; | |
204 | - spin_unlock(&inode->i_lock); | |
205 | + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? | |
206 | + V9FS_STAT2INODE_KEEP_ISIZE : 0; | |
207 | + v9fs_stat2inode(st, inode, inode->i_sb, flags); | |
208 | out: | |
209 | p9stat_free(st); | |
210 | kfree(st); | |
211 | --- a/fs/9p/vfs_inode_dotl.c | |
212 | +++ b/fs/9p/vfs_inode_dotl.c | |
213 | @@ -143,7 +143,7 @@ static struct inode *v9fs_qid_iget_dotl( | |
214 | if (retval) | |
215 | goto error; | |
216 | ||
217 | - v9fs_stat2inode_dotl(st, inode); | |
218 | + v9fs_stat2inode_dotl(st, inode, 0); | |
219 | v9fs_cache_inode_get_cookie(inode); | |
220 | retval = v9fs_get_acl(inode, fid); | |
221 | if (retval) | |
222 | @@ -496,7 +496,7 @@ v9fs_vfs_getattr_dotl(const struct path | |
223 | if (IS_ERR(st)) | |
224 | return PTR_ERR(st); | |
225 | ||
226 | - v9fs_stat2inode_dotl(st, d_inode(dentry)); | |
227 | + v9fs_stat2inode_dotl(st, d_inode(dentry), 0); | |
228 | generic_fillattr(d_inode(dentry), stat); | |
229 | /* Change block size to what the server returned */ | |
230 | stat->blksize = st->st_blksize; | |
231 | @@ -607,11 +607,13 @@ int v9fs_vfs_setattr_dotl(struct dentry | |
232 | * v9fs_stat2inode_dotl - populate an inode structure with stat info | |
233 | * @stat: stat structure | |
234 | * @inode: inode to populate | |
235 | + * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) | |
236 | * | |
237 | */ | |
238 | ||
239 | void | |
240 | -v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) | |
241 | +v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, | |
242 | + unsigned int flags) | |
243 | { | |
244 | umode_t mode; | |
245 | struct v9fs_inode *v9inode = V9FS_I(inode); | |
246 | @@ -631,7 +633,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl | |
247 | mode |= inode->i_mode & ~S_IALLUGO; | |
248 | inode->i_mode = mode; | |
249 | ||
250 | - i_size_write(inode, stat->st_size); | |
251 | + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) | |
252 | + v9fs_i_size_write(inode, stat->st_size); | |
253 | inode->i_blocks = stat->st_blocks; | |
254 | } else { | |
255 | if (stat->st_result_mask & P9_STATS_ATIME) { | |
256 | @@ -661,8 +664,9 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl | |
257 | } | |
258 | if (stat->st_result_mask & P9_STATS_RDEV) | |
259 | inode->i_rdev = new_decode_dev(stat->st_rdev); | |
260 | - if (stat->st_result_mask & P9_STATS_SIZE) | |
261 | - i_size_write(inode, stat->st_size); | |
262 | + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && | |
263 | + stat->st_result_mask & P9_STATS_SIZE) | |
264 | + v9fs_i_size_write(inode, stat->st_size); | |
265 | if (stat->st_result_mask & P9_STATS_BLOCKS) | |
266 | inode->i_blocks = stat->st_blocks; | |
267 | } | |
268 | @@ -928,9 +932,9 @@ v9fs_vfs_get_link_dotl(struct dentry *de | |
269 | ||
270 | int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) | |
271 | { | |
272 | - loff_t i_size; | |
273 | struct p9_stat_dotl *st; | |
274 | struct v9fs_session_info *v9ses; | |
275 | + unsigned int flags; | |
276 | ||
277 | v9ses = v9fs_inode2v9ses(inode); | |
278 | st = p9_client_getattr_dotl(fid, P9_STATS_ALL); | |
279 | @@ -942,16 +946,13 @@ int v9fs_refresh_inode_dotl(struct p9_fi | |
280 | if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) | |
281 | goto out; | |
282 | ||
283 | - spin_lock(&inode->i_lock); | |
284 | /* | |
285 | * We don't want to refresh inode->i_size, | |
286 | * because we may have cached data | |
287 | */ | |
288 | - i_size = inode->i_size; | |
289 | - v9fs_stat2inode_dotl(st, inode); | |
290 | - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) | |
291 | - inode->i_size = i_size; | |
292 | - spin_unlock(&inode->i_lock); | |
293 | + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? | |
294 | + V9FS_STAT2INODE_KEEP_ISIZE : 0; | |
295 | + v9fs_stat2inode_dotl(st, inode, flags); | |
296 | out: | |
297 | kfree(st); | |
298 | return 0; | |
299 | --- a/fs/9p/vfs_super.c | |
300 | +++ b/fs/9p/vfs_super.c | |
301 | @@ -172,7 +172,7 @@ static struct dentry *v9fs_mount(struct | |
302 | goto release_sb; | |
303 | } | |
304 | d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); | |
305 | - v9fs_stat2inode_dotl(st, d_inode(root)); | |
306 | + v9fs_stat2inode_dotl(st, d_inode(root), 0); | |
307 | kfree(st); | |
308 | } else { | |
309 | struct p9_wstat *st = NULL; | |
310 | @@ -183,7 +183,7 @@ static struct dentry *v9fs_mount(struct | |
311 | } | |
312 | ||
313 | d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); | |
314 | - v9fs_stat2inode(st, d_inode(root), sb); | |
315 | + v9fs_stat2inode(st, d_inode(root), sb, 0); | |
316 | ||
317 | p9stat_free(st); | |
318 | kfree(st); |