]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
f2f26e57e296fdf1e5e7d61bd9b93da46f2e0ab9
[thirdparty/kernel/stable-queue.git] /
1 From ccebcc74c81d8399c7b204aea47c1f33b09c2b17 Mon Sep 17 00:00:00 2001
2 From: Vyacheslav Dubeyko <slava@dubeyko.com>
3 Date: Mon, 30 Sep 2013 13:45:12 -0700
4 Subject: nilfs2: fix issue with race condition of competition between segments for dirty blocks
5
6 From: Vyacheslav Dubeyko <slava@dubeyko.com>
7
8 commit 7f42ec3941560f0902fe3671e36f2c20ffd3af0a upstream.
9
10 Many NILFS2 users have reported strange file system corruption
11 (for example):
12
13 NILFS: bad btree node (blocknr=185027): level = 0, flags = 0x0, nchildren = 768
14 NILFS error (device sda4): nilfs_bmap_last_key: broken bmap (inode number=11540)
15
16 But such error messages are a consequence of a file system issue that
17 takes place much earlier. Fortunately, Jerome Poulin <jeromepoulin@gmail.com>
18 and Anton Eliasson <devel@antoneliasson.se> reported another
19 issue not so recently. These reports describe a crash of the segctor
20 thread:
21
22 BUG: unable to handle kernel paging request at 0000000000004c83
23 IP: nilfs_end_page_io+0x12/0xd0 [nilfs2]
24
25 Call Trace:
26 nilfs_segctor_do_construct+0xf25/0x1b20 [nilfs2]
27 nilfs_segctor_construct+0x17b/0x290 [nilfs2]
28 nilfs_segctor_thread+0x122/0x3b0 [nilfs2]
29 kthread+0xc0/0xd0
30 ret_from_fork+0x7c/0xb0
31
32 These two issues have the same cause. This cause can trigger a third
33 issue too: the segctor thread hangs while consuming
34 100% CPU.
35
36 REPRODUCING PATH:
37
38 One of the possible ways of reproducing the issue was described by
39 Jerome Poulin <jeromepoulin@gmail.com>:
40
41 1. init S to get to single user mode.
42 2. sysrq+E to make sure only my shell is running
43 3. start network-manager to get my wifi connection up
44 4. login as root and launch "screen"
45 5. cd /boot/log/nilfs which is a ext3 mount point and can log when NILFS dies.
46 6. lscp | xz -9e > lscp.txt.xz
47 7. mount my snapshot using mount -o cp=3360839,ro /dev/vgUbuntu/root /mnt/nilfs
48 8. start a screen to dump /proc/kmsg to text file since rsyslog is killed
49 9. start a screen and launch strace -f -o find-cat.log -t find
50 /mnt/nilfs -type f -exec cat {} > /dev/null \;
51 10. start a screen and launch strace -f -o apt-get.log -t apt-get update
52 11. launch the last command again as it did not crash the first time
53 12. apt-get crashes
54 13. ps aux > ps-aux-crashed.log
55 13. sysrq+W
56 14. sysrq+E wait for everything to terminate
57 15. sysrq+SUSB
58
59 A simplified way of reproducing the issue is to start a kernel compilation
60 task and "apt-get update" in parallel.
61
62 REPRODUCIBILITY:
63
64 The issue does not reproduce reliably [60% - 80%]. It is very important to
65 have a proper environment for reproducing the issue. The critical
66 conditions for successful reproduction:
67
68 (1) There should be a big file that is modified by means of mmap().
69
70 (2) This file should have a count of dirty blocks that is greater than
71 several segments in size (for example, two or three) from time to time
72 during processing.
73
74 (3) There should be intensive background file modification activity
75 in another thread.
76
77 INVESTIGATION:
78
79 First of all, it is possible to see that the reason for the crash is an invalid
80 page address:
81
82 NILFS [nilfs_segctor_complete_write]:2100 bh->b_count 0, bh->b_blocknr 13895680, bh->b_size 13897727, bh->b_page 0000000000001a82
83 NILFS [nilfs_segctor_complete_write]:2101 segbuf->sb_segnum 6783
84
85 Moreover, the value of b_page (0x1a82) is 6786. This value looks like a segment
86 number. And the b_blocknr and b_size values look like block numbers. So,
87 the buffer_head pointer points to an invalid address.
88
89 Detailed investigation of the issue revealed the following picture:
90
91 [-----------------------------SEGMENT 6783-------------------------------]
92 NILFS [nilfs_segctor_do_construct]:2310 nilfs_segctor_begin_construction
93 NILFS [nilfs_segctor_do_construct]:2321 nilfs_segctor_collect
94 NILFS [nilfs_segctor_do_construct]:2336 nilfs_segctor_assign
95 NILFS [nilfs_segctor_do_construct]:2367 nilfs_segctor_update_segusage
96 NILFS [nilfs_segctor_do_construct]:2371 nilfs_segctor_prepare_write
97 NILFS [nilfs_segctor_do_construct]:2376 nilfs_add_checksums_on_logs
98 NILFS [nilfs_segctor_do_construct]:2381 nilfs_segctor_write
99 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111149024, segbuf->sb_segnum 6783
100
101 [-----------------------------SEGMENT 6784-------------------------------]
102 NILFS [nilfs_segctor_do_construct]:2310 nilfs_segctor_begin_construction
103 NILFS [nilfs_segctor_do_construct]:2321 nilfs_segctor_collect
104 NILFS [nilfs_lookup_dirty_data_buffers]:782 bh->b_count 1, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
105 NILFS [nilfs_lookup_dirty_data_buffers]:783 bh->b_assoc_buffers.next ffff8802174a6798, bh->b_assoc_buffers.prev ffff880221cffee8
106 NILFS [nilfs_segctor_do_construct]:2336 nilfs_segctor_assign
107 NILFS [nilfs_segctor_do_construct]:2367 nilfs_segctor_update_segusage
108 NILFS [nilfs_segctor_do_construct]:2371 nilfs_segctor_prepare_write
109 NILFS [nilfs_segctor_do_construct]:2376 nilfs_add_checksums_on_logs
110 NILFS [nilfs_segctor_do_construct]:2381 nilfs_segctor_write
111 NILFS [nilfs_segbuf_submit_bh]:575 bh->b_count 1, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
112 NILFS [nilfs_segbuf_submit_bh]:576 segbuf->sb_segnum 6784
113 NILFS [nilfs_segbuf_submit_bh]:577 bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880218bcdf50
114 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111150080, segbuf->sb_segnum 6784, segbuf->sb_nbio 0
115 [----------] ditto
116 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111164416, segbuf->sb_segnum 6784, segbuf->sb_nbio 15
117
118 [-----------------------------SEGMENT 6785-------------------------------]
119 NILFS [nilfs_segctor_do_construct]:2310 nilfs_segctor_begin_construction
120 NILFS [nilfs_segctor_do_construct]:2321 nilfs_segctor_collect
121 NILFS [nilfs_lookup_dirty_data_buffers]:782 bh->b_count 2, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
122 NILFS [nilfs_lookup_dirty_data_buffers]:783 bh->b_assoc_buffers.next ffff880219277e80, bh->b_assoc_buffers.prev ffff880221cffc88
123 NILFS [nilfs_segctor_do_construct]:2367 nilfs_segctor_update_segusage
124 NILFS [nilfs_segctor_do_construct]:2371 nilfs_segctor_prepare_write
125 NILFS [nilfs_segctor_do_construct]:2376 nilfs_add_checksums_on_logs
126 NILFS [nilfs_segctor_do_construct]:2381 nilfs_segctor_write
127 NILFS [nilfs_segbuf_submit_bh]:575 bh->b_count 2, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
128 NILFS [nilfs_segbuf_submit_bh]:576 segbuf->sb_segnum 6785
129 NILFS [nilfs_segbuf_submit_bh]:577 bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880222cc7ee8
130 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111165440, segbuf->sb_segnum 6785, segbuf->sb_nbio 0
131 [----------] ditto
132 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111177728, segbuf->sb_segnum 6785, segbuf->sb_nbio 12
133
134 NILFS [nilfs_segctor_do_construct]:2399 nilfs_segctor_wait
135 NILFS [nilfs_segbuf_wait]:676 segbuf->sb_segnum 6783
136 NILFS [nilfs_segbuf_wait]:676 segbuf->sb_segnum 6784
137 NILFS [nilfs_segbuf_wait]:676 segbuf->sb_segnum 6785
138
139 NILFS [nilfs_segctor_complete_write]:2100 bh->b_count 0, bh->b_blocknr 13895680, bh->b_size 13897727, bh->b_page 0000000000001a82
140
141 BUG: unable to handle kernel paging request at 0000000000001a82
142 IP: [<ffffffffa024d0f2>] nilfs_end_page_io+0x12/0xd0 [nilfs2]
143
144 Usually, for every segment we collect dirty files in a list. Then, dirty
145 blocks are gathered for every dirty file, prepared for write and
146 submitted by means of a nilfs_segbuf_submit_bh() call. Finally, the
147 complete write phase takes place after nilfs_end_bio_write() is called on the
148 block layer. Buffers/pages are marked as not dirty in the final phase and
149 processed files are removed from the list of dirty files.
150
151 It is possible to see that we had three prepare_write and submit_bio
152 phases before the segbuf_wait and complete_write phases. Moreover, segments
153 compete with each other for dirty blocks because on every iteration
154 of segment processing dirty buffer_heads are added to several lists of
155 payload_buffers:
156
157 [SEGMENT 6784]: bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880218bcdf50
158 [SEGMENT 6785]: bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880222cc7ee8
159
160 The next pointer is the same but the prev pointer has changed. It means
161 that the buffer_head has a next pointer from one list but a prev pointer from
162 another. Such a modification can be made several times. And, finally, it
163 can result in various issues: (1) segctor hanging, (2) segctor
164 crashing, (3) file system metadata corruption.
165
166 FIX:
167 This patch adds:
168
169 (1) setting of BH_Async_Write flag in nilfs_segctor_prepare_write()
170 for every processed dirty block;
171
172 (2) checking of BH_Async_Write flag in
173 nilfs_lookup_dirty_data_buffers() and
174 nilfs_lookup_dirty_node_buffers();
175
176 (3) clearing of BH_Async_Write flag in nilfs_segctor_complete_write(),
177 nilfs_abort_logs(), nilfs_forget_buffer(), nilfs_clear_dirty_page().
178
179 Reported-by: Jerome Poulin <jeromepoulin@gmail.com>
180 Reported-by: Anton Eliasson <devel@antoneliasson.se>
181 Cc: Paul Fertser <fercerpav@gmail.com>
182 Cc: ARAI Shun-ichi <hermes@ceres.dti.ne.jp>
183 Cc: Piotr Szymaniak <szarpaj@grubelek.pl>
184 Cc: Juan Barry Manuel Canham <Linux@riotingpacifist.net>
185 Cc: Zahid Chowdhury <zahid.chowdhury@starsolutions.com>
186 Cc: Elmer Zhang <freeboy6716@gmail.com>
187 Cc: Kenneth Langga <klangga@gmail.com>
188 Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
189 Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
190 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
191 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
192 [bwh: Backported to 3.2: nilfs_clear_dirty_page() has not been separated
193 from nilfs_clear_dirty_pages()]
194 Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
195 Cc: Rui Xiang <rui.xiang@huawei.com>
196 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
197
198 ---
199 fs/nilfs2/page.c | 2 ++
200 fs/nilfs2/segment.c | 11 +++++++++--
201 2 files changed, 11 insertions(+), 2 deletions(-)
202
203 --- a/fs/nilfs2/page.c
204 +++ b/fs/nilfs2/page.c
205 @@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_h
206 clear_buffer_nilfs_volatile(bh);
207 clear_buffer_nilfs_checked(bh);
208 clear_buffer_nilfs_redirected(bh);
209 + clear_buffer_async_write(bh);
210 clear_buffer_dirty(bh);
211 if (nilfs_page_buffers_clean(page))
212 __nilfs_clear_page_dirty(page);
213 @@ -390,6 +391,7 @@ void nilfs_clear_dirty_pages(struct addr
214 bh = head = page_buffers(page);
215 do {
216 lock_buffer(bh);
217 + clear_buffer_async_write(bh);
218 clear_buffer_dirty(bh);
219 clear_buffer_nilfs_volatile(bh);
220 clear_buffer_nilfs_checked(bh);
221 --- a/fs/nilfs2/segment.c
222 +++ b/fs/nilfs2/segment.c
223 @@ -662,7 +662,7 @@ static size_t nilfs_lookup_dirty_data_bu
224
225 bh = head = page_buffers(page);
226 do {
227 - if (!buffer_dirty(bh))
228 + if (!buffer_dirty(bh) || buffer_async_write(bh))
229 continue;
230 get_bh(bh);
231 list_add_tail(&bh->b_assoc_buffers, listp);
232 @@ -696,7 +696,8 @@ static void nilfs_lookup_dirty_node_buff
233 for (i = 0; i < pagevec_count(&pvec); i++) {
234 bh = head = page_buffers(pvec.pages[i]);
235 do {
236 - if (buffer_dirty(bh)) {
237 + if (buffer_dirty(bh) &&
238 + !buffer_async_write(bh)) {
239 get_bh(bh);
240 list_add_tail(&bh->b_assoc_buffers,
241 listp);
242 @@ -1578,6 +1579,7 @@ static void nilfs_segctor_prepare_write(
243
244 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
245 b_assoc_buffers) {
246 + set_buffer_async_write(bh);
247 if (bh->b_page != bd_page) {
248 if (bd_page) {
249 lock_page(bd_page);
250 @@ -1591,6 +1593,7 @@ static void nilfs_segctor_prepare_write(
251
252 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
253 b_assoc_buffers) {
254 + set_buffer_async_write(bh);
255 if (bh == segbuf->sb_super_root) {
256 if (bh->b_page != bd_page) {
257 lock_page(bd_page);
258 @@ -1676,6 +1679,7 @@ static void nilfs_abort_logs(struct list
259 list_for_each_entry(segbuf, logs, sb_list) {
260 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
261 b_assoc_buffers) {
262 + clear_buffer_async_write(bh);
263 if (bh->b_page != bd_page) {
264 if (bd_page)
265 end_page_writeback(bd_page);
266 @@ -1685,6 +1689,7 @@ static void nilfs_abort_logs(struct list
267
268 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
269 b_assoc_buffers) {
270 + clear_buffer_async_write(bh);
271 if (bh == segbuf->sb_super_root) {
272 if (bh->b_page != bd_page) {
273 end_page_writeback(bd_page);
274 @@ -1754,6 +1759,7 @@ static void nilfs_segctor_complete_write
275 b_assoc_buffers) {
276 set_buffer_uptodate(bh);
277 clear_buffer_dirty(bh);
278 + clear_buffer_async_write(bh);
279 if (bh->b_page != bd_page) {
280 if (bd_page)
281 end_page_writeback(bd_page);
282 @@ -1775,6 +1781,7 @@ static void nilfs_segctor_complete_write
283 b_assoc_buffers) {
284 set_buffer_uptodate(bh);
285 clear_buffer_dirty(bh);
286 + clear_buffer_async_write(bh);
287 clear_buffer_delay(bh);
288 clear_buffer_nilfs_volatile(bh);
289 clear_buffer_nilfs_redirected(bh);