]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
a870cac6deb4e0d6a9fa6c6fb5a31338426fd2b0
[thirdparty/kernel/stable-queue.git] /
1 From 7f42ec3941560f0902fe3671e36f2c20ffd3af0a Mon Sep 17 00:00:00 2001
2 From: Vyacheslav Dubeyko <slava@dubeyko.com>
3 Date: Mon, 30 Sep 2013 13:45:12 -0700
4 Subject: nilfs2: fix issue with race condition of competition between segments for dirty blocks
5
6 From: Vyacheslav Dubeyko <slava@dubeyko.com>
7
8 commit 7f42ec3941560f0902fe3671e36f2c20ffd3af0a upstream.
9
10 Many NILFS2 users have reported strange file system corruption
11 (for example):
12
13 NILFS: bad btree node (blocknr=185027): level = 0, flags = 0x0, nchildren = 768
14 NILFS error (device sda4): nilfs_bmap_last_key: broken bmap (inode number=11540)
15
16 But such error messages are a consequence of a file system issue that
17 takes place much earlier. Fortunately, Jerome Poulin <jeromepoulin@gmail.com>
18 and Anton Eliasson <devel@antoneliasson.se> reported another issue
19 not long ago. These reports describe a crash of the segctor
20 thread:
21
22 BUG: unable to handle kernel paging request at 0000000000004c83
23 IP: nilfs_end_page_io+0x12/0xd0 [nilfs2]
24
25 Call Trace:
26 nilfs_segctor_do_construct+0xf25/0x1b20 [nilfs2]
27 nilfs_segctor_construct+0x17b/0x290 [nilfs2]
28 nilfs_segctor_thread+0x122/0x3b0 [nilfs2]
29 kthread+0xc0/0xd0
30 ret_from_fork+0x7c/0xb0
31
32 These two issues have the same root cause. The same cause can trigger a
33 third issue too: the segctor thread hangs while consuming 100% of the
34 CPU.
35
36 REPRODUCING PATH:
37
38 One possible way of reproducing the issue was described by
39 Jerome Poulin <jeromepoulin@gmail.com>:
40
41 1. init S to get to single user mode.
42 2. sysrq+E to make sure only my shell is running
43 3. start network-manager to get my wifi connection up
44 4. login as root and launch "screen"
45 5. cd /boot/log/nilfs which is a ext3 mount point and can log when NILFS dies.
46 6. lscp | xz -9e > lscp.txt.xz
47 7. mount my snapshot using mount -o cp=3360839,ro /dev/vgUbuntu/root /mnt/nilfs
48 8. start a screen to dump /proc/kmsg to text file since rsyslog is killed
49 9. start a screen and launch strace -f -o find-cat.log -t find
50 /mnt/nilfs -type f -exec cat {} > /dev/null \;
51 10. start a screen and launch strace -f -o apt-get.log -t apt-get update
52 11. launch the last command again as it did not crash the first time
53 12. apt-get crashes
54 13. ps aux > ps-aux-crashed.log
55 13. sysrq+W
56 14. sysrq+E wait for everything to terminate
57 15. sysrq+SUSB
58
59 A simplified way of reproducing the issue is to start a kernel
60 compilation task and "apt-get update" in parallel.
61
62 REPRODUCIBILITY:
63
64 The issue does not reproduce reliably [60% - 80%]. It is very important
65 to have a proper environment for reproducing the issue. The critical
66 conditions for successful reproduction are:
67
68 (1) There should be a big file that is modified via mmap().
69
70 (2) From time to time during processing, this file should have a count
71 of dirty blocks greater than several segments in size (for example, two
72 or three).
73
74 (3) There should be intensive background file modification activity
75 in another thread.
76
77 INVESTIGATION:
78
79 First of all, it is possible to see that the reason for the crash is an
80 invalid page address:
81
82 NILFS [nilfs_segctor_complete_write]:2100 bh->b_count 0, bh->b_blocknr 13895680, bh->b_size 13897727, bh->b_page 0000000000001a82
83 NILFS [nilfs_segctor_complete_write]:2101 segbuf->sb_segnum 6783
84
85 Moreover, the value of b_page (0x1a82) is 6786. This value looks like a
86 segment number. And the b_blocknr and b_size values look like block
87 numbers. So, the buffer_head pointer points to an invalid address.
88
89 Detailed investigation of the issue revealed the following picture:
90
91 [-----------------------------SEGMENT 6783-------------------------------]
92 NILFS [nilfs_segctor_do_construct]:2310 nilfs_segctor_begin_construction
93 NILFS [nilfs_segctor_do_construct]:2321 nilfs_segctor_collect
94 NILFS [nilfs_segctor_do_construct]:2336 nilfs_segctor_assign
95 NILFS [nilfs_segctor_do_construct]:2367 nilfs_segctor_update_segusage
96 NILFS [nilfs_segctor_do_construct]:2371 nilfs_segctor_prepare_write
97 NILFS [nilfs_segctor_do_construct]:2376 nilfs_add_checksums_on_logs
98 NILFS [nilfs_segctor_do_construct]:2381 nilfs_segctor_write
99 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111149024, segbuf->sb_segnum 6783
100
101 [-----------------------------SEGMENT 6784-------------------------------]
102 NILFS [nilfs_segctor_do_construct]:2310 nilfs_segctor_begin_construction
103 NILFS [nilfs_segctor_do_construct]:2321 nilfs_segctor_collect
104 NILFS [nilfs_lookup_dirty_data_buffers]:782 bh->b_count 1, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
105 NILFS [nilfs_lookup_dirty_data_buffers]:783 bh->b_assoc_buffers.next ffff8802174a6798, bh->b_assoc_buffers.prev ffff880221cffee8
106 NILFS [nilfs_segctor_do_construct]:2336 nilfs_segctor_assign
107 NILFS [nilfs_segctor_do_construct]:2367 nilfs_segctor_update_segusage
108 NILFS [nilfs_segctor_do_construct]:2371 nilfs_segctor_prepare_write
109 NILFS [nilfs_segctor_do_construct]:2376 nilfs_add_checksums_on_logs
110 NILFS [nilfs_segctor_do_construct]:2381 nilfs_segctor_write
111 NILFS [nilfs_segbuf_submit_bh]:575 bh->b_count 1, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
112 NILFS [nilfs_segbuf_submit_bh]:576 segbuf->sb_segnum 6784
113 NILFS [nilfs_segbuf_submit_bh]:577 bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880218bcdf50
114 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111150080, segbuf->sb_segnum 6784, segbuf->sb_nbio 0
115 [----------] ditto
116 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111164416, segbuf->sb_segnum 6784, segbuf->sb_nbio 15
117
118 [-----------------------------SEGMENT 6785-------------------------------]
119 NILFS [nilfs_segctor_do_construct]:2310 nilfs_segctor_begin_construction
120 NILFS [nilfs_segctor_do_construct]:2321 nilfs_segctor_collect
121 NILFS [nilfs_lookup_dirty_data_buffers]:782 bh->b_count 2, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
122 NILFS [nilfs_lookup_dirty_data_buffers]:783 bh->b_assoc_buffers.next ffff880219277e80, bh->b_assoc_buffers.prev ffff880221cffc88
123 NILFS [nilfs_segctor_do_construct]:2367 nilfs_segctor_update_segusage
124 NILFS [nilfs_segctor_do_construct]:2371 nilfs_segctor_prepare_write
125 NILFS [nilfs_segctor_do_construct]:2376 nilfs_add_checksums_on_logs
126 NILFS [nilfs_segctor_do_construct]:2381 nilfs_segctor_write
127 NILFS [nilfs_segbuf_submit_bh]:575 bh->b_count 2, bh->b_page ffffea000709b000, page->index 0, i_ino 1033103, i_size 25165824
128 NILFS [nilfs_segbuf_submit_bh]:576 segbuf->sb_segnum 6785
129 NILFS [nilfs_segbuf_submit_bh]:577 bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880222cc7ee8
130 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111165440, segbuf->sb_segnum 6785, segbuf->sb_nbio 0
131 [----------] ditto
132 NILFS [nilfs_segbuf_submit_bio]:464 bio->bi_sector 111177728, segbuf->sb_segnum 6785, segbuf->sb_nbio 12
133
134 NILFS [nilfs_segctor_do_construct]:2399 nilfs_segctor_wait
135 NILFS [nilfs_segbuf_wait]:676 segbuf->sb_segnum 6783
136 NILFS [nilfs_segbuf_wait]:676 segbuf->sb_segnum 6784
137 NILFS [nilfs_segbuf_wait]:676 segbuf->sb_segnum 6785
138
139 NILFS [nilfs_segctor_complete_write]:2100 bh->b_count 0, bh->b_blocknr 13895680, bh->b_size 13897727, bh->b_page 0000000000001a82
140
141 BUG: unable to handle kernel paging request at 0000000000001a82
142 IP: [<ffffffffa024d0f2>] nilfs_end_page_io+0x12/0xd0 [nilfs2]
143
144 Usually, for every segment we collect dirty files in a list. Then, dirty
145 blocks are gathered for every dirty file, prepared for write and
146 submitted by means of a nilfs_segbuf_submit_bh() call. Finally, the
147 complete write phase takes place after nilfs_end_bio_write() has been
148 called on the block layer. Buffers/pages are marked as not dirty in the
149 final phase and processed files are removed from the list of dirty files.
150
151 It is possible to see that we had three prepare_write and submit_bio
152 phases before the segbuf_wait and complete_write phases. Moreover,
153 segments compete with each other for dirty blocks because on every
154 iteration of segment processing dirty buffer_heads are added to several
155 lists of payload_buffers:
156
157 [SEGMENT 6784]: bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880218bcdf50
158 [SEGMENT 6785]: bh->b_assoc_buffers.next ffff880218a0d5f8, bh->b_assoc_buffers.prev ffff880222cc7ee8
159
160 The next pointer is the same but the prev pointer has changed. It means
161 that the buffer_head has a next pointer from one list but a prev pointer
162 from another. Such a modification can be made several times. And, finally,
163 it can result in various issues: (1) segctor hanging, (2) segctor
164 crashing, (3) file system metadata corruption.
165
166 FIX:
167 This patch adds:
168
169 (1) setting of BH_Async_Write flag in nilfs_segctor_prepare_write()
170 for every processed dirty block;
171
172 (2) checking of BH_Async_Write flag in
173 nilfs_lookup_dirty_data_buffers() and
174 nilfs_lookup_dirty_node_buffers();
175
176 (3) clearing of BH_Async_Write flag in nilfs_segctor_complete_write(),
177 nilfs_abort_logs(), nilfs_forget_buffer(), nilfs_clear_dirty_page().
178
179 Reported-by: Jerome Poulin <jeromepoulin@gmail.com>
180 Reported-by: Anton Eliasson <devel@antoneliasson.se>
181 Cc: Paul Fertser <fercerpav@gmail.com>
182 Cc: ARAI Shun-ichi <hermes@ceres.dti.ne.jp>
183 Cc: Piotr Szymaniak <szarpaj@grubelek.pl>
184 Cc: Juan Barry Manuel Canham <Linux@riotingpacifist.net>
185 Cc: Zahid Chowdhury <zahid.chowdhury@starsolutions.com>
186 Cc: Elmer Zhang <freeboy6716@gmail.com>
187 Cc: Kenneth Langga <klangga@gmail.com>
188 Signed-off-by: Vyacheslav Dubeyko <slava@dubeyko.com>
189 Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
190 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
191 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
192 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
193
194 ---
195 fs/nilfs2/page.c | 2 ++
196 fs/nilfs2/segment.c | 11 +++++++++--
197 2 files changed, 11 insertions(+), 2 deletions(-)
198
199 --- a/fs/nilfs2/page.c
200 +++ b/fs/nilfs2/page.c
201 @@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_h
202 clear_buffer_nilfs_volatile(bh);
203 clear_buffer_nilfs_checked(bh);
204 clear_buffer_nilfs_redirected(bh);
205 + clear_buffer_async_write(bh);
206 clear_buffer_dirty(bh);
207 if (nilfs_page_buffers_clean(page))
208 __nilfs_clear_page_dirty(page);
209 @@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page
210 "discard block %llu, size %zu",
211 (u64)bh->b_blocknr, bh->b_size);
212 }
213 + clear_buffer_async_write(bh);
214 clear_buffer_dirty(bh);
215 clear_buffer_nilfs_volatile(bh);
216 clear_buffer_nilfs_checked(bh);
217 --- a/fs/nilfs2/segment.c
218 +++ b/fs/nilfs2/segment.c
219 @@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_bu
220
221 bh = head = page_buffers(page);
222 do {
223 - if (!buffer_dirty(bh))
224 + if (!buffer_dirty(bh) || buffer_async_write(bh))
225 continue;
226 get_bh(bh);
227 list_add_tail(&bh->b_assoc_buffers, listp);
228 @@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buff
229 for (i = 0; i < pagevec_count(&pvec); i++) {
230 bh = head = page_buffers(pvec.pages[i]);
231 do {
232 - if (buffer_dirty(bh)) {
233 + if (buffer_dirty(bh) &&
234 + !buffer_async_write(bh)) {
235 get_bh(bh);
236 list_add_tail(&bh->b_assoc_buffers,
237 listp);
238 @@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(
239
240 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
241 b_assoc_buffers) {
242 + set_buffer_async_write(bh);
243 if (bh->b_page != bd_page) {
244 if (bd_page) {
245 lock_page(bd_page);
246 @@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(
247
248 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
249 b_assoc_buffers) {
250 + set_buffer_async_write(bh);
251 if (bh == segbuf->sb_super_root) {
252 if (bh->b_page != bd_page) {
253 lock_page(bd_page);
254 @@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list
255 list_for_each_entry(segbuf, logs, sb_list) {
256 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
257 b_assoc_buffers) {
258 + clear_buffer_async_write(bh);
259 if (bh->b_page != bd_page) {
260 if (bd_page)
261 end_page_writeback(bd_page);
262 @@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list
263
264 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
265 b_assoc_buffers) {
266 + clear_buffer_async_write(bh);
267 if (bh == segbuf->sb_super_root) {
268 if (bh->b_page != bd_page) {
269 end_page_writeback(bd_page);
270 @@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write
271 b_assoc_buffers) {
272 set_buffer_uptodate(bh);
273 clear_buffer_dirty(bh);
274 + clear_buffer_async_write(bh);
275 if (bh->b_page != bd_page) {
276 if (bd_page)
277 end_page_writeback(bd_page);
278 @@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write
279 b_assoc_buffers) {
280 set_buffer_uptodate(bh);
281 clear_buffer_dirty(bh);
282 + clear_buffer_async_write(bh);
283 clear_buffer_delay(bh);
284 clear_buffer_nilfs_volatile(bh);
285 clear_buffer_nilfs_redirected(bh);