]>
Commit | Line | Data |
---|---|---|
666dcf07 GKH |
1 | From 72abc8f4b4e8574318189886de627a2bfe6cd0da Mon Sep 17 00:00:00 2001 |
2 | From: hujianyang <hujianyang@huawei.com> | |
3 | Date: Sat, 31 May 2014 11:39:32 +0800 | |
4 | Subject: UBIFS: Remove incorrect assertion in shrink_tnc() | |
5 | ||
6 | From: hujianyang <hujianyang@huawei.com> | |
7 | ||
8 | commit 72abc8f4b4e8574318189886de627a2bfe6cd0da upstream. | |
9 | ||
10 | I hit the same assert failed as Dolev Raviv reported in Kernel v3.10 | |
11 | shows like this: | |
12 | ||
13 | [ 9641.164028] UBIFS assert failed in shrink_tnc at 131 (pid 13297) | |
14 | [ 9641.234078] CPU: 1 PID: 13297 Comm: mmap.test Tainted: G O 3.10.40 #1 | |
15 | [ 9641.234116] [<c0011a6c>] (unwind_backtrace+0x0/0x12c) from [<c000d0b0>] (show_stack+0x20/0x24) | |
16 | [ 9641.234137] [<c000d0b0>] (show_stack+0x20/0x24) from [<c0311134>] (dump_stack+0x20/0x28) | |
17 | [ 9641.234188] [<c0311134>] (dump_stack+0x20/0x28) from [<bf22425c>] (shrink_tnc_trees+0x25c/0x350 [ubifs]) | |
18 | [ 9641.234265] [<bf22425c>] (shrink_tnc_trees+0x25c/0x350 [ubifs]) from [<bf2245ac>] (ubifs_shrinker+0x25c/0x310 [ubifs]) | |
19 | [ 9641.234307] [<bf2245ac>] (ubifs_shrinker+0x25c/0x310 [ubifs]) from [<c00cdad8>] (shrink_slab+0x1d4/0x2f8) | |
20 | [ 9641.234327] [<c00cdad8>] (shrink_slab+0x1d4/0x2f8) from [<c00d03d0>] (do_try_to_free_pages+0x300/0x544) | |
21 | [ 9641.234344] [<c00d03d0>] (do_try_to_free_pages+0x300/0x544) from [<c00d0a44>] (try_to_free_pages+0x2d0/0x398) | |
22 | [ 9641.234363] [<c00d0a44>] (try_to_free_pages+0x2d0/0x398) from [<c00c6a60>] (__alloc_pages_nodemask+0x494/0x7e8) | |
23 | [ 9641.234382] [<c00c6a60>] (__alloc_pages_nodemask+0x494/0x7e8) from [<c00f62d8>] (new_slab+0x78/0x238) | |
24 | [ 9641.234400] [<c00f62d8>] (new_slab+0x78/0x238) from [<c031081c>] (__slab_alloc.constprop.42+0x1a4/0x50c) | |
25 | [ 9641.234419] [<c031081c>] (__slab_alloc.constprop.42+0x1a4/0x50c) from [<c00f80e8>] (kmem_cache_alloc_trace+0x54/0x188) | |
26 | [ 9641.234459] [<c00f80e8>] (kmem_cache_alloc_trace+0x54/0x188) from [<bf227908>] (do_readpage+0x168/0x468 [ubifs]) | |
27 | [ 9641.234553] [<bf227908>] (do_readpage+0x168/0x468 [ubifs]) from [<bf2296a0>] (ubifs_readpage+0x424/0x464 [ubifs]) | |
28 | [ 9641.234606] [<bf2296a0>] (ubifs_readpage+0x424/0x464 [ubifs]) from [<c00c17c0>] (filemap_fault+0x304/0x418) | |
29 | [ 9641.234638] [<c00c17c0>] (filemap_fault+0x304/0x418) from [<c00de694>] (__do_fault+0xd4/0x530) | |
30 | [ 9641.234665] [<c00de694>] (__do_fault+0xd4/0x530) from [<c00e10c0>] (handle_pte_fault+0x480/0xf54) | |
31 | [ 9641.234690] [<c00e10c0>] (handle_pte_fault+0x480/0xf54) from [<c00e2bf8>] (handle_mm_fault+0x140/0x184) | |
32 | [ 9641.234716] [<c00e2bf8>] (handle_mm_fault+0x140/0x184) from [<c0316688>] (do_page_fault+0x150/0x3ac) | |
33 | [ 9641.234737] [<c0316688>] (do_page_fault+0x150/0x3ac) from [<c000842c>] (do_DataAbort+0x3c/0xa0) | |
34 | [ 9641.234759] [<c000842c>] (do_DataAbort+0x3c/0xa0) from [<c0314e38>] (__dabt_usr+0x38/0x40) | |
35 | ||
36 | After analyzing the code, I found a condition that may cause this failed | |
37 | in correct operations. Thus, I think this assertion is wrong and should be | |
38 | removed. | |
39 | ||
40 | Suppose there are two clean znodes and one dirty znode in TNC. So the | |
41 | per-filesystem atomic_t @clean_zn_cnt is (2). If commit start, dirty_znode | |
42 | is set to COW_ZNODE in get_znodes_to_commit() in case of potentially ops | |
43 | on this znode. We clear COW bit and DIRTY bit in write_index() without | |
44 | @tnc_mutex locked. We don't increase @clean_zn_cnt in this place. As the | |
45 | comments in write_index() shows, if another process hold @tnc_mutex and | |
46 | dirty this znode after we clean it, @clean_zn_cnt would be decreased to (1). | |
47 | We will increase @clean_zn_cnt to (2) with @tnc_mutex locked in | |
48 | free_obsolete_znodes() to keep it right. | |
49 | ||
50 | If shrink_tnc() performs between decrease and increase, it will release | |
51 | other 2 clean znodes it holds and found @clean_zn_cnt is less than zero | |
52 | (1 - 2 = -1), then hit the assertion. Because free_obsolete_znodes() will | |
53 | soon correct @clean_zn_cnt and no harm to fs in this case, I think this | |
54 | assertion could be removed. | |
55 | ||
56 | 2 clean zondes and 1 dirty znode, @clean_zn_cnt == 2 | |
57 | ||
58 | Thread A (commit) Thread B (write or others) Thread C (shrinker) | |
59 | ->write_index | |
60 | ->clear_bit(DIRTY_NODE) | |
61 | ->clear_bit(COW_ZNODE) | |
62 | ||
63 | @clean_zn_cnt == 2 | |
64 | ->mutex_locked(&tnc_mutex) | |
65 | ->dirty_cow_znode | |
66 | ->!ubifs_zn_cow(znode) | |
67 | ->!test_and_set_bit(DIRTY_NODE) | |
68 | ->atomic_dec(&clean_zn_cnt) | |
69 | ->mutex_unlocked(&tnc_mutex) | |
70 | ||
71 | @clean_zn_cnt == 1 | |
72 | ->mutex_locked(&tnc_mutex) | |
73 | ->shrink_tnc | |
74 | ->destroy_tnc_subtree | |
75 | ->atomic_sub(&clean_zn_cnt, 2) | |
76 | ->ubifs_assert <- hit | |
77 | ->mutex_unlocked(&tnc_mutex) | |
78 | ||
79 | @clean_zn_cnt == -1 | |
80 | ->mutex_lock(&tnc_mutex) | |
81 | ->free_obsolete_znodes | |
82 | ->atomic_inc(&clean_zn_cnt) | |
83 | ->mutux_unlock(&tnc_mutex) | |
84 | ||
85 | @clean_zn_cnt == 0 (correct after shrink) | |
86 | ||
87 | Signed-off-by: hujianyang <hujianyang@huawei.com> | |
88 | Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> | |
89 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
90 | ||
91 | --- | |
92 | fs/ubifs/shrinker.c | 1 - | |
93 | 1 file changed, 1 deletion(-) | |
94 | ||
95 | --- a/fs/ubifs/shrinker.c | |
96 | +++ b/fs/ubifs/shrinker.c | |
97 | @@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info | |
98 | freed = ubifs_destroy_tnc_subtree(znode); | |
99 | atomic_long_sub(freed, &ubifs_clean_zn_cnt); | |
100 | atomic_long_sub(freed, &c->clean_zn_cnt); | |
101 | - ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); | |
102 | total_freed += freed; | |
103 | znode = zprev; | |
104 | } |