]>
Commit | Line | Data |
---|---|---|
9ea105cc GKH |
1 | From fee13fe96529523a709d1fff487f14a5e0d56d34 Mon Sep 17 00:00:00 2001 |
2 | From: Dennis Zhou <dennis@kernel.org> | |
3 | Date: Fri, 17 May 2019 19:16:26 -0400 | |
4 | Subject: btrfs: correct zstd workspace manager lock to use spin_lock_bh() | |
5 | ||
6 | From: Dennis Zhou <dennis@kernel.org> | |
7 | ||
8 | commit fee13fe96529523a709d1fff487f14a5e0d56d34 upstream. | |
9 | ||
10 | The btrfs zstd workspace manager uses a background timer to reclaim not | |
11 | recently used workspaces. I used spin_lock() from this context which | |
12 | should have been caught with lockdep, but was not. This deadlock was | |
13 | reported in bugzilla. The fix is to switch the zstd wsm lock to use | |
14 | spin_lock_bh() from the softirq context. | |
15 | ||
16 | This happened quite relibably on ppc64, unlike on other architectures. | |
17 | ||
18 | [ 313.402874] ================================ | |
19 | [ 313.402875] WARNING: inconsistent lock state | |
20 | [ 313.402879] 5.1.0-rc7 #1 Not tainted | |
21 | [ 313.402880] -------------------------------- | |
22 | [ 313.402882] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. | |
23 | [ 313.402885] swapper/5/0 [HC0[0]:SC1[1]:HE1:SE0] takes: | |
24 | [ 313.402888] 0000000080d1120c (&(&wsm.lock)->rlock){+.?.}, at: .zstd_reclaim_timer_fn+0x40/0x230 | |
25 | [ 313.402895] {SOFTIRQ-ON-W} state was registered at: | |
26 | [ 313.402899] .lock_acquire+0xd0/0x240 | |
27 | [ 313.402903] ._raw_spin_lock+0x34/0x60 | |
28 | [ 313.402906] .zstd_get_workspace+0xd0/0x360 | |
29 | [ 313.402908] .end_compressed_bio_read+0x3b8/0x540 | |
30 | [ 313.402911] .bio_endio+0x174/0x2c0 | |
31 | [ 313.402914] .end_workqueue_fn+0x4c/0x70 | |
32 | [ 313.402917] .normal_work_helper+0x138/0x7e0 | |
33 | [ 313.402920] .process_one_work+0x324/0x790 | |
34 | [ 313.402922] .worker_thread+0x68/0x570 | |
35 | [ 313.402925] .kthread+0x19c/0x1b0 | |
36 | [ 313.402928] .ret_from_kernel_thread+0x58/0x78 | |
37 | [ 313.402930] irq event stamp: 2629216 | |
38 | [ 313.402933] hardirqs last enabled at (2629216): [<c0000000009da738>] ._raw_spin_unlock_irq+0x38/0x60 | |
39 | [ 313.402936] hardirqs last disabled at (2629215): [<c0000000009da4c4>] ._raw_spin_lock_irq+0x24/0x70 | |
40 | [ 313.402939] softirqs last enabled at (2629212): [<c0000000000af9fc>] .irq_enter+0x8c/0xd0 | |
41 | [ 313.402942] softirqs last disabled at (2629213): [<c0000000000afb58>] .irq_exit+0x118/0x170 | |
42 | [ 313.402944] | |
43 | other info that might help us debug this: | |
44 | [ 313.402945] Possible unsafe locking scenario: | |
45 | ||
46 | [ 313.402947] CPU0 | |
47 | [ 313.402948] ---- | |
48 | [ 313.402949] lock(&(&wsm.lock)->rlock); | |
49 | [ 313.402951] <Interrupt> | |
50 | [ 313.402952] lock(&(&wsm.lock)->rlock); | |
51 | [ 313.402954] | |
52 | *** DEADLOCK *** | |
53 | ||
54 | [ 313.402957] 1 lock held by swapper/5/0: | |
55 | [ 313.402958] #0: 000000004b612042 ((&wsm.timer)){+.-.}, at: .call_timer_fn+0x0/0x3c0 | |
56 | [ 313.402963] | |
57 | stack backtrace: | |
58 | [ 313.402967] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.1.0-rc7 #1 | |
59 | [ 313.402968] Call Trace: | |
60 | [ 313.402972] [c0000007fa262e70] [c0000000009b3294] .dump_stack+0xe0/0x15c (unreliable) | |
61 | [ 313.402975] [c0000007fa262f10] [c000000000125548] .print_usage_bug+0x348/0x390 | |
62 | [ 313.402978] [c0000007fa262fd0] [c000000000125cb4] .mark_lock+0x724/0x930 | |
63 | [ 313.402981] [c0000007fa263080] [c000000000126c20] .__lock_acquire+0xc90/0x16a0 | |
64 | [ 313.402984] [c0000007fa2631b0] [c000000000128040] .lock_acquire+0xd0/0x240 | |
65 | [ 313.402987] [c0000007fa263280] [c0000000009da2b4] ._raw_spin_lock+0x34/0x60 | |
66 | [ 313.402990] [c0000007fa263300] [c00000000054b0b0] .zstd_reclaim_timer_fn+0x40/0x230 | |
67 | [ 313.402993] [c0000007fa2633d0] [c000000000158b38] .call_timer_fn+0xc8/0x3c0 | |
68 | [ 313.402996] [c0000007fa2634a0] [c000000000158f74] .expire_timers+0x144/0x260 | |
69 | [ 313.402999] [c0000007fa263550] [c000000000159178] .run_timer_softirq+0xe8/0x230 | |
70 | [ 313.403002] [c0000007fa263680] [c0000000009db288] .__do_softirq+0x188/0x5d4 | |
71 | [ 313.403004] [c0000007fa263790] [c0000000000afb58] .irq_exit+0x118/0x170 | |
72 | [ 313.403008] [c0000007fa263800] [c000000000028d88] .timer_interrupt+0x158/0x430 | |
73 | [ 313.403012] [c0000007fa2638b0] [c0000000000091d4] decrementer_common+0x134/0x140 | |
74 | [ 313.403017] --- interrupt: 901 at replay_interrupt_return+0x0/0x4 | |
75 | LR = .arch_local_irq_restore.part.0+0x68/0x80 | |
76 | [ 313.403020] [c0000007fa263bb0] [c00000000001a3ac] .arch_local_irq_restore.part.0+0x2c/0x80 (unreliable) | |
77 | [ 313.403024] [c0000007fa263c30] [c0000000007bbbcc] .cpuidle_enter_state+0xec/0x670 | |
78 | [ 313.403027] [c0000007fa263d00] [c0000000000f5130] .call_cpuidle+0x40/0x90 | |
79 | [ 313.403031] [c0000007fa263d70] [c0000000000f554c] .do_idle+0x2dc/0x3a0 | |
80 | [ 313.403034] [c0000007fa263e30] [c0000000000f59ac] .cpu_startup_entry+0x2c/0x30 | |
81 | [ 313.403037] [c0000007fa263ea0] [c000000000045674] .start_secondary+0x644/0x650 | |
82 | [ 313.403041] [c0000007fa263f90] [c00000000000ad5c] start_secondary_prolog+0x10/0x14 | |
83 | ||
84 | Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203517 | |
85 | Fixes: 3f93aef535c8 ("btrfs: add zstd compression level support") | |
86 | CC: stable@vger.kernel.org # 5.1+ | |
87 | Signed-off-by: Dennis Zhou <dennis@kernel.org> | |
88 | Reviewed-by: David Sterba <dsterba@suse.com> | |
89 | Signed-off-by: David Sterba <dsterba@suse.com> | |
90 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
91 | ||
92 | --- | |
93 | fs/btrfs/zstd.c | 20 ++++++++++---------- | |
94 | 1 file changed, 10 insertions(+), 10 deletions(-) | |
95 | ||
96 | --- a/fs/btrfs/zstd.c | |
97 | +++ b/fs/btrfs/zstd.c | |
98 | @@ -102,10 +102,10 @@ static void zstd_reclaim_timer_fn(struct | |
99 | unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES; | |
100 | struct list_head *pos, *next; | |
101 | ||
102 | - spin_lock(&wsm.lock); | |
103 | + spin_lock_bh(&wsm.lock); | |
104 | ||
105 | if (list_empty(&wsm.lru_list)) { | |
106 | - spin_unlock(&wsm.lock); | |
107 | + spin_unlock_bh(&wsm.lock); | |
108 | return; | |
109 | } | |
110 | ||
111 | @@ -134,7 +134,7 @@ static void zstd_reclaim_timer_fn(struct | |
112 | if (!list_empty(&wsm.lru_list)) | |
113 | mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); | |
114 | ||
115 | - spin_unlock(&wsm.lock); | |
116 | + spin_unlock_bh(&wsm.lock); | |
117 | } | |
118 | ||
119 | /* | |
120 | @@ -195,7 +195,7 @@ static void zstd_cleanup_workspace_manag | |
121 | struct workspace *workspace; | |
122 | int i; | |
123 | ||
124 | - spin_lock(&wsm.lock); | |
125 | + spin_lock_bh(&wsm.lock); | |
126 | for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) { | |
127 | while (!list_empty(&wsm.idle_ws[i])) { | |
128 | workspace = container_of(wsm.idle_ws[i].next, | |
129 | @@ -205,7 +205,7 @@ static void zstd_cleanup_workspace_manag | |
130 | wsm.ops->free_workspace(&workspace->list); | |
131 | } | |
132 | } | |
133 | - spin_unlock(&wsm.lock); | |
134 | + spin_unlock_bh(&wsm.lock); | |
135 | ||
136 | del_timer_sync(&wsm.timer); | |
137 | } | |
138 | @@ -227,7 +227,7 @@ static struct list_head *zstd_find_works | |
139 | struct workspace *workspace; | |
140 | int i = level - 1; | |
141 | ||
142 | - spin_lock(&wsm.lock); | |
143 | + spin_lock_bh(&wsm.lock); | |
144 | for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) { | |
145 | if (!list_empty(&wsm.idle_ws[i])) { | |
146 | ws = wsm.idle_ws[i].next; | |
147 | @@ -239,11 +239,11 @@ static struct list_head *zstd_find_works | |
148 | list_del(&workspace->lru_list); | |
149 | if (list_empty(&wsm.idle_ws[i])) | |
150 | clear_bit(i, &wsm.active_map); | |
151 | - spin_unlock(&wsm.lock); | |
152 | + spin_unlock_bh(&wsm.lock); | |
153 | return ws; | |
154 | } | |
155 | } | |
156 | - spin_unlock(&wsm.lock); | |
157 | + spin_unlock_bh(&wsm.lock); | |
158 | ||
159 | return NULL; | |
160 | } | |
161 | @@ -302,7 +302,7 @@ static void zstd_put_workspace(struct li | |
162 | { | |
163 | struct workspace *workspace = list_to_workspace(ws); | |
164 | ||
165 | - spin_lock(&wsm.lock); | |
166 | + spin_lock_bh(&wsm.lock); | |
167 | ||
168 | /* A node is only taken off the lru if we are the corresponding level */ | |
169 | if (workspace->req_level == workspace->level) { | |
170 | @@ -322,7 +322,7 @@ static void zstd_put_workspace(struct li | |
171 | list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]); | |
172 | workspace->req_level = 0; | |
173 | ||
174 | - spin_unlock(&wsm.lock); | |
175 | + spin_unlock_bh(&wsm.lock); | |
176 | ||
177 | if (workspace->level == ZSTD_BTRFS_MAX_LEVEL) | |
178 | cond_wake_up(&wsm.wait); |