// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "dir.h"
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"


/* If the start for which we're re-enabling locking (seq) has been superseded
   by a newer stop (ls_recover_seq), we need to leave locking disabled.

   We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
   locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
   enables locking and clears the requestqueue between a and b. */

static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
	int error = -EINTR;

	down_write(&ls->ls_recv_active);

	spin_lock(&ls->ls_recover_lock);
	if (ls->ls_recover_seq == seq) {
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
		error = 0;
	}
	spin_unlock(&ls->ls_recover_lock);

	up_write(&ls->ls_recv_active);
	return error;
}
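
/* A later dlm_ls_stop() advances ls_recover_seq under ls_recover_lock, so if
 * another stop arrived after the start being finished here, the comparison
 * above fails, -EINTR is returned, and locking stays disabled until the next
 * recovery pass completes. */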

static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
	unsigned long start;
	int error, neg = 0;

	log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);

	mutex_lock(&ls->ls_recoverd_active);

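	/* Hold lkb callbacks back while recovery runs; they queue up and are
	 * delivered again by dlm_callback_resume() below.  Also free unused
	 * rsbs sitting on the toss list so the recovery passes below only
	 * see active rsbs. */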
	dlm_callback_suspend(ls);

	dlm_clear_toss(ls);

	/*
	 * This list of root rsb's will be the basis of most of the recovery
	 * routines.
	 */

	dlm_create_root_list(ls);
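	/* Each rsb on ls_root_list holds a reference; dlm_release_root_list()
	 * drops them again on both the success and the failure path. */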

	/*
	 * Add or remove nodes from the lockspace's ls_nodes list.
	 *
	 * Because all membership changes must be reported to the lsops or
	 * midcomms layer, ls_recover() must not be aborted until this is
	 * done.
	 */

	error = dlm_recover_members(ls, rv, &neg);
	if (error) {
		log_rinfo(ls, "dlm_recover_members error %d", error);
		goto fail;
	}

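	/* neg now counts the nodes that left; a nonzero count selects the
	 * remastering path further below.  With membership settled, recompute
	 * which node holds the directory entry for each root rsb, since the
	 * name-to-node mapping changes with the member list. */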
	dlm_recover_dir_nodeid(ls);

	ls->ls_recover_dir_sent_res = 0;
	ls->ls_recover_dir_sent_msg = 0;
	ls->ls_recover_locks_in = 0;

	dlm_set_recover_status(ls, DLM_RS_NODES);

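	/* Each recovery phase ends with a barrier of this form:
	 * dlm_set_recover_status() publishes the phase bit locally, and the
	 * matching _wait call polls the other members (via status (rcom)
	 * messages) until all of them report reaching the same point. */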
	error = dlm_recover_members_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_members_wait error %d", error);
		goto fail;
	}

	start = jiffies;

	/*
	 * Rebuild our own share of the directory by collecting from all other
	 * nodes their master rsb names that hash to us.
	 */

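	/* The directory maps rsb names to master nodeids and is spread across
	 * the members by name hash, so after a membership change each node
	 * rebuilds the slice that now hashes to it. */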
	error = dlm_recover_directory(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory error %d", error);
		goto fail;
	}

	dlm_set_recover_status(ls, DLM_RS_DIR);

	error = dlm_recover_directory_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
		goto fail;
	}

	log_rinfo(ls, "dlm_recover_directory %u out %u messages",
		  ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);

	/*
	 * We may have outstanding operations that are waiting for a reply from
	 * a failed node.  Mark these to be resent after recovery.  Unlock and
	 * cancel ops can just be completed.
	 */

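	/* This walks ls_waiters, the list of lkbs with a request outstanding
	 * to a remote node; requests to failed nodes are flagged for resend
	 * by dlm_recover_waiters_post() once locking is re-enabled. */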
	dlm_recover_waiters_pre(ls);

	if (dlm_recovery_stopped(ls)) {
		error = -EINTR;
		goto fail;
	}

	if (neg || dlm_no_directory(ls)) {
		/*
		 * Clear lkb's for departed nodes.
		 */

		dlm_recover_purge(ls);

		/*
		 * Get new master nodeid's for rsb's that were mastered on
		 * departed nodes.
		 */

		error = dlm_recover_masters(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_masters error %d", error);
			goto fail;
		}

		/*
		 * Send our locks on remastered rsb's to the new masters.
		 */

		error = dlm_recover_locks(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks error %d", error);
			goto fail;
		}

		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail;
		}

		log_rinfo(ls, "dlm_recover_locks %u in",
			  ls->ls_recover_locks_in);

		/*
		 * Finalize state in master rsb's now that all locks can be
		 * checked.  This includes conversion resolution and lvb
		 * settings.
		 */

		dlm_recover_rsbs(ls);
	} else {
		/*
		 * Other lockspace members may be going through the "neg" steps
		 * while also adding us to the lockspace, in which case they'll
		 * be doing the recover_locks (RS_LOCKS) barrier.
		 */
		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail;
		}
	}

	dlm_release_root_list(ls);

	/*
	 * Purge directory-related requests that are saved in requestqueue.
	 * All dir requests from before recovery are invalid now due to the dir
	 * rebuild and will be resent by the requesting nodes.
	 */

	dlm_purge_requestqueue(ls);
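	/* Non-directory requests stay on the requestqueue; they are replayed
	 * by dlm_process_requestqueue() after enable_locking() below. */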

	dlm_set_recover_status(ls, DLM_RS_DONE);

	error = dlm_recover_done_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_done_wait error %d", error);
		goto fail;
	}

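	/* Every member has reached DLM_RS_DONE at this point, so state for
	 * departed nodes can be freed, lock timeout baselines adjusted so
	 * time spent in recovery is not charged to waiters (the apparent
	 * purpose of dlm_adjust_timeouts()), and queued callbacks delivered
	 * again. */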
	dlm_clear_members_gone(ls);

	dlm_adjust_timeouts(ls);

	dlm_callback_resume(ls);

	error = enable_locking(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "enable_locking error %d", error);
		goto fail;
	}

	error = dlm_process_requestqueue(ls);
	if (error) {
		log_rinfo(ls, "dlm_process_requestqueue error %d", error);
		goto fail;
	}

	error = dlm_recover_waiters_post(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_waiters_post error %d", error);
		goto fail;
	}

	dlm_recover_grant(ls);

	log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms",
		  (unsigned long long)rv->seq, ls->ls_generation,
		  jiffies_to_msecs(jiffies - start));
	mutex_unlock(&ls->ls_recoverd_active);

	return 0;

 fail:
	dlm_release_root_list(ls);
	mutex_unlock(&ls->ls_recoverd_active);

	return error;
}

/* The dlm_ls_start() that created the rv we take here may already have been
   stopped via dlm_ls_stop(); in that case we need to leave the RECOVER_STOP
   flag set. */

static void do_ls_recovery(struct dlm_ls *ls)
{
	struct dlm_recover *rv = NULL;
	int error;

	spin_lock(&ls->ls_recover_lock);
	rv = ls->ls_recover_args;
	ls->ls_recover_args = NULL;
	if (rv && ls->ls_recover_seq == rv->seq)
		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	spin_unlock(&ls->ls_recover_lock);

	if (rv) {
		error = ls_recover(ls, rv);
		switch (error) {
		case 0:
			ls->ls_recovery_result = 0;
			complete(&ls->ls_recovery_done);

			dlm_lsop_recover_done(ls);
			break;
		case -EINTR:
			/* Recovery was interrupted (-EINTR); it will be
			 * retried on the next ls_recover() iteration, which
			 * will hopefully succeed.
			 */
			log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
				  __func__, (unsigned long long)rv->seq);
			break;
		default:
			log_rinfo(ls, "%s %llu error %d", __func__,
				  (unsigned long long)rv->seq, error);

			/* let new_lockspace() know about the critical error */
			ls->ls_recovery_result = error;
			complete(&ls->ls_recovery_done);
			break;
		}

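		/* rv and its nodes array were allocated when the start was
		 * requested (dlm_ls_start()); recovery owns and frees them
		 * here. */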
		kfree(rv->nodes);
		kfree(rv);
	}
}

static int dlm_recoverd(void *arg)
{
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(arg);
	if (!ls) {
		log_print("dlm_recoverd: no lockspace %p", arg);
		return -1;
	}

	down_write(&ls->ls_in_recovery);
	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
	wake_up(&ls->ls_recover_lock_wait);

	while (1) {
		/*
		 * We call kthread_should_stop() after set_current_state().
		 * This is because it works correctly if kthread_stop() is
		 * called just before set_current_state().
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			break;
		}
		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			if (kthread_should_stop())
				break;
			schedule();
		}
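		/* Standard lost-wakeup-safe sleep: we mark ourselves
		 * TASK_INTERRUPTIBLE before testing the flags, so a waker
		 * that sets a flag and calls wake_up_process() after our
		 * tests puts us back to TASK_RUNNING and schedule() returns
		 * at once instead of sleeping. */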
		set_current_state(TASK_RUNNING);

		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			down_write(&ls->ls_in_recovery);
			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
			wake_up(&ls->ls_recover_lock_wait);
		}

		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
			do_ls_recovery(ls);
	}

	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
		up_write(&ls->ls_in_recovery);

	dlm_put_lockspace(ls);
	return 0;
}

int dlm_recoverd_start(struct dlm_ls *ls)
{
	struct task_struct *p;
	int error = 0;

	p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
	if (IS_ERR(p))
		error = PTR_ERR(p);
	else
		ls->ls_recoverd_task = p;
	return error;
}

void dlm_recoverd_stop(struct dlm_ls *ls)
{
	kthread_stop(ls->ls_recoverd_task);
}

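/* Block recovery by taking ls_recoverd_active, the mutex that ls_recover()
 * holds for its whole run.  The wake_up() nudges a recovery that may be
 * sleeping in dlm_wait_function() on ls_wait_general so it re-checks its
 * stop condition and gives up the mutex sooner (the apparent intent here). */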
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
	wake_up(&ls->ls_wait_general);
	mutex_lock(&ls->ls_recoverd_active);
}

void dlm_recoverd_resume(struct dlm_ls *ls)
{
	mutex_unlock(&ls->ls_recoverd_active);
}