]>
Commit | Line | Data |
---|---|---|
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
18 | ||
1d7e80ee | 19 | #include <xfs/libxlog.h> |
d321ceac | 20 | |
a562a63b NS |
21 | #define xlog_unpack_data_checksum(rhead, dp, log) ((void)0) |
22 | #define xlog_clear_stale_blocks(log, tail_lsn) (0) | |
23 | #define xfs_readonly_buftarg(buftarg) (0) | |
24 | ||
d321ceac NS |
25 | /* |
26 | * This routine finds (to an approximation) the first block in the physical | |
4ed50f8a | 27 | * log which contains the given cycle. It uses a binary search algorithm. |
d321ceac NS |
28 | * Note that the algorithm can not be perfect because the disk will not |
29 | * necessarily be perfect. | |
30 | */ | |
31 | int | |
a562a63b NS |
32 | xlog_find_cycle_start( |
33 | xlog_t *log, | |
34 | xfs_buf_t *bp, | |
35 | xfs_daddr_t first_blk, | |
36 | xfs_daddr_t *last_blk, | |
37 | uint cycle) | |
d321ceac | 38 | { |
a562a63b | 39 | xfs_caddr_t offset; |
ffe29fb5 NS |
40 | xfs_daddr_t mid_blk; |
41 | uint mid_cycle; | |
42 | int error; | |
d321ceac NS |
43 | |
44 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
45 | while (mid_blk != first_blk && mid_blk != *last_blk) { | |
46 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | |
47 | return error; | |
a562a63b NS |
48 | offset = xlog_align(log, mid_blk, 1, bp); |
49 | mid_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
d321ceac NS |
50 | if (mid_cycle == cycle) { |
51 | *last_blk = mid_blk; | |
52 | /* last_half_cycle == mid_cycle */ | |
53 | } else { | |
54 | first_blk = mid_blk; | |
55 | /* first_half_cycle == mid_cycle */ | |
56 | } | |
57 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
58 | } | |
59 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | |
60 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | |
61 | ||
62 | return 0; | |
a562a63b | 63 | } |
d321ceac NS |
64 | |
65 | /* | |
66 | * Check that the range of blocks does not contain the cycle number | |
67 | * given. The scan needs to occur from front to back and the ptr into the | |
68 | * region must be updated since a later routine will need to perform another | |
69 | * test. If the region is completely good, we end up returning the same | |
70 | * last block number. | |
71 | * | |
ce029dc1 | 72 | * Set blkno to -1 if we encounter no errors. This is an invalid block number |
d321ceac NS |
73 | * since we don't ever expect logs to get this large. |
74 | */ | |
ce029dc1 | 75 | STATIC int |
a562a63b NS |
76 | xlog_find_verify_cycle( |
77 | xlog_t *log, | |
78 | xfs_daddr_t start_blk, | |
79 | int nbblks, | |
80 | uint stop_on_cycle_no, | |
81 | xfs_daddr_t *new_blk) | |
d321ceac | 82 | { |
a562a63b NS |
83 | xfs_daddr_t i, j; |
84 | uint cycle; | |
85 | xfs_buf_t *bp; | |
86 | xfs_daddr_t bufblks; | |
87 | xfs_caddr_t buf = NULL; | |
88 | int error = 0; | |
85a875e9 ES |
89 | |
90 | bufblks = 1 << ffs(nbblks); | |
d321ceac | 91 | |
a562a63b | 92 | while (!(bp = xlog_get_bp(log, bufblks))) { |
5000d01d | 93 | /* can't get enough memory to do everything in one big buffer */ |
d321ceac | 94 | bufblks >>= 1; |
a562a63b | 95 | if (bufblks <= log->l_sectbb_log) |
5000d01d SL |
96 | return ENOMEM; |
97 | } | |
98 | ||
ffe29fb5 NS |
99 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { |
100 | int bcount; | |
d321ceac | 101 | |
ffe29fb5 | 102 | bcount = min(bufblks, (start_blk + nbblks - i)); |
d321ceac | 103 | |
5000d01d SL |
104 | if ((error = xlog_bread(log, i, bcount, bp))) |
105 | goto out; | |
d321ceac | 106 | |
a562a63b | 107 | buf = xlog_align(log, i, bcount, bp); |
d321ceac NS |
108 | for (j = 0; j < bcount; j++) { |
109 | cycle = GET_CYCLE(buf, ARCH_CONVERT); | |
110 | if (cycle == stop_on_cycle_no) { | |
e56fcdce | 111 | *new_blk = i+j; |
d321ceac NS |
112 | goto out; |
113 | } | |
5000d01d SL |
114 | |
115 | buf += BBSIZE; | |
d321ceac NS |
116 | } |
117 | } | |
118 | ||
ce029dc1 | 119 | *new_blk = -1; |
d321ceac NS |
120 | |
121 | out: | |
122 | xlog_put_bp(bp); | |
d321ceac | 123 | return error; |
a562a63b | 124 | } |
d321ceac NS |
125 | |
126 | /* | |
127 | * Potentially backup over partial log record write. | |
128 | * | |
129 | * In the typical case, last_blk is the number of the block directly after | |
130 | * a good log record. Therefore, we subtract one to get the block number | |
131 | * of the last block in the given buffer. extra_bblks contains the number | |
132 | * of blocks we would have read on a previous read. This happens when the | |
133 | * last log record is split over the end of the physical log. | |
134 | * | |
135 | * extra_bblks is the number of blocks potentially verified on a previous | |
136 | * call to this routine. | |
137 | */ | |
d321ceac | 138 | STATIC int |
a562a63b NS |
139 | xlog_find_verify_log_record( |
140 | xlog_t *log, | |
141 | xfs_daddr_t start_blk, | |
142 | xfs_daddr_t *last_blk, | |
143 | int extra_bblks) | |
d321ceac | 144 | { |
a562a63b NS |
145 | xfs_daddr_t i; |
146 | xfs_buf_t *bp; | |
147 | xfs_caddr_t offset = NULL; | |
148 | xlog_rec_header_t *head = NULL; | |
149 | int error = 0; | |
150 | int smallmem = 0; | |
151 | int num_blks = *last_blk - start_blk; | |
152 | int xhdrs; | |
153 | ||
154 | ASSERT(start_blk != 0 || *last_blk != start_blk); | |
155 | ||
156 | if (!(bp = xlog_get_bp(log, num_blks))) { | |
157 | if (!(bp = xlog_get_bp(log, 1))) | |
158 | return ENOMEM; | |
159 | smallmem = 1; | |
160 | } else { | |
161 | if ((error = xlog_bread(log, start_blk, num_blks, bp))) | |
162 | goto out; | |
163 | offset = xlog_align(log, start_blk, num_blks, bp); | |
164 | offset += ((num_blks - 1) << BBSHIFT); | |
d321ceac NS |
165 | } |
166 | ||
a562a63b NS |
167 | for (i = (*last_blk) - 1; i >= 0; i--) { |
168 | if (i < start_blk) { | |
05bba5b7 | 169 | /* valid log record not found */ |
a562a63b NS |
170 | xlog_warn( |
171 | "XFS: Log inconsistent (didn't find previous header)"); | |
172 | ASSERT(0); | |
173 | error = XFS_ERROR(EIO); | |
174 | goto out; | |
175 | } | |
d321ceac | 176 | |
a562a63b NS |
177 | if (smallmem) { |
178 | if ((error = xlog_bread(log, i, 1, bp))) | |
179 | goto out; | |
180 | offset = xlog_align(log, i, 1, bp); | |
181 | } | |
182 | ||
183 | head = (xlog_rec_header_t *)offset; | |
d321ceac | 184 | |
a562a63b NS |
185 | if (XLOG_HEADER_MAGIC_NUM == |
186 | INT_GET(head->h_magicno, ARCH_CONVERT)) | |
187 | break; | |
188 | ||
189 | if (!smallmem) | |
190 | offset -= BBSIZE; | |
191 | } | |
192 | ||
193 | /* | |
194 | * We hit the beginning of the physical log & still no header. Return | |
195 | * to caller. If caller can handle a return of -1, then this routine | |
196 | * will be called again for the end of the physical log. | |
197 | */ | |
198 | if (i == -1) { | |
199 | error = -1; | |
200 | goto out; | |
201 | } | |
202 | ||
203 | /* | |
204 | * We have the final block of the good log (the first block | |
205 | * of the log record _before_ the head. So we check the uuid. | |
206 | */ | |
207 | if ((error = xlog_header_check_mount(log->l_mp, head))) | |
208 | goto out; | |
209 | ||
210 | /* | |
211 | * We may have found a log record header before we expected one. | |
212 | * last_blk will be the 1st block # with a given cycle #. We may end | |
213 | * up reading an entire log record. In this case, we don't want to | |
214 | * reset last_blk. Only when last_blk points in the middle of a log | |
215 | * record do we update last_blk. | |
216 | */ | |
217 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { | |
218 | uint h_size = INT_GET(head->h_size, ARCH_CONVERT); | |
219 | ||
220 | xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; | |
221 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
222 | xhdrs++; | |
223 | } else { | |
224 | xhdrs = 1; | |
225 | } | |
226 | ||
227 | if (*last_blk - i + extra_bblks | |
228 | != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs) | |
229 | *last_blk = i; | |
230 | ||
231 | out: | |
232 | xlog_put_bp(bp); | |
233 | return error; | |
234 | } | |
d321ceac NS |
235 | |
236 | /* | |
237 | * Head is defined to be the point of the log where the next log write | |
238 | * write could go. This means that incomplete LR writes at the end are | |
239 | * eliminated when calculating the head. We aren't guaranteed that previous | |
5000d01d | 240 | * LR have complete transactions. We only know that a cycle number of |
d321ceac NS |
241 | * current cycle number -1 won't be present in the log if we start writing |
242 | * from our current block number. | |
243 | * | |
244 | * last_blk contains the block number of the first block with a given | |
245 | * cycle number. | |
246 | * | |
d321ceac NS |
247 | * Return: zero if normal, non-zero if error. |
248 | */ | |
249 | int | |
a562a63b NS |
250 | xlog_find_head( |
251 | xlog_t *log, | |
252 | xfs_daddr_t *return_head_blk) | |
d321ceac | 253 | { |
a562a63b NS |
254 | xfs_buf_t *bp; |
255 | xfs_caddr_t offset; | |
256 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; | |
257 | int num_scan_bblks; | |
258 | uint first_half_cycle, last_half_cycle; | |
259 | uint stop_on_cycle; | |
260 | int error, log_bbnum = log->l_logBBsize; | |
261 | ||
262 | /* Is the end of the log device zeroed? */ | |
263 | if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { | |
264 | *return_head_blk = first_blk; | |
265 | ||
266 | /* Is the whole lot zeroed? */ | |
267 | if (!first_blk) { | |
268 | /* Linux XFS shouldn't generate totally zeroed logs - | |
269 | * mkfs etc write a dummy unmount record to a fresh | |
270 | * log so we can store the uuid in there | |
271 | */ | |
272 | xlog_warn("XFS: totally zeroed log"); | |
273 | } | |
274 | ||
275 | return 0; | |
276 | } else if (error) { | |
277 | xlog_warn("XFS: empty log check failed"); | |
278 | return error; | |
5000d01d SL |
279 | } |
280 | ||
a562a63b NS |
281 | first_blk = 0; /* get cycle # of 1st block */ |
282 | bp = xlog_get_bp(log, 1); | |
283 | if (!bp) | |
284 | return ENOMEM; | |
285 | if ((error = xlog_bread(log, 0, 1, bp))) | |
286 | goto bp_err; | |
287 | offset = xlog_align(log, 0, 1, bp); | |
288 | first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
289 | ||
290 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ | |
291 | if ((error = xlog_bread(log, last_blk, 1, bp))) | |
292 | goto bp_err; | |
293 | offset = xlog_align(log, last_blk, 1, bp); | |
294 | last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
295 | ASSERT(last_half_cycle != 0); | |
296 | ||
d321ceac | 297 | /* |
a562a63b NS |
298 | * If the 1st half cycle number is equal to the last half cycle number, |
299 | * then the entire log is stamped with the same cycle number. In this | |
300 | * case, head_blk can't be set to zero (which makes sense). The below | |
301 | * math doesn't work out properly with head_blk equal to zero. Instead, | |
05bba5b7 | 302 | * we set it to log_bbnum which is an invalid block number, but this |
a562a63b NS |
303 | * value makes the math correct. If head_blk doesn't changed through |
304 | * all the tests below, *head_blk is set to zero at the very end rather | |
305 | * than log_bbnum. In a sense, log_bbnum and zero are the same block | |
306 | * in a circular file. | |
d321ceac | 307 | */ |
a562a63b NS |
308 | if (first_half_cycle == last_half_cycle) { |
309 | /* | |
310 | * In this case we believe that the entire log should have | |
311 | * cycle number last_half_cycle. We need to scan backwards | |
312 | * from the end verifying that there are no holes still | |
313 | * containing last_half_cycle - 1. If we find such a hole, | |
314 | * then the start of that hole will be the new head. The | |
315 | * simple case looks like | |
316 | * x | x ... | x - 1 | x | |
317 | * Another case that fits this picture would be | |
318 | * x | x + 1 | x ... | x | |
319 | * In this case the head really is somwhere at the end of the | |
320 | * log, as one of the latest writes at the beginning was | |
321 | * incomplete. | |
322 | * One more case is | |
323 | * x | x + 1 | x ... | x - 1 | x | |
324 | * This is really the combination of the above two cases, and | |
325 | * the head has to end up at the start of the x-1 hole at the | |
326 | * end of the log. | |
327 | * | |
328 | * In the 256k log case, we will read from the beginning to the | |
329 | * end of the log and search for cycle numbers equal to x-1. | |
330 | * We don't worry about the x+1 blocks that we encounter, | |
331 | * because we know that they cannot be the head since the log | |
332 | * started with x. | |
333 | */ | |
334 | head_blk = log_bbnum; | |
335 | stop_on_cycle = last_half_cycle - 1; | |
336 | } else { | |
337 | /* | |
338 | * In this case we want to find the first block with cycle | |
339 | * number matching last_half_cycle. We expect the log to be | |
340 | * some variation on | |
341 | * x + 1 ... | x ... | |
342 | * The first block with cycle number x (last_half_cycle) will | |
343 | * be where the new head belongs. First we do a binary search | |
344 | * for the first occurrence of last_half_cycle. The binary | |
345 | * search may not be totally accurate, so then we scan back | |
346 | * from there looking for occurrences of last_half_cycle before | |
347 | * us. If that backwards scan wraps around the beginning of | |
348 | * the log, then we look for occurrences of last_half_cycle - 1 | |
349 | * at the end of the log. The cases we're looking for look | |
350 | * like | |
351 | * x + 1 ... | x | x + 1 | x ... | |
352 | * ^ binary search stopped here | |
353 | * or | |
354 | * x + 1 ... | x ... | x - 1 | x | |
355 | * <---------> less than scan distance | |
356 | */ | |
357 | stop_on_cycle = last_half_cycle; | |
358 | if ((error = xlog_find_cycle_start(log, bp, first_blk, | |
359 | &head_blk, last_half_cycle))) | |
360 | goto bp_err; | |
361 | } | |
362 | ||
d321ceac | 363 | /* |
a562a63b NS |
364 | * Now validate the answer. Scan back some number of maximum possible |
365 | * blocks and make sure each one has the expected cycle number. The | |
366 | * maximum is determined by the total possible amount of buffering | |
367 | * in the in-core log. The following number can be made tighter if | |
368 | * we actually look at the block size of the filesystem. | |
d321ceac | 369 | */ |
a562a63b NS |
370 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
371 | if (head_blk >= num_scan_bblks) { | |
372 | /* | |
373 | * We are guaranteed that the entire check can be performed | |
374 | * in one buffer. | |
375 | */ | |
376 | start_blk = head_blk - num_scan_bblks; | |
377 | if ((error = xlog_find_verify_cycle(log, | |
378 | start_blk, num_scan_bblks, | |
379 | stop_on_cycle, &new_blk))) | |
380 | goto bp_err; | |
381 | if (new_blk != -1) | |
382 | head_blk = new_blk; | |
383 | } else { /* need to read 2 parts of log */ | |
384 | /* | |
385 | * We are going to scan backwards in the log in two parts. | |
386 | * First we scan the physical end of the log. In this part | |
387 | * of the log, we are looking for blocks with cycle number | |
388 | * last_half_cycle - 1. | |
389 | * If we find one, then we know that the log starts there, as | |
390 | * we've found a hole that didn't get written in going around | |
391 | * the end of the physical log. The simple case for this is | |
392 | * x + 1 ... | x ... | x - 1 | x | |
393 | * <---------> less than scan distance | |
394 | * If all of the blocks at the end of the log have cycle number | |
395 | * last_half_cycle, then we check the blocks at the start of | |
396 | * the log looking for occurrences of last_half_cycle. If we | |
397 | * find one, then our current estimate for the location of the | |
398 | * first occurrence of last_half_cycle is wrong and we move | |
399 | * back to the hole we've found. This case looks like | |
400 | * x + 1 ... | x | x + 1 | x ... | |
401 | * ^ binary search stopped here | |
402 | * Another case we need to handle that only occurs in 256k | |
403 | * logs is | |
404 | * x + 1 ... | x ... | x+1 | x ... | |
405 | * ^ binary search stops here | |
406 | * In a 256k log, the scan at the end of the log will see the | |
407 | * x + 1 blocks. We need to skip past those since that is | |
408 | * certainly not the head of the log. By searching for | |
409 | * last_half_cycle-1 we accomplish that. | |
410 | */ | |
411 | start_blk = log_bbnum - num_scan_bblks + head_blk; | |
412 | ASSERT(head_blk <= INT_MAX && | |
413 | (xfs_daddr_t) num_scan_bblks - head_blk >= 0); | |
414 | if ((error = xlog_find_verify_cycle(log, start_blk, | |
415 | num_scan_bblks - (int)head_blk, | |
416 | (stop_on_cycle - 1), &new_blk))) | |
417 | goto bp_err; | |
418 | if (new_blk != -1) { | |
419 | head_blk = new_blk; | |
420 | goto bad_blk; | |
421 | } | |
422 | ||
423 | /* | |
424 | * Scan beginning of log now. The last part of the physical | |
425 | * log is good. This scan needs to verify that it doesn't find | |
426 | * the last_half_cycle. | |
427 | */ | |
428 | start_blk = 0; | |
429 | ASSERT(head_blk <= INT_MAX); | |
430 | if ((error = xlog_find_verify_cycle(log, | |
431 | start_blk, (int)head_blk, | |
432 | stop_on_cycle, &new_blk))) | |
433 | goto bp_err; | |
434 | if (new_blk != -1) | |
435 | head_blk = new_blk; | |
436 | } | |
437 | ||
438 | bad_blk: | |
5000d01d | 439 | /* |
a562a63b NS |
440 | * Now we need to make sure head_blk is not pointing to a block in |
441 | * the middle of a log record. | |
d321ceac | 442 | */ |
a562a63b NS |
443 | num_scan_bblks = XLOG_REC_SHIFT(log); |
444 | if (head_blk >= num_scan_bblks) { | |
445 | start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ | |
446 | ||
447 | /* start ptr at last block ptr before head_blk */ | |
448 | if ((error = xlog_find_verify_log_record(log, start_blk, | |
449 | &head_blk, 0)) == -1) { | |
450 | error = XFS_ERROR(EIO); | |
451 | goto bp_err; | |
452 | } else if (error) | |
453 | goto bp_err; | |
454 | } else { | |
455 | start_blk = 0; | |
456 | ASSERT(head_blk <= INT_MAX); | |
457 | if ((error = xlog_find_verify_log_record(log, start_blk, | |
458 | &head_blk, 0)) == -1) { | |
459 | /* We hit the beginning of the log during our search */ | |
460 | start_blk = log_bbnum - num_scan_bblks + head_blk; | |
461 | new_blk = log_bbnum; | |
462 | ASSERT(start_blk <= INT_MAX && | |
463 | (xfs_daddr_t) log_bbnum-start_blk >= 0); | |
464 | ASSERT(head_blk <= INT_MAX); | |
465 | if ((error = xlog_find_verify_log_record(log, | |
466 | start_blk, &new_blk, | |
467 | (int)head_blk)) == -1) { | |
468 | error = XFS_ERROR(EIO); | |
469 | goto bp_err; | |
470 | } else if (error) | |
471 | goto bp_err; | |
472 | if (new_blk != log_bbnum) | |
473 | head_blk = new_blk; | |
474 | } else if (error) | |
475 | goto bp_err; | |
d321ceac NS |
476 | } |
477 | ||
a562a63b NS |
478 | xlog_put_bp(bp); |
479 | if (head_blk == log_bbnum) | |
480 | *return_head_blk = 0; | |
481 | else | |
482 | *return_head_blk = head_blk; | |
d321ceac | 483 | /* |
a562a63b NS |
484 | * When returning here, we have a good block number. Bad block |
485 | * means that during a previous crash, we didn't have a clean break | |
486 | * from cycle number N to cycle number N-1. In this case, we need | |
487 | * to find the first block with cycle number N-1. | |
d321ceac | 488 | */ |
a562a63b | 489 | return 0; |
d321ceac | 490 | |
a562a63b | 491 | bp_err: |
d321ceac NS |
492 | xlog_put_bp(bp); |
493 | ||
5000d01d SL |
494 | if (error) |
495 | xlog_warn("XFS: failed to find log head"); | |
d321ceac | 496 | return error; |
a562a63b | 497 | } |
d321ceac NS |
498 | |
499 | /* | |
500 | * Find the sync block number or the tail of the log. | |
501 | * | |
502 | * This will be the block number of the last record to have its | |
503 | * associated buffers synced to disk. Every log record header has | |
504 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | |
4ed50f8a | 505 | * to get a sync block number. The only concern is to figure out which |
d321ceac NS |
506 | * log record header to believe. |
507 | * | |
508 | * The following algorithm uses the log record header with the largest | |
4ed50f8a | 509 | * lsn. The entire log record does not need to be valid. We only care |
d321ceac NS |
510 | * that the header is valid. |
511 | * | |
512 | * We could speed up search by using current head_blk buffer, but it is not | |
513 | * available. | |
514 | */ | |
515 | int | |
a562a63b NS |
516 | xlog_find_tail( |
517 | xlog_t *log, | |
518 | xfs_daddr_t *head_blk, | |
519 | xfs_daddr_t *tail_blk, | |
520 | int readonly) | |
d321ceac NS |
521 | { |
522 | xlog_rec_header_t *rhead; | |
523 | xlog_op_header_t *op_head; | |
a562a63b | 524 | xfs_caddr_t offset = NULL; |
d321ceac NS |
525 | xfs_buf_t *bp; |
526 | int error, i, found; | |
527 | xfs_daddr_t umount_data_blk; | |
528 | xfs_daddr_t after_umount_blk; | |
529 | xfs_lsn_t tail_lsn; | |
73bf5988 | 530 | int hblks; |
5000d01d | 531 | |
1b6a0044 | 532 | found = 0; |
d321ceac NS |
533 | |
534 | /* | |
5000d01d | 535 | * Find previous log record |
d321ceac NS |
536 | */ |
537 | if ((error = xlog_find_head(log, head_blk))) | |
538 | return error; | |
539 | ||
a562a63b | 540 | bp = xlog_get_bp(log, 1); |
d321ceac | 541 | if (!bp) |
ce029dc1 | 542 | return ENOMEM; |
d321ceac NS |
543 | if (*head_blk == 0) { /* special case */ |
544 | if ((error = xlog_bread(log, 0, 1, bp))) | |
545 | goto bread_err; | |
a562a63b NS |
546 | offset = xlog_align(log, 0, 1, bp); |
547 | if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { | |
d321ceac NS |
548 | *tail_blk = 0; |
549 | /* leave all other log inited values alone */ | |
550 | goto exit; | |
551 | } | |
552 | } | |
553 | ||
554 | /* | |
555 | * Search backwards looking for log record header block | |
556 | */ | |
557 | ASSERT(*head_blk < INT_MAX); | |
1b6a0044 | 558 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
d321ceac NS |
559 | if ((error = xlog_bread(log, i, 1, bp))) |
560 | goto bread_err; | |
a562a63b | 561 | offset = xlog_align(log, i, 1, bp); |
1b6a0044 | 562 | if (XLOG_HEADER_MAGIC_NUM == |
a562a63b | 563 | INT_GET(*(uint *)offset, ARCH_CONVERT)) { |
d321ceac NS |
564 | found = 1; |
565 | break; | |
566 | } | |
567 | } | |
568 | /* | |
569 | * If we haven't found the log record header block, start looking | |
570 | * again from the end of the physical log. XXXmiken: There should be | |
571 | * a check here to make sure we didn't search more than N blocks in | |
572 | * the previous code. | |
573 | */ | |
574 | if (!found) { | |
1b6a0044 | 575 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
d321ceac NS |
576 | if ((error = xlog_bread(log, i, 1, bp))) |
577 | goto bread_err; | |
a562a63b | 578 | offset = xlog_align(log, i, 1, bp); |
1b6a0044 | 579 | if (XLOG_HEADER_MAGIC_NUM == |
a562a63b | 580 | INT_GET(*(uint*)offset, ARCH_CONVERT)) { |
d321ceac NS |
581 | found = 2; |
582 | break; | |
583 | } | |
584 | } | |
585 | } | |
586 | if (!found) { | |
587 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | |
588 | ASSERT(0); | |
589 | return XFS_ERROR(EIO); | |
590 | } | |
591 | ||
592 | /* find blk_no of tail of log */ | |
a562a63b | 593 | rhead = (xlog_rec_header_t *)offset; |
46eca962 | 594 | *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT)); |
d321ceac NS |
595 | |
596 | /* | |
597 | * Reset log values according to the state of the log when we | |
598 | * crashed. In the case where head_blk == 0, we bump curr_cycle | |
599 | * one because the next write starts a new cycle rather than | |
600 | * continuing the cycle of the last good log record. At this | |
601 | * point we have guaranteed that all partial log records have been | |
602 | * accounted for. Therefore, we know that the last good log record | |
603 | * written was complete and ended exactly on the end boundary | |
604 | * of the physical log. | |
605 | */ | |
606 | log->l_prev_block = i; | |
607 | log->l_curr_block = (int)*head_blk; | |
608 | log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); | |
609 | if (found == 2) | |
610 | log->l_curr_cycle++; | |
611 | log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); | |
612 | log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); | |
613 | log->l_grant_reserve_cycle = log->l_curr_cycle; | |
614 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | |
615 | log->l_grant_write_cycle = log->l_curr_cycle; | |
616 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | |
617 | ||
618 | /* | |
619 | * Look for unmount record. If we find it, then we know there | |
4ed50f8a | 620 | * was a clean unmount. Since 'i' could be the last block in |
d321ceac NS |
621 | * the physical log, we convert to a log block before comparing |
622 | * to the head_blk. | |
623 | * | |
624 | * Save the current tail lsn to use to pass to | |
625 | * xlog_clear_stale_blocks() below. We won't want to clear the | |
626 | * unmount record if there is one, so we pass the lsn of the | |
627 | * unmount record rather than the block after it. | |
628 | */ | |
73bf5988 SL |
629 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { |
630 | int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); | |
631 | int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); | |
1b6a0044 NS |
632 | |
633 | if ((h_version & XLOG_VERSION_2) && | |
73bf5988 SL |
634 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
635 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | |
636 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
637 | hblks++; | |
638 | } else { | |
639 | hblks = 1; | |
640 | } | |
641 | } else { | |
642 | hblks = 1; | |
643 | } | |
1b6a0044 NS |
644 | after_umount_blk = (i + hblks + (int) |
645 | BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; | |
d321ceac | 646 | tail_lsn = log->l_tail_lsn; |
1b6a0044 NS |
647 | if (*head_blk == after_umount_blk && |
648 | INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { | |
73bf5988 | 649 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
d321ceac NS |
650 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { |
651 | goto bread_err; | |
652 | } | |
a562a63b NS |
653 | offset = xlog_align(log, umount_data_blk, 1, bp); |
654 | op_head = (xlog_op_header_t *)offset; | |
d321ceac NS |
655 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
656 | /* | |
657 | * Set tail and last sync so that newly written | |
658 | * log records will point recovery to after the | |
659 | * current unmount record. | |
660 | */ | |
46eca962 NS |
661 | ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle, |
662 | after_umount_blk); | |
663 | ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, | |
664 | after_umount_blk); | |
d321ceac | 665 | *tail_blk = after_umount_blk; |
46eca962 NS |
666 | |
667 | /* | |
668 | * Note that the unmount was clean. If the unmount | |
669 | * was not clean, we need to know this to rebuild the | |
670 | * superblock counters from the perag headers if we | |
671 | * have a filesystem using non-persistent counters. | |
672 | */ | |
673 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; | |
d321ceac NS |
674 | } |
675 | } | |
676 | ||
d321ceac NS |
677 | /* |
678 | * Make sure that there are no blocks in front of the head | |
679 | * with the same cycle number as the head. This can happen | |
680 | * because we allow multiple outstanding log writes concurrently, | |
681 | * and the later writes might make it out before earlier ones. | |
682 | * | |
683 | * We use the lsn from before modifying it so that we'll never | |
684 | * overwrite the unmount record after a clean unmount. | |
685 | * | |
686 | * Do this only if we are going to recover the filesystem | |
32181a02 NS |
687 | * |
688 | * NOTE: This used to say "if (!readonly)" | |
689 | * However on Linux, we can & do recover a read-only filesystem. | |
690 | * We only skip recovery if NORECOVERY is specified on mount, | |
691 | * in which case we would not be here. | |
692 | * | |
693 | * But... if the -device- itself is readonly, just skip this. | |
694 | * We can't recover this device anyway, so it won't matter. | |
d321ceac | 695 | */ |
a562a63b | 696 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { |
d321ceac | 697 | error = xlog_clear_stale_blocks(log, tail_lsn); |
32181a02 | 698 | } |
d321ceac NS |
699 | |
700 | bread_err: | |
701 | exit: | |
702 | xlog_put_bp(bp); | |
703 | ||
5000d01d SL |
704 | if (error) |
705 | xlog_warn("XFS: failed to locate log tail"); | |
d321ceac | 706 | return error; |
a562a63b | 707 | } |
4ed50f8a | 708 | |
d321ceac NS |
709 | /* |
710 | * Is the log zeroed at all? | |
711 | * | |
712 | * The last binary search should be changed to perform an X block read | |
4ed50f8a | 713 | * once X becomes small enough. You can then search linearly through |
d321ceac NS |
714 | * the X blocks. This will cut down on the number of reads we need to do. |
715 | * | |
716 | * If the log is partially zeroed, this routine will pass back the blkno | |
717 | * of the first block with cycle number 0. It won't have a complete LR | |
718 | * preceding it. | |
719 | * | |
720 | * Return: | |
721 | * 0 => the log is completely written to | |
722 | * -1 => use *blk_no as the first block of the log | |
723 | * >0 => error has occurred | |
724 | */ | |
725 | int | |
a562a63b NS |
726 | xlog_find_zeroed( |
727 | xlog_t *log, | |
728 | xfs_daddr_t *blk_no) | |
d321ceac NS |
729 | { |
730 | xfs_buf_t *bp; | |
a562a63b | 731 | xfs_caddr_t offset; |
4ed50f8a | 732 | uint first_cycle, last_cycle; |
d321ceac | 733 | xfs_daddr_t new_blk, last_blk, start_blk; |
4ed50f8a RC |
734 | xfs_daddr_t num_scan_bblks; |
735 | int error, log_bbnum = log->l_logBBsize; | |
d321ceac | 736 | |
d321ceac | 737 | /* check totally zeroed log */ |
a562a63b | 738 | bp = xlog_get_bp(log, 1); |
d321ceac | 739 | if (!bp) |
ce029dc1 | 740 | return ENOMEM; |
d321ceac NS |
741 | if ((error = xlog_bread(log, 0, 1, bp))) |
742 | goto bp_err; | |
a562a63b NS |
743 | offset = xlog_align(log, 0, 1, bp); |
744 | first_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
d321ceac NS |
745 | if (first_cycle == 0) { /* completely zeroed log */ |
746 | *blk_no = 0; | |
747 | xlog_put_bp(bp); | |
748 | return -1; | |
749 | } | |
750 | ||
751 | /* check partially zeroed log */ | |
752 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) | |
753 | goto bp_err; | |
a562a63b NS |
754 | offset = xlog_align(log, log_bbnum-1, 1, bp); |
755 | last_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
d321ceac NS |
756 | if (last_cycle != 0) { /* log completely written to */ |
757 | xlog_put_bp(bp); | |
758 | return 0; | |
759 | } else if (first_cycle != 1) { | |
760 | /* | |
761 | * If the cycle of the last block is zero, the cycle of | |
5000d01d SL |
762 | * the first block must be 1. If it's not, maybe we're |
763 | * not looking at a log... Bail out. | |
d321ceac | 764 | */ |
5000d01d | 765 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); |
d321ceac NS |
766 | return XFS_ERROR(EINVAL); |
767 | } | |
5000d01d | 768 | |
d321ceac NS |
769 | /* we have a partially zeroed log */ |
770 | last_blk = log_bbnum-1; | |
771 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) | |
772 | goto bp_err; | |
773 | ||
774 | /* | |
4ed50f8a | 775 | * Validate the answer. Because there is no way to guarantee that |
d321ceac NS |
776 | * the entire log is made up of log records which are the same size, |
777 | * we scan over the defined maximum blocks. At this point, the maximum | |
778 | * is not chosen to mean anything special. XXXmiken | |
779 | */ | |
73bf5988 | 780 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
d321ceac | 781 | ASSERT(num_scan_bblks <= INT_MAX); |
5000d01d | 782 | |
d321ceac NS |
783 | if (last_blk < num_scan_bblks) |
784 | num_scan_bblks = last_blk; | |
785 | start_blk = last_blk - num_scan_bblks; | |
5000d01d | 786 | |
d321ceac NS |
787 | /* |
788 | * We search for any instances of cycle number 0 that occur before | |
789 | * our current estimate of the head. What we're trying to detect is | |
4ed50f8a RC |
790 | * 1 ... | 0 | 1 | 0... |
791 | * ^ binary search ends here | |
d321ceac | 792 | */ |
ce029dc1 ES |
793 | if ((error = xlog_find_verify_cycle(log, start_blk, |
794 | (int)num_scan_bblks, 0, &new_blk))) | |
606d804d | 795 | goto bp_err; |
ce029dc1 ES |
796 | if (new_blk != -1) |
797 | last_blk = new_blk; | |
d321ceac NS |
798 | |
799 | /* | |
800 | * Potentially backup over partial log record write. We don't need | |
801 | * to search the end of the log because we know it is zero. | |
802 | */ | |
5000d01d | 803 | if ((error = xlog_find_verify_log_record(log, start_blk, |
79c48ada ES |
804 | &last_blk, 0)) == -1) { |
805 | error = XFS_ERROR(EIO); | |
806 | goto bp_err; | |
807 | } else if (error) | |
d321ceac NS |
808 | goto bp_err; |
809 | ||
810 | *blk_no = last_blk; | |
811 | bp_err: | |
812 | xlog_put_bp(bp); | |
813 | if (error) | |
814 | return error; | |
815 | return -1; | |
a562a63b | 816 | } |
d321ceac | 817 | |
/*
 * Restore the first word of each BBSIZE block of a log record's payload
 * from the cycle data stashed in the record header (presumably undoing
 * the packing done at write time -- TODO confirm against the writer side).
 */
STATIC void
xlog_unpack_data(
	xlog_rec_header_t	*rhead,		/* record header holding cycle data */
	xfs_caddr_t		dp,		/* start of record payload */
	xlog_t			*log)
{
	int			i, j, k;
	xlog_in_core_2_t	*xhdr;

	/*
	 * The primary header carries up to XLOG_HEADER_CYCLE_SIZE/BBSIZE
	 * stashed words in h_cycle_data[], one per payload block.
	 */
	for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/*
		 * v2 logs: any remaining cycle words live in the extended
		 * headers that follow the primary one; j selects the
		 * extended header, k the word within it.
		 */
		xhdr = (xlog_in_core_2_t *)rhead;
		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

	/* a no-op here -- stubbed out by the macro at the top of this file */
	xlog_unpack_data_checksum(rhead, dp, log);
}
d321ceac | 845 | |
d321ceac | 846 | STATIC xlog_recover_t * |
a562a63b NS |
847 | xlog_recover_find_tid( |
848 | xlog_recover_t *q, | |
849 | xlog_tid_t tid) | |
d321ceac | 850 | { |
a562a63b | 851 | xlog_recover_t *p = q; |
d321ceac NS |
852 | |
853 | while (p != NULL) { | |
854 | if (p->r_log_tid == tid) | |
855 | break; | |
856 | p = p->r_next; | |
857 | } | |
858 | return p; | |
a562a63b | 859 | } |
4ed50f8a | 860 | |
/*
 * Push trans onto the front of the singly-linked hash bucket list
 * headed by *q.
 */
STATIC void
xlog_recover_put_hashq(
	xlog_recover_t	**q,
	xlog_recover_t	*trans)
{
	trans->r_next = *q;
	*q = trans;
}
4ed50f8a | 869 | |
/*
 * Allocate a zeroed recovery transaction for the given tid/lsn and
 * insert it into the hash bucket list headed by *q.  KM_SLEEP means
 * the allocation blocks rather than returning failure.
 */
STATIC void
xlog_recover_new_tid(
	xlog_recover_t	**q,
	xlog_tid_t	tid,
	xfs_lsn_t	lsn)	/* lsn of the record that started the trans */
{
	xlog_recover_t	*trans;

	trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
	trans->r_log_tid = tid;
	trans->r_lsn = lsn;
	xlog_recover_put_hashq(q, trans);
}
d321ceac NS |
883 | |
884 | STATIC int | |
a562a63b NS |
885 | xlog_recover_unlink_tid( |
886 | xlog_recover_t **q, | |
887 | xlog_recover_t *trans) | |
d321ceac | 888 | { |
a562a63b NS |
889 | xlog_recover_t *tp; |
890 | int found = 0; | |
d321ceac NS |
891 | |
892 | ASSERT(trans != 0); | |
893 | if (trans == *q) { | |
894 | *q = (*q)->r_next; | |
895 | } else { | |
896 | tp = *q; | |
897 | while (tp != 0) { | |
898 | if (tp->r_next == trans) { | |
899 | found = 1; | |
900 | break; | |
901 | } | |
902 | tp = tp->r_next; | |
903 | } | |
904 | if (!found) { | |
905 | xlog_warn( | |
906 | "XFS: xlog_recover_unlink_tid: trans not found"); | |
907 | ASSERT(0); | |
908 | return XFS_ERROR(EIO); | |
909 | } | |
910 | tp->r_next = tp->r_next->r_next; | |
911 | } | |
912 | return 0; | |
a562a63b | 913 | } |
d321ceac NS |
914 | |
915 | /* | |
916 | * Free up any resources allocated by the transaction | |
917 | * | |
918 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | |
919 | */ | |
920 | STATIC void | |
a562a63b NS |
921 | xlog_recover_free_trans( |
922 | xlog_recover_t *trans) | |
d321ceac | 923 | { |
a562a63b NS |
924 | xlog_recover_item_t *first_item, *item, *free_item; |
925 | int i; | |
d321ceac NS |
926 | |
927 | item = first_item = trans->r_itemq; | |
928 | do { | |
929 | free_item = item; | |
930 | item = item->ri_next; | |
931 | /* Free the regions in the item. */ | |
932 | for (i = 0; i < free_item->ri_cnt; i++) { | |
933 | kmem_free(free_item->ri_buf[i].i_addr, | |
934 | free_item->ri_buf[i].i_len); | |
935 | } | |
936 | /* Free the item itself */ | |
937 | kmem_free(free_item->ri_buf, | |
938 | (free_item->ri_total * sizeof(xfs_log_iovec_t))); | |
939 | kmem_free(free_item, sizeof(xlog_recover_item_t)); | |
940 | } while (first_item != item); | |
941 | /* Free the transaction recover structure */ | |
942 | kmem_free(trans, sizeof(xlog_recover_t)); | |
a562a63b | 943 | } |
d321ceac NS |
944 | |
945 | STATIC int | |
a562a63b NS |
946 | xlog_recover_commit_trans( |
947 | xlog_t *log, | |
948 | xlog_recover_t **q, | |
949 | xlog_recover_t *trans, | |
950 | int pass) | |
d321ceac | 951 | { |
a562a63b | 952 | int error; |
d321ceac NS |
953 | |
954 | if ((error = xlog_recover_unlink_tid(q, trans))) | |
955 | return error; | |
956 | if ((error = xlog_recover_do_trans(log, trans, pass))) | |
957 | return error; | |
958 | xlog_recover_free_trans(trans); /* no error */ | |
959 | return 0; | |
a562a63b | 960 | } |
d321ceac NS |
961 | |
962 | STATIC void | |
a562a63b NS |
963 | xlog_recover_insert_item_backq( |
964 | xlog_recover_item_t **q, | |
965 | xlog_recover_item_t *item) | |
d321ceac NS |
966 | { |
967 | if (*q == 0) { | |
968 | item->ri_prev = item->ri_next = item; | |
969 | *q = item; | |
970 | } else { | |
971 | item->ri_next = *q; | |
972 | item->ri_prev = (*q)->ri_prev; | |
973 | (*q)->ri_prev = item; | |
974 | item->ri_prev->ri_next = item; | |
975 | } | |
a562a63b | 976 | } |
d321ceac NS |
977 | |
978 | STATIC void | |
a562a63b NS |
979 | xlog_recover_add_item( |
980 | xlog_recover_item_t **itemq) | |
d321ceac | 981 | { |
a562a63b | 982 | xlog_recover_item_t *item; |
d321ceac | 983 | |
2b288ccf | 984 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); |
d321ceac | 985 | xlog_recover_insert_item_backq(itemq, item); |
a562a63b | 986 | } |
d321ceac | 987 | |
a562a63b NS |
/*
 * Append len bytes at dp to the last region added to the transaction:
 * this log operation continues data begun in a previous one.  If no
 * items exist yet, the bytes complete the partially-copied transaction
 * header instead.  Always returns 0.
 */
STATIC int
xlog_recover_add_to_cont_trans(
	xlog_recover_t		*trans,
	xfs_caddr_t		dp,
	int			len)
{
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr, old_ptr;
	int			old_len;

	item = trans->r_itemq;
	if (item == 0) {
		/* finish copying rest of trans header */
		xlog_recover_add_item(&trans->r_itemq);
		/* tail of r_theader: the first part was copied by
		 * xlog_recover_add_to_trans in an earlier op */
		ptr = (xfs_caddr_t) &trans->r_theader +
				sizeof(xfs_trans_header_t) - len;
		memcpy(ptr, dp, len); /* d, s, l */
		return 0;
	}
	/* last item on the circular queue is head->ri_prev */
	item = item->ri_prev;

	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
	old_len = item->ri_buf[item->ri_cnt-1].i_len;

	/* grow the last region in place and tack the new bytes on */
	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
	memcpy(&ptr[old_len], dp, len); /* d, s, l */
	item->ri_buf[item->ri_cnt-1].i_len += len;
	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
	return 0;
}
1018 | ||
1019 | /* | |
1020 | * The next region to add is the start of a new region. It could be | |
d321ceac NS |
1021 | * a whole region or it could be the first part of a new region. Because |
1022 | * of this, the assumption here is that the type and size fields of all | |
1023 | * format structures fit into the first 32 bits of the structure. | |
1024 | * | |
1025 | * This works because all regions must be 32 bit aligned. Therefore, we | |
1026 | * either have both fields or we have neither field. In the case we have | |
1027 | * neither field, the data part of the region is zero length. We only have | |
1028 | * a log_op_header and can throw away the header since a new one will appear | |
1029 | * later. If we have at least 4 bytes, then we can determine how many regions | |
1030 | * will appear in the current log item. | |
1031 | */ | |
1032 | STATIC int | |
a562a63b NS |
1033 | xlog_recover_add_to_trans( |
1034 | xlog_recover_t *trans, | |
1035 | xfs_caddr_t dp, | |
1036 | int len) | |
d321ceac | 1037 | { |
a562a63b NS |
1038 | xfs_inode_log_format_t *in_f; /* any will do */ |
1039 | xlog_recover_item_t *item; | |
1040 | xfs_caddr_t ptr; | |
d321ceac NS |
1041 | |
1042 | if (!len) | |
1043 | return 0; | |
d321ceac NS |
1044 | item = trans->r_itemq; |
1045 | if (item == 0) { | |
1046 | ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); | |
1047 | if (len == sizeof(xfs_trans_header_t)) | |
1048 | xlog_recover_add_item(&trans->r_itemq); | |
32181a02 | 1049 | memcpy(&trans->r_theader, dp, len); /* d, s, l */ |
d321ceac NS |
1050 | return 0; |
1051 | } | |
a562a63b NS |
1052 | |
1053 | ptr = kmem_alloc(len, KM_SLEEP); | |
1054 | memcpy(ptr, dp, len); | |
1055 | in_f = (xfs_inode_log_format_t *)ptr; | |
1056 | ||
d321ceac NS |
1057 | if (item->ri_prev->ri_total != 0 && |
1058 | item->ri_prev->ri_total == item->ri_prev->ri_cnt) { | |
1059 | xlog_recover_add_item(&trans->r_itemq); | |
1060 | } | |
1061 | item = trans->r_itemq; | |
1062 | item = item->ri_prev; | |
1063 | ||
1064 | if (item->ri_total == 0) { /* first region to be added */ | |
1065 | item->ri_total = in_f->ilf_size; | |
1066 | ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); | |
1067 | item->ri_buf = kmem_zalloc((item->ri_total * | |
2b288ccf | 1068 | sizeof(xfs_log_iovec_t)), KM_SLEEP); |
d321ceac NS |
1069 | } |
1070 | ASSERT(item->ri_total > item->ri_cnt); | |
1071 | /* Description region is ri_buf[0] */ | |
1072 | item->ri_buf[item->ri_cnt].i_addr = ptr; | |
1073 | item->ri_buf[item->ri_cnt].i_len = len; | |
1074 | item->ri_cnt++; | |
1075 | return 0; | |
a562a63b | 1076 | } |
d321ceac NS |
1077 | |
1078 | STATIC int | |
a562a63b NS |
1079 | xlog_recover_unmount_trans( |
1080 | xlog_recover_t *trans) | |
d321ceac NS |
1081 | { |
1082 | /* Do nothing now */ | |
1083 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | |
a562a63b NS |
1084 | return 0; |
1085 | } | |
d321ceac | 1086 | |
a562a63b NS |
1087 | /* |
1088 | * There are two valid states of the r_state field. 0 indicates that the | |
1089 | * transaction structure is in a normal state. We have either seen the | |
1090 | * start of the transaction or the last operation we added was not a partial | |
1091 | * operation. If the last operation we added to the transaction was a | |
1092 | * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. | |
1093 | * | |
1094 | * NOTE: skip LRs with 0 data length. | |
1095 | */ | |
/*
 * Walk the log operations of one unpacked log record and dispatch each
 * to the transaction machinery: start ops create a hash entry; data
 * ops append regions; commit/unmount ops finish the transaction.
 * Returns 0, or EIO on malformed data, or the first replay error.
 */
STATIC int
xlog_recover_process_data(
	xlog_t		*log,
	xlog_recover_t	*rhash[],	/* per-pass tid hash table */
	xlog_rec_header_t *rhead,
	xfs_caddr_t	dp,		/* start of unpacked record body */
	int		pass)
{
	xfs_caddr_t	lp;		/* one past the end of the body */
	int		num_logops;
	xlog_op_header_t *ohead;
	xlog_recover_t	*trans;
	xlog_tid_t	tid;
	int		error;
	unsigned long	hash;
	uint		flags;

	lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
	num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);

	/* check the log format matches our own - else we can't recover */
	if (xlog_header_check_recover(log->l_mp, rhead))
		return (XFS_ERROR(EIO));

	while ((dp < lp) && num_logops) {
		ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
		ohead = (xlog_op_header_t *)dp;
		dp += sizeof(xlog_op_header_t);
		if (ohead->oh_clientid != XFS_TRANSACTION &&
		    ohead->oh_clientid != XFS_LOG) {
			xlog_warn(
		"XFS: xlog_recover_process_data: bad clientid");
			ASSERT(0);
			return (XFS_ERROR(EIO));
		}
		tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
		hash = XLOG_RHASH(tid);
		trans = xlog_recover_find_tid(rhash[hash], tid);
		if (trans == NULL) {		   /* not found; add new tid */
			if (ohead->oh_flags & XLOG_START_TRANS)
				xlog_recover_new_tid(&rhash[hash], tid,
					INT_GET(rhead->h_lsn, ARCH_CONVERT));
		} else {
			ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
			/* see comment above this function re: r_state --
			 * WAS_CONT means the previous op was partial, so a
			 * CONTINUE flag here continues old data, not new */
			flags = ohead->oh_flags & ~XLOG_END_TRANS;
			if (flags & XLOG_WAS_CONT_TRANS)
				flags &= ~XLOG_CONTINUE_TRANS;
			switch (flags) {
			case XLOG_COMMIT_TRANS:
				error = xlog_recover_commit_trans(log,
						&rhash[hash], trans, pass);
				break;
			case XLOG_UNMOUNT_TRANS:
				error = xlog_recover_unmount_trans(trans);
				break;
			case XLOG_WAS_CONT_TRANS:
				error = xlog_recover_add_to_cont_trans(trans,
						dp, INT_GET(ohead->oh_len,
							ARCH_CONVERT));
				break;
			case XLOG_START_TRANS:
				/* a START for a tid we already track */
				xlog_warn(
			"XFS: xlog_recover_process_data: bad transaction");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			case 0:
			case XLOG_CONTINUE_TRANS:
				error = xlog_recover_add_to_trans(trans,
						dp, INT_GET(ohead->oh_len,
							ARCH_CONVERT));
				break;
			default:
				xlog_warn(
			"XFS: xlog_recover_process_data: bad flag");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			}
			if (error)
				return error;
		}
		/* advance past this op's payload to the next op header */
		dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
		num_logops--;
	}
	return 0;
}
d321ceac | 1183 | |
72c5917e NS |
1184 | STATIC int |
1185 | xlog_valid_rec_header( | |
1186 | xlog_t *log, | |
1187 | xlog_rec_header_t *rhead, | |
1188 | xfs_daddr_t blkno) | |
1189 | { | |
b0e364f6 | 1190 | int hlen; |
72c5917e NS |
1191 | |
1192 | if (unlikely( | |
1193 | (INT_GET(rhead->h_magicno, ARCH_CONVERT) != | |
1194 | XLOG_HEADER_MAGIC_NUM))) { | |
1195 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", | |
1196 | XFS_ERRLEVEL_LOW, log->l_mp); | |
1197 | return XFS_ERROR(EFSCORRUPTED); | |
1198 | } | |
1199 | if (unlikely( | |
46eca962 | 1200 | (!rhead->h_version || |
72c5917e NS |
1201 | (INT_GET(rhead->h_version, ARCH_CONVERT) & |
1202 | (~XLOG_VERSION_OKBITS)) != 0))) { | |
1203 | xlog_warn("XFS: %s: unrecognised log version (%d).", | |
1204 | __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT)); | |
1205 | return XFS_ERROR(EIO); | |
1206 | } | |
1207 | ||
1208 | /* LR body must have data or it wouldn't have been written */ | |
b0e364f6 NS |
1209 | hlen = INT_GET(rhead->h_len, ARCH_CONVERT); |
1210 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { | |
72c5917e NS |
1211 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
1212 | XFS_ERRLEVEL_LOW, log->l_mp); | |
1213 | return XFS_ERROR(EFSCORRUPTED); | |
1214 | } | |
1215 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { | |
1216 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", | |
1217 | XFS_ERRLEVEL_LOW, log->l_mp); | |
1218 | return XFS_ERROR(EFSCORRUPTED); | |
1219 | } | |
1220 | return 0; | |
1221 | } | |
1222 | ||
d321ceac NS |
1223 | /* |
1224 | * Read the log from tail to head and process the log records found. | |
1225 | * Handle the two cases where the tail and head are in the same cycle | |
1226 | * and where the active portion of the log wraps around the end of | |
4ed50f8a | 1227 | * the physical log separately. The pass parameter is passed through |
d321ceac NS |
1228 | * to the routines called to process the data and is not looked at |
1229 | * here. | |
1230 | */ | |
int
xlog_do_recovery_pass(
	xlog_t			*log,
	xfs_daddr_t		head_blk,	/* first block past live data */
	xfs_daddr_t		tail_blk,	/* first block of live data */
	int			pass)		/* passed through to processing */
{
	xlog_rec_header_t	*rhead;
	xfs_daddr_t		blk_no;
	xfs_caddr_t		bufaddr, offset;
	xfs_buf_t		*hbp, *dbp;	/* header / data buffers */
	int			error = 0, h_size;
	int			bblks, split_bblks;	/* data blocks (this/pre-wrap) */
	int			hblks, split_hblks, wrapped_hblks;
	xlog_recover_t		*rhash[XLOG_RHASH_SIZE];

	ASSERT(head_blk != tail_blk);

	/*
	 * Read the header of the tail block and get the iclog buffer size from
	 * h_size.  Use this to tell how many sectors make up the log header.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/*
		 * When using variable length iclogs, read first sector of
		 * iclog header and extract the header size from it.  Get a
		 * new hbp that is the correct size.
		 */
		hbp = xlog_get_bp(log, 1);
		if (!hbp)
			return ENOMEM;
		if ((error = xlog_bread(log, tail_blk, 1, hbp)))
			goto bread_err1;
		offset = xlog_align(log, tail_blk, 1, hbp);
		rhead = (xlog_rec_header_t *)offset;
		error = xlog_valid_rec_header(log, rhead, tail_blk);
		if (error)
			goto bread_err1;
		h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
		if ((INT_GET(rhead->h_version, ARCH_CONVERT)
				& XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			/* header spans multiple basic blocks; round up */
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
			xlog_put_bp(hbp);
			hbp = xlog_get_bp(log, hblks);
		} else {
			hblks = 1;
		}
	} else {
		/* v1 logs always use a single-block header */
		ASSERT(log->l_sectbb_log == 0);
		hblks = 1;
		hbp = xlog_get_bp(log, 1);
		h_size = XLOG_BIG_RECORD_BSIZE;
	}

	if (!hbp)
		return ENOMEM;
	dbp = xlog_get_bp(log, BTOBB(h_size));
	if (!dbp) {
		xlog_put_bp(hbp);
		return ENOMEM;
	}

	memset(rhash, 0, sizeof(rhash));
	if (tail_blk <= head_blk) {
		/* simple case: live region does not wrap the physical end */
		for (blk_no = tail_blk; blk_no < head_blk; ) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no, hblks, hbp);
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead, blk_no);
			if (error)
				goto bread_err2;

			/* blocks in data section */
			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			error = xlog_bread(log, blk_no + hblks, bblks, dbp);
			if (error)
				goto bread_err2;
			offset = xlog_align(log, blk_no + hblks, bblks, dbp);
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log,
						rhash, rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks + hblks;
		}
	} else {
		/*
		 * Perform recovery around the end of the physical log.
		 * When the head is not on the same cycle number as the tail,
		 * we can't do a sequential recovery as above.
		 */
		blk_no = tail_blk;
		while (blk_no < log->l_logBBsize) {
			/*
			 * Check for header wrapping around physical end-of-log
			 */
			offset = NULL;
			split_hblks = 0;
			wrapped_hblks = 0;
			if (blk_no + hblks <= log->l_logBBsize) {
				/* Read header in one read */
				error = xlog_bread(log, blk_no, hblks, hbp);
				if (error)
					goto bread_err2;
				offset = xlog_align(log, blk_no, hblks, hbp);
			} else {
				/* This LR is split across physical log end */
				if (blk_no != log->l_logBBsize) {
					/* some data before physical log end */
					ASSERT(blk_no <= INT_MAX);
					split_hblks = log->l_logBBsize - (int)blk_no;
					ASSERT(split_hblks > 0);
					if ((error = xlog_bread(log, blk_no,
							split_hblks, hbp)))
						goto bread_err2;
					offset = xlog_align(log, blk_no,
							split_hblks, hbp);
				}
				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
				 *   aligned;
				 * - we read the log end (LR header start)
				 *   _first_, then the log start (LR header end)
				 *   - order is important.
				 */
				bufaddr = XFS_BUF_PTR(hbp);
				XFS_BUF_SET_PTR(hbp,
						bufaddr + BBTOB(split_hblks),
						BBTOB(hblks - split_hblks));
				wrapped_hblks = hblks - split_hblks;
				error = xlog_bread(log, 0, wrapped_hblks, hbp);
				if (error)
					goto bread_err2;
				/* restore the buffer to its real base/size */
				XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
				if (!offset)
					offset = xlog_align(log, 0,
							wrapped_hblks, hbp);
			}
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead,
						split_hblks ? blk_no : 0);
			if (error)
				goto bread_err2;

			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			blk_no += hblks;

			/* Read in data for log record */
			if (blk_no + bblks <= log->l_logBBsize) {
				error = xlog_bread(log, blk_no, bblks, dbp);
				if (error)
					goto bread_err2;
				offset = xlog_align(log, blk_no, bblks, dbp);
			} else {
				/* This log record is split across the
				 * physical end of log */
				offset = NULL;
				split_bblks = 0;
				if (blk_no != log->l_logBBsize) {
					/* some data is before the physical
					 * end of log */
					ASSERT(!wrapped_hblks);
					ASSERT(blk_no <= INT_MAX);
					split_bblks =
						log->l_logBBsize - (int)blk_no;
					ASSERT(split_bblks > 0);
					if ((error = xlog_bread(log, blk_no,
							split_bblks, dbp)))
						goto bread_err2;
					offset = xlog_align(log, blk_no,
							split_bblks, dbp);
				}
				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
				 *   aligned;
				 * - we read the log end (LR header start)
				 *   _first_, then the log start (LR header end)
				 *   - order is important.
				 */
				bufaddr = XFS_BUF_PTR(dbp);
				XFS_BUF_SET_PTR(dbp,
						bufaddr + BBTOB(split_bblks),
						BBTOB(bblks - split_bblks));
				if ((error = xlog_bread(log, wrapped_hblks,
						bblks - split_bblks, dbp)))
					goto bread_err2;
				/* restore the buffer to its real base/size */
				XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
				if (!offset)
					offset = xlog_align(log, wrapped_hblks,
						bblks - split_bblks, dbp);
			}
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log, rhash,
							rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks;
		}

		/* wrap blk_no back to the start of the physical log */
		ASSERT(blk_no >= log->l_logBBsize);
		blk_no -= log->l_logBBsize;

		/* read first part of physical log */
		while (blk_no < head_blk) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no, hblks, hbp);
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead, blk_no);
			if (error)
				goto bread_err2;
			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no+hblks, bblks, dbp);
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log, rhash,
							rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks + hblks;
		}
	}

 bread_err2:
	xlog_put_bp(dbp);
 bread_err1:
	xlog_put_bp(hbp);
	return error;
}