/* XFS log recovery support (libxlog). */
d321ceac | 1 | /* |
0d3e0b37 | 2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. |
d321ceac NS |
3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | |
5 | * under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it would be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
11 | * | |
12 | * Further, this software is distributed without any warranty that it is | |
13 | * free of the rightful claim of any third person regarding infringement | |
14 | * or the like. Any license provided herein, whether implied or | |
15 | * otherwise, applies only to this software file. Patent licenses, if | |
16 | * any, provided herein do not apply to combinations of this program with | |
17 | * other software, or any other product whatsoever. | |
18 | * | |
19 | * You should have received a copy of the GNU General Public License along | |
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | |
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | |
22 | * | |
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | |
24 | * Mountain View, CA 94043, or: | |
25 | * | |
26 | * http://www.sgi.com | |
27 | * | |
28 | * For further information regarding this notice, see: | |
29 | * | |
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | |
31 | */ | |
32 | ||
33 | #include <libxlog.h> | |
34 | ||
35 | /* | |
36 | * This routine finds (to an approximation) the first block in the physical | |
37 | * log which contains the given cycle. It uses a binary search algorithm. | |
38 | * Note that the algorithm can not be perfect because the disk will not | |
39 | * necessarily be perfect. | |
40 | */ | |
41 | int | |
42 | xlog_find_cycle_start(xlog_t *log, | |
43 | xfs_buf_t *bp, | |
44 | xfs_daddr_t first_blk, | |
45 | xfs_daddr_t *last_blk, | |
46 | uint cycle) | |
47 | { | |
48 | xfs_daddr_t mid_blk; | |
49 | uint mid_cycle; | |
50 | int error; | |
51 | ||
52 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
53 | while (mid_blk != first_blk && mid_blk != *last_blk) { | |
54 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | |
55 | return error; | |
56 | mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); | |
57 | if (mid_cycle == cycle) { | |
58 | *last_blk = mid_blk; | |
59 | /* last_half_cycle == mid_cycle */ | |
60 | } else { | |
61 | first_blk = mid_blk; | |
62 | /* first_half_cycle == mid_cycle */ | |
63 | } | |
64 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
65 | } | |
66 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | |
67 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | |
68 | ||
69 | return 0; | |
70 | } /* xlog_find_cycle_start */ | |
71 | ||
72 | ||
73 | /* | |
74 | * Check that the range of blocks does not contain the cycle number | |
75 | * given. The scan needs to occur from front to back and the ptr into the | |
76 | * region must be updated since a later routine will need to perform another | |
77 | * test. If the region is completely good, we end up returning the same | |
78 | * last block number. | |
79 | * | |
ce029dc1 | 80 | * Set blkno to -1 if we encounter no errors. This is an invalid block number |
d321ceac NS |
81 | * since we don't ever expect logs to get this large. |
82 | */ | |
83 | ||
ce029dc1 | 84 | STATIC int |
d321ceac NS |
85 | xlog_find_verify_cycle( xlog_t *log, |
86 | xfs_daddr_t start_blk, | |
87 | int nbblks, | |
ce029dc1 ES |
88 | uint stop_on_cycle_no, |
89 | xfs_daddr_t *new_blk) | |
d321ceac | 90 | { |
ce029dc1 | 91 | xfs_daddr_t i, j; |
d321ceac NS |
92 | uint cycle; |
93 | xfs_buf_t *bp; | |
94 | char *buf = NULL; | |
95 | int error = 0; | |
96 | xfs_daddr_t bufblks = nbblks; | |
97 | ||
98 | while (!(bp = xlog_get_bp(bufblks, log->l_mp))) { | |
99 | /* can't get enough memory to do everything in one big buffer */ | |
100 | bufblks >>= 1; | |
101 | if (!bufblks) | |
ce029dc1 | 102 | return ENOMEM; |
d321ceac NS |
103 | } |
104 | ||
105 | ||
106 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { | |
107 | int bcount = min(bufblks, (start_blk + nbblks - i)); | |
108 | ||
109 | if ((error = xlog_bread(log, i, bcount, bp))) | |
110 | goto out; | |
111 | ||
112 | buf = XFS_BUF_PTR(bp); | |
113 | for (j = 0; j < bcount; j++) { | |
114 | cycle = GET_CYCLE(buf, ARCH_CONVERT); | |
115 | if (cycle == stop_on_cycle_no) { | |
e56fcdce | 116 | *new_blk = i+j; |
d321ceac NS |
117 | goto out; |
118 | } | |
119 | ||
120 | buf += BBSIZE; | |
121 | } | |
122 | } | |
123 | ||
ce029dc1 | 124 | *new_blk = -1; |
d321ceac NS |
125 | |
126 | out: | |
127 | xlog_put_bp(bp); | |
128 | ||
129 | return error; | |
130 | } /* xlog_find_verify_cycle */ | |
131 | ||
132 | ||
/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 *
 * Returns 0 on success, -1 when the scan reaches the start of the physical
 * log without finding a record header (caller may retry from the end),
 * otherwise ENOMEM or a read/validation error.
 */

STATIC int
xlog_find_verify_log_record(xlog_t	*log,
			    xfs_daddr_t	start_blk,
			    xfs_daddr_t	*last_blk,
			    int		extra_bblks)
{
	xfs_daddr_t	i;
	xfs_buf_t	*bp;
	char		*buf = NULL;
	xlog_rec_header_t *head = NULL;
	int		error = 0;
	int		smallmem = 0;	/* set when only a 1-block buffer fits */
	int		num_blks = *last_blk - start_blk;
	int		xhdrs;

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
		/* Fall back to reading one block at a time. */
		if (!(bp = xlog_get_bp(1, log->l_mp)))
			return ENOMEM;
		smallmem = 1;
		buf = XFS_BUF_PTR(bp);
	} else {
		/* Read the whole range once and point buf at its LAST block,
		 * since the scan below walks backwards through memory. */
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		buf = XFS_BUF_PTR(bp) + (num_blks - 1) * BBSIZE;
	}


	/* Walk backwards from the block before *last_blk looking for a
	 * log record header magic number. */
	for (i=(*last_blk)-1; i>=0; i--) {
		if (i < start_blk) {
			/* legal log record not found */
			xlog_warn("XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		/* In smallmem mode each probe is a fresh 1-block read;
		 * otherwise buf already holds the data and is just decremented. */
		if (smallmem && (error = xlog_bread(log, i, 1, bp)))
			goto out;
		head = (xlog_rec_header_t*)buf;

		if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
			break;

		if (!smallmem)
			buf -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/* we have the final block of the good log (the first block
	 * of the log record _before_ the head.  So we check the uuid.
	 */

	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2 logs may use multiple header blocks per record;
		 * round the header size up to whole XLOG_HEADER_CYCLE_SIZE units. */
		int h_size = INT_GET(head->h_size, ARCH_CONVERT);
		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	/* If the distance back to the header doesn't equal a full record
	 * (data + header blocks), *last_blk was mid-record: pull it back. */
	if (*last_blk - i + extra_bblks
			!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);

	return error;
} /* xlog_find_verify_log_record */
237 | ||
/*
 * Head is defined to be the point of the log where the next log write
 * write could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Also called from xfs_log_print.c
 *
 * Return: zero if normal, non-zero if error.
 */
int
xlog_find_head(xlog_t  *log,
	       xfs_daddr_t *return_head_blk)
{
	xfs_buf_t   *bp;
	xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
	int	    num_scan_bblks;
	uint	    first_half_cycle, last_half_cycle;
	uint	    stop_on_cycle;
	int	    error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;

		/* is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log\n");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;				/* get cycle # of 1st block */
	bp = xlog_get_bp(1,log->l_mp);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);

	last_blk = head_blk = log_bbnum-1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an illegal block number, but this
	 * value makes the math correct.  If head_blk doesn't changed through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have cycle
		 * number last_half_cycle.  We need to scan backwards from the
		 * end verifying that there are no holes still containing
		 * last_half_cycle - 1.  If we find such a hole, then the start
		 * of that hole will be the new head.  The simple case looks like
		 *	  x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *	  x | x + 1 | x ... | x
		 * In this case the head really is somwhere at the end of the
		 * log, as one of the latest writes at the beginning was incomplete.
		 * One more case is
		 *	  x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and the
		 * head has to end up at the start of the x-1 hole at the end of
		 * the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.  We
		 * don't worry about the x+1 blocks that we encounter, because
		 * we know that they cannot be the head since the log started with
		 * x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle number
		 * matching last_half_cycle.  We expect the log to be some
		 * variation on
		 *	  x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will be
		 * where the new head belongs.  First we do a binary search for
		 * the first occurrence of last_half_cycle.  The binary search
		 * may not be totally accurate, so then we scan back from there
		 * looking for occurrences of last_half_cycle before us.  If
		 * that backwards scan wraps around the beginning of the log,
		 * then we look for occurrences of last_half_cycle - 1 at the
		 * end of the log.  The cases we're looking for look like
		 *	  x + 1 ... | x | x + 1 | x ...
		 *			    ^ binary search stopped here
		 * or
		 *	  x + 1 ... | x ... | x - 1 | x
		 *	  <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						   &head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
						    stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.  First
		 * we scan the physical end of the log.  In this part of the log,
		 * we are looking for blocks with cycle number last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as we've
		 * found a hole that didn't get written in going around the end
		 * of the physical log.  The simple case for this is
		 *	  x + 1 ... | x ... | x - 1 | x
		 *	  <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of the
		 * log looking for occurrences of last_half_cycle.  If we find one,
		 * then our current estimate for the location of the first
		 * occurrence of last_half_cycle is wrong and we move back to the
		 * hole we've found.  This case looks like
		 *	  x + 1 ... | x | x + 1 | x ...
		 *			    ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k logs is
		 *	  x + 1 ... | x ... | x+1 | x ...
		 *		     ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the x+1
		 * blocks.  We need to skip past those since that is certainly not
		 * the head of the log.  By searching for last_half_cycle-1 we
		 * accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks-(int)head_blk, (stop_on_cycle - 1),
					&new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical log
		 * is good.  This scan needs to verify that it doesn't find the
		 * last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
						    stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks;	/* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log,
							 start_blk,
							 &head_blk,
							 0)) == -1) {
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log,
							 start_blk,
							 &head_blk,
							 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
								 start_blk,
								 &new_blk,
								 (int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

bp_err:
	xlog_put_bp(bp);

	if (error)
		xlog_warn("XFS: failed to find log head");

	return error;
} /* xlog_find_head */
495 | ||
496 | /* | |
497 | * Find the sync block number or the tail of the log. | |
498 | * | |
499 | * This will be the block number of the last record to have its | |
500 | * associated buffers synced to disk. Every log record header has | |
501 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | |
502 | * to get a sync block number. The only concern is to figure out which | |
503 | * log record header to believe. | |
504 | * | |
505 | * The following algorithm uses the log record header with the largest | |
506 | * lsn. The entire log record does not need to be valid. We only care | |
507 | * that the header is valid. | |
508 | * | |
509 | * We could speed up search by using current head_blk buffer, but it is not | |
510 | * available. | |
511 | */ | |
512 | int | |
513 | xlog_find_tail(xlog_t *log, | |
514 | xfs_daddr_t *head_blk, | |
515 | xfs_daddr_t *tail_blk, | |
516 | int readonly) | |
517 | { | |
518 | xlog_rec_header_t *rhead; | |
519 | xlog_op_header_t *op_head; | |
520 | xfs_buf_t *bp; | |
521 | int error, i, found; | |
522 | xfs_daddr_t umount_data_blk; | |
523 | xfs_daddr_t after_umount_blk; | |
524 | xfs_lsn_t tail_lsn; | |
73bf5988 | 525 | int hblks; |
d321ceac NS |
526 | |
527 | found = error = 0; | |
528 | ||
529 | /* | |
530 | * Find previous log record | |
531 | */ | |
532 | if ((error = xlog_find_head(log, head_blk))) | |
533 | return error; | |
534 | ||
535 | bp = xlog_get_bp(1,log->l_mp); | |
536 | if (!bp) | |
ce029dc1 | 537 | return ENOMEM; |
d321ceac NS |
538 | if (*head_blk == 0) { /* special case */ |
539 | if ((error = xlog_bread(log, 0, 1, bp))) | |
540 | goto bread_err; | |
541 | if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) { | |
542 | *tail_blk = 0; | |
543 | /* leave all other log inited values alone */ | |
544 | goto exit; | |
545 | } | |
546 | } | |
547 | ||
548 | /* | |
549 | * Search backwards looking for log record header block | |
550 | */ | |
551 | ASSERT(*head_blk < INT_MAX); | |
552 | for (i=(int)(*head_blk)-1; i>=0; i--) { | |
553 | if ((error = xlog_bread(log, i, 1, bp))) | |
554 | goto bread_err; | |
555 | if (INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) { | |
556 | found = 1; | |
557 | break; | |
558 | } | |
559 | } | |
560 | /* | |
561 | * If we haven't found the log record header block, start looking | |
562 | * again from the end of the physical log. XXXmiken: There should be | |
563 | * a check here to make sure we didn't search more than N blocks in | |
564 | * the previous code. | |
565 | */ | |
566 | if (!found) { | |
567 | for (i=log->l_logBBsize-1; i>=(int)(*head_blk); i--) { | |
568 | if ((error = xlog_bread(log, i, 1, bp))) | |
569 | goto bread_err; | |
570 | if (INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) { | |
571 | found = 2; | |
572 | break; | |
573 | } | |
574 | } | |
575 | } | |
576 | if (!found) { | |
577 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | |
578 | ASSERT(0); | |
579 | return XFS_ERROR(EIO); | |
580 | } | |
581 | ||
582 | /* find blk_no of tail of log */ | |
583 | rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp); | |
584 | *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT); | |
585 | ||
586 | /* | |
587 | * Reset log values according to the state of the log when we | |
588 | * crashed. In the case where head_blk == 0, we bump curr_cycle | |
589 | * one because the next write starts a new cycle rather than | |
590 | * continuing the cycle of the last good log record. At this | |
591 | * point we have guaranteed that all partial log records have been | |
592 | * accounted for. Therefore, we know that the last good log record | |
593 | * written was complete and ended exactly on the end boundary | |
594 | * of the physical log. | |
595 | */ | |
596 | log->l_prev_block = i; | |
597 | log->l_curr_block = (int)*head_blk; | |
598 | log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); | |
599 | if (found == 2) | |
600 | log->l_curr_cycle++; | |
601 | log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); | |
602 | log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); | |
603 | log->l_grant_reserve_cycle = log->l_curr_cycle; | |
604 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | |
605 | log->l_grant_write_cycle = log->l_curr_cycle; | |
606 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | |
607 | ||
608 | /* | |
609 | * Look for unmount record. If we find it, then we know there | |
610 | * was a clean unmount. Since 'i' could be the last block in | |
611 | * the physical log, we convert to a log block before comparing | |
612 | * to the head_blk. | |
613 | * | |
614 | * Save the current tail lsn to use to pass to | |
615 | * xlog_clear_stale_blocks() below. We won't want to clear the | |
616 | * unmount record if there is one, so we pass the lsn of the | |
617 | * unmount record rather than the block after it. | |
618 | */ | |
73bf5988 SL |
619 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { |
620 | int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); | |
621 | int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); | |
622 | if ((h_version && XLOG_VERSION_2) && | |
623 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | |
624 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | |
625 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
626 | hblks++; | |
627 | } else { | |
628 | hblks = 1; | |
629 | } | |
630 | } else { | |
631 | hblks = 1; | |
632 | } | |
633 | after_umount_blk = (i + hblks + | |
634 | (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; | |
d321ceac NS |
635 | tail_lsn = log->l_tail_lsn; |
636 | if (*head_blk == after_umount_blk && INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { | |
73bf5988 | 637 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
d321ceac NS |
638 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { |
639 | goto bread_err; | |
640 | } | |
641 | op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp); | |
642 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | |
643 | /* | |
644 | * Set tail and last sync so that newly written | |
645 | * log records will point recovery to after the | |
646 | * current unmount record. | |
647 | */ | |
648 | ASSIGN_ANY_LSN(log->l_tail_lsn, log->l_curr_cycle, | |
649 | after_umount_blk, ARCH_NOCONVERT); | |
650 | ASSIGN_ANY_LSN(log->l_last_sync_lsn, log->l_curr_cycle, | |
651 | after_umount_blk, ARCH_NOCONVERT); | |
652 | *tail_blk = after_umount_blk; | |
653 | } | |
654 | } | |
655 | ||
656 | #ifdef __KERNEL__ | |
657 | /* | |
658 | * Make sure that there are no blocks in front of the head | |
659 | * with the same cycle number as the head. This can happen | |
660 | * because we allow multiple outstanding log writes concurrently, | |
661 | * and the later writes might make it out before earlier ones. | |
662 | * | |
663 | * We use the lsn from before modifying it so that we'll never | |
664 | * overwrite the unmount record after a clean unmount. | |
665 | * | |
666 | * Do this only if we are going to recover the filesystem | |
667 | */ | |
668 | if (!readonly) | |
669 | error = xlog_clear_stale_blocks(log, tail_lsn); | |
670 | #endif | |
671 | ||
672 | bread_err: | |
673 | exit: | |
674 | xlog_put_bp(bp); | |
675 | ||
676 | if (error) | |
677 | xlog_warn("XFS: failed to locate log tail"); | |
678 | ||
679 | return error; | |
680 | } /* xlog_find_tail */ | |
681 | ||
d321ceac NS |
682 | /* |
683 | * Is the log zeroed at all? | |
684 | * | |
685 | * The last binary search should be changed to perform an X block read | |
686 | * once X becomes small enough. You can then search linearly through | |
687 | * the X blocks. This will cut down on the number of reads we need to do. | |
688 | * | |
689 | * If the log is partially zeroed, this routine will pass back the blkno | |
690 | * of the first block with cycle number 0. It won't have a complete LR | |
691 | * preceding it. | |
692 | * | |
693 | * Return: | |
694 | * 0 => the log is completely written to | |
695 | * -1 => use *blk_no as the first block of the log | |
696 | * >0 => error has occurred | |
697 | */ | |
698 | int | |
699 | xlog_find_zeroed(struct log *log, | |
700 | xfs_daddr_t *blk_no) | |
701 | { | |
702 | xfs_buf_t *bp; | |
703 | uint first_cycle, last_cycle; | |
704 | xfs_daddr_t new_blk, last_blk, start_blk; | |
705 | xfs_daddr_t num_scan_bblks; | |
706 | int error, log_bbnum = log->l_logBBsize; | |
707 | ||
708 | error = 0; | |
709 | /* check totally zeroed log */ | |
710 | bp = xlog_get_bp(1,log->l_mp); | |
711 | if (!bp) | |
ce029dc1 | 712 | return ENOMEM; |
d321ceac NS |
713 | if ((error = xlog_bread(log, 0, 1, bp))) |
714 | goto bp_err; | |
715 | first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); | |
716 | if (first_cycle == 0) { /* completely zeroed log */ | |
717 | *blk_no = 0; | |
718 | xlog_put_bp(bp); | |
719 | return -1; | |
720 | } | |
721 | ||
722 | /* check partially zeroed log */ | |
723 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) | |
724 | goto bp_err; | |
725 | last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); | |
726 | if (last_cycle != 0) { /* log completely written to */ | |
727 | xlog_put_bp(bp); | |
728 | return 0; | |
729 | } else if (first_cycle != 1) { | |
730 | /* | |
731 | * If the cycle of the last block is zero, the cycle of | |
732 | * the first block must be 1. If it's not, maybe we're | |
733 | * not looking at a log... Bail out. | |
734 | */ | |
735 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); | |
736 | return XFS_ERROR(EINVAL); | |
737 | } | |
738 | ||
739 | /* we have a partially zeroed log */ | |
740 | last_blk = log_bbnum-1; | |
741 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) | |
742 | goto bp_err; | |
743 | ||
744 | /* | |
745 | * Validate the answer. Because there is no way to guarantee that | |
746 | * the entire log is made up of log records which are the same size, | |
747 | * we scan over the defined maximum blocks. At this point, the maximum | |
748 | * is not chosen to mean anything special. XXXmiken | |
749 | */ | |
73bf5988 | 750 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
d321ceac NS |
751 | ASSERT(num_scan_bblks <= INT_MAX); |
752 | ||
753 | if (last_blk < num_scan_bblks) | |
754 | num_scan_bblks = last_blk; | |
755 | start_blk = last_blk - num_scan_bblks; | |
756 | ||
757 | /* | |
758 | * We search for any instances of cycle number 0 that occur before | |
759 | * our current estimate of the head. What we're trying to detect is | |
760 | * 1 ... | 0 | 1 | 0... | |
761 | * ^ binary search ends here | |
762 | */ | |
ce029dc1 ES |
763 | if ((error = xlog_find_verify_cycle(log, start_blk, |
764 | (int)num_scan_bblks, 0, &new_blk))) | |
606d804d | 765 | goto bp_err; |
ce029dc1 ES |
766 | if (new_blk != -1) |
767 | last_blk = new_blk; | |
d321ceac NS |
768 | |
769 | /* | |
770 | * Potentially backup over partial log record write. We don't need | |
771 | * to search the end of the log because we know it is zero. | |
772 | */ | |
773 | if ((error = xlog_find_verify_log_record(log, start_blk, | |
79c48ada ES |
774 | &last_blk, 0)) == -1) { |
775 | error = XFS_ERROR(EIO); | |
776 | goto bp_err; | |
777 | } else if (error) | |
d321ceac NS |
778 | goto bp_err; |
779 | ||
780 | *blk_no = last_blk; | |
781 | bp_err: | |
782 | xlog_put_bp(bp); | |
783 | if (error) | |
784 | return error; | |
785 | return -1; | |
786 | } /* xlog_find_zeroed */ | |
787 | ||
/* stuff for transactional view */

/*
 * Restore the data blocks of a log record in place: the first word of each
 * data block was overwritten on disk with the record's cycle number (packed
 * into h_cycle_data / the v2 extended headers), and this puts the saved
 * words back.  With DEBUG+XFS_LOUD_RECOVERY, also verify the record
 * checksum and warn (once per log) on mismatch.
 */
STATIC void
xlog_unpack_data(xlog_rec_header_t *rhead,
		 xfs_caddr_t	   dp,
		 xlog_t		   *log)
{
	int    i, j, k;
	union ich {
		xlog_rec_header_t     hic_header;
		xlog_rec_ext_header_t hic_xheader;
		char		      hic_sector[XLOG_HEADER_SIZE];
	} *xhdr;

#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
	uint *up = (uint *)dp;
	uint chksum = 0;
#endif

	/* First XLOG_HEADER_CYCLE_SIZE/BBSIZE blocks: saved words live in
	 * the main record header's h_cycle_data array. */
	for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2 logs: remaining saved words come from the extended
		 * headers that follow the record header. */
		xhdr = (union ich*)rhead;
		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
	/* divide length by 4 to get # words */
	for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
		chksum ^= INT_GET(*up, ARCH_CONVERT);
		up++;
	}
	if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
	    if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) ||
		((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
		    cmn_err(CE_DEBUG,
			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
			    INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
		    cmn_err(CE_DEBUG,
"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
			    cmn_err(CE_DEBUG,
				"XFS: LogR this is a LogV2 filesystem\n");
		    }
		    /* warn only once per log */
		    log->l_flags |= XLOG_CHKSUM_MISMATCH;
	    }
	}
#endif /* DEBUG && XFS_LOUD_RECOVERY */
} /* xlog_unpack_data */
845 | ||
d321ceac NS |
846 | STATIC xlog_recover_t * |
847 | xlog_recover_find_tid(xlog_recover_t *q, | |
848 | xlog_tid_t tid) | |
849 | { | |
850 | xlog_recover_t *p = q; | |
851 | ||
852 | while (p != NULL) { | |
853 | if (p->r_log_tid == tid) | |
854 | break; | |
855 | p = p->r_next; | |
856 | } | |
857 | return p; | |
858 | } /* xlog_recover_find_tid */ | |
859 | ||
860 | STATIC void | |
861 | xlog_recover_put_hashq(xlog_recover_t **q, | |
862 | xlog_recover_t *trans) | |
863 | { | |
864 | trans->r_next = *q; | |
865 | *q = trans; | |
866 | } /* xlog_recover_put_hashq */ | |
867 | ||
868 | STATIC void | |
869 | xlog_recover_new_tid(xlog_recover_t **q, | |
870 | xlog_tid_t tid, | |
871 | xfs_lsn_t lsn) | |
872 | { | |
873 | xlog_recover_t *trans; | |
874 | ||
875 | trans = kmem_zalloc(sizeof(xlog_recover_t), 0); | |
876 | trans->r_log_tid = tid; | |
877 | trans->r_lsn = lsn; | |
878 | xlog_recover_put_hashq(q, trans); | |
879 | } /* xlog_recover_new_tid */ | |
880 | ||
881 | ||
882 | STATIC int | |
883 | xlog_recover_unlink_tid(xlog_recover_t **q, | |
884 | xlog_recover_t *trans) | |
885 | { | |
886 | xlog_recover_t *tp; | |
887 | int found = 0; | |
888 | ||
889 | ASSERT(trans != 0); | |
890 | if (trans == *q) { | |
891 | *q = (*q)->r_next; | |
892 | } else { | |
893 | tp = *q; | |
894 | while (tp != 0) { | |
895 | if (tp->r_next == trans) { | |
896 | found = 1; | |
897 | break; | |
898 | } | |
899 | tp = tp->r_next; | |
900 | } | |
901 | if (!found) { | |
902 | xlog_warn( | |
903 | "XFS: xlog_recover_unlink_tid: trans not found"); | |
904 | ASSERT(0); | |
905 | return XFS_ERROR(EIO); | |
906 | } | |
907 | tp->r_next = tp->r_next->r_next; | |
908 | } | |
909 | return 0; | |
910 | } /* xlog_recover_unlink_tid */ | |
911 | ||
912 | /* | |
913 | * Free up any resources allocated by the transaction | |
914 | * | |
915 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | |
916 | */ | |
917 | STATIC void | |
918 | xlog_recover_free_trans(xlog_recover_t *trans) | |
919 | { | |
920 | xlog_recover_item_t *first_item, *item, *free_item; | |
921 | int i; | |
922 | ||
923 | item = first_item = trans->r_itemq; | |
924 | do { | |
925 | free_item = item; | |
926 | item = item->ri_next; | |
927 | /* Free the regions in the item. */ | |
928 | for (i = 0; i < free_item->ri_cnt; i++) { | |
929 | kmem_free(free_item->ri_buf[i].i_addr, | |
930 | free_item->ri_buf[i].i_len); | |
931 | } | |
932 | /* Free the item itself */ | |
933 | kmem_free(free_item->ri_buf, | |
934 | (free_item->ri_total * sizeof(xfs_log_iovec_t))); | |
935 | kmem_free(free_item, sizeof(xlog_recover_item_t)); | |
936 | } while (first_item != item); | |
937 | /* Free the transaction recover structure */ | |
938 | kmem_free(trans, sizeof(xlog_recover_t)); | |
939 | } /* xlog_recover_free_trans */ | |
940 | ||
941 | ||
942 | STATIC int | |
943 | xlog_recover_commit_trans(xlog_t *log, | |
944 | xlog_recover_t **q, | |
945 | xlog_recover_t *trans, | |
946 | int pass) | |
947 | { | |
948 | int error; | |
949 | ||
950 | if ((error = xlog_recover_unlink_tid(q, trans))) | |
951 | return error; | |
952 | if ((error = xlog_recover_do_trans(log, trans, pass))) | |
953 | return error; | |
954 | xlog_recover_free_trans(trans); /* no error */ | |
955 | return 0; | |
956 | } /* xlog_recover_commit_trans */ | |
957 | ||
958 | STATIC void | |
959 | xlog_recover_insert_item_backq(xlog_recover_item_t **q, | |
960 | xlog_recover_item_t *item) | |
961 | { | |
962 | if (*q == 0) { | |
963 | item->ri_prev = item->ri_next = item; | |
964 | *q = item; | |
965 | } else { | |
966 | item->ri_next = *q; | |
967 | item->ri_prev = (*q)->ri_prev; | |
968 | (*q)->ri_prev = item; | |
969 | item->ri_prev->ri_next = item; | |
970 | } | |
971 | } /* xlog_recover_insert_item_backq */ | |
972 | ||
973 | STATIC void | |
974 | xlog_recover_add_item(xlog_recover_item_t **itemq) | |
975 | { | |
976 | xlog_recover_item_t *item; | |
977 | ||
978 | item = kmem_zalloc(sizeof(xlog_recover_item_t), 0); | |
979 | xlog_recover_insert_item_backq(itemq, item); | |
980 | } /* xlog_recover_add_item */ | |
981 | ||
982 | /* The next region to add is the start of a new region. It could be | |
983 | * a whole region or it could be the first part of a new region. Because | |
984 | * of this, the assumption here is that the type and size fields of all | |
985 | * format structures fit into the first 32 bits of the structure. | |
986 | * | |
987 | * This works because all regions must be 32 bit aligned. Therefore, we | |
988 | * either have both fields or we have neither field. In the case we have | |
989 | * neither field, the data part of the region is zero length. We only have | |
990 | * a log_op_header and can throw away the header since a new one will appear | |
991 | * later. If we have at least 4 bytes, then we can determine how many regions | |
992 | * will appear in the current log item. | |
993 | */ | |
994 | STATIC int | |
995 | xlog_recover_add_to_trans(xlog_recover_t *trans, | |
996 | xfs_caddr_t dp, | |
997 | int len) | |
998 | { | |
999 | xfs_inode_log_format_t *in_f; /* any will do */ | |
1000 | xlog_recover_item_t *item; | |
1001 | xfs_caddr_t ptr; | |
1002 | ||
1003 | if (!len) | |
1004 | return 0; | |
1005 | ptr = kmem_zalloc(len, 0); | |
1006 | bcopy(dp, ptr, len); | |
1007 | ||
1008 | in_f = (xfs_inode_log_format_t *)ptr; | |
1009 | item = trans->r_itemq; | |
1010 | if (item == 0) { | |
1011 | ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); | |
1012 | if (len == sizeof(xfs_trans_header_t)) | |
1013 | xlog_recover_add_item(&trans->r_itemq); | |
1014 | bcopy(dp, &trans->r_theader, len); /* s, d, l */ | |
1015 | return 0; | |
1016 | } | |
1017 | if (item->ri_prev->ri_total != 0 && | |
1018 | item->ri_prev->ri_total == item->ri_prev->ri_cnt) { | |
1019 | xlog_recover_add_item(&trans->r_itemq); | |
1020 | } | |
1021 | item = trans->r_itemq; | |
1022 | item = item->ri_prev; | |
1023 | ||
1024 | if (item->ri_total == 0) { /* first region to be added */ | |
1025 | item->ri_total = in_f->ilf_size; | |
1026 | ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); | |
1027 | item->ri_buf = kmem_zalloc((item->ri_total * | |
1028 | sizeof(xfs_log_iovec_t)), 0); | |
1029 | } | |
1030 | ASSERT(item->ri_total > item->ri_cnt); | |
1031 | /* Description region is ri_buf[0] */ | |
1032 | item->ri_buf[item->ri_cnt].i_addr = ptr; | |
1033 | item->ri_buf[item->ri_cnt].i_len = len; | |
1034 | item->ri_cnt++; | |
1035 | return 0; | |
1036 | } /* xlog_recover_add_to_trans */ | |
1037 | ||
/*
 * Append a continuation region (the tail of a region split across log
 * records) to the last region of the last item in trans, growing that
 * region's buffer with kmem_realloc.
 *
 * If no items exist yet, the bytes are the remainder of the transaction
 * header itself and are copied into the tail of r_theader.
 *
 * trans - transaction being assembled
 * dp    - continuation bytes from the log record
 * len   - number of continuation bytes
 *
 * Always returns 0.
 */
STATIC int
xlog_recover_add_to_cont_trans(xlog_recover_t	*trans,
			       xfs_caddr_t	dp,
			       int		len)
{
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr, old_ptr;
	int			old_len;

	item = trans->r_itemq;
	if (item == 0) {
		/* finish copying rest of trans header */
		xlog_recover_add_item(&trans->r_itemq);
		/* destination: the not-yet-filled tail of r_theader */
		ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
		bcopy(dp, ptr, len); /* s, d, l */
		return 0;
	}
	/* continuation always extends the most recently added item */
	item = item->ri_prev;

	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
	old_len = item->ri_buf[item->ri_cnt-1].i_len;

	/* grow the last region's buffer and append the new bytes */
	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
	bcopy(dp , &ptr[old_len], len); /* s, d, l */
	item->ri_buf[item->ri_cnt-1].i_len += len;
	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
	return 0;
}	/* xlog_recover_add_to_cont_trans */
1066 | ||
1067 | STATIC int | |
1068 | xlog_recover_unmount_trans(xlog_recover_t *trans) | |
1069 | { | |
1070 | /* Do nothing now */ | |
1071 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | |
1072 | return( 0 ); | |
1073 | } /* xlog_recover_unmount_trans */ | |
1074 | ||
1075 | ||
/*
 * Walk every log operation in one unpacked log record, assembling
 * in-core transactions in the rhash table keyed by tid, and committing
 * or discarding them as their end records are seen.
 *
 * log   - log being recovered
 * rhash - per-tid hash table of transactions still being assembled
 * rhead - header of the record whose unpacked data starts at dp
 * dp    - record data, INT_GET(rhead->h_len) bytes long
 * pass  - recovery pass number; forwarded untouched to the handlers
 *
 * Returns 0 on success, or an XFS_ERROR errno (EIO for a log-format
 * mismatch or a malformed operation).
 */
STATIC int
xlog_recover_process_data(xlog_t	    *log,
			  xlog_recover_t    *rhash[],
			  xlog_rec_header_t *rhead,
			  xfs_caddr_t	    dp,
			  int		    pass)
{
	xfs_caddr_t	lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT);
	int		num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
	xlog_op_header_t *ohead;
	xlog_recover_t	*trans;
	xlog_tid_t	tid;
	int		error;
	unsigned long	hash;
	uint		flags;

	/* check the log format matches our own - else we can't recover */
	if (xlog_header_check_recover(log->l_mp, rhead))
		return (XFS_ERROR(EIO));

	while ((dp < lp) && num_logops) {
		ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
		ohead = (xlog_op_header_t *)dp;
		dp += sizeof(xlog_op_header_t);
		if (ohead->oh_clientid != XFS_TRANSACTION &&
		    ohead->oh_clientid != XFS_LOG) {
			xlog_warn("XFS: xlog_recover_process_data: bad clientid");
			ASSERT(0);
			return (XFS_ERROR(EIO));
		}
		tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
		hash = XLOG_RHASH(tid);
		trans = xlog_recover_find_tid(rhash[hash], tid);
		if (trans == NULL) {		   /* not found; add new tid */
			/* only a start op may create a transaction;
			 * anything else for an unknown tid is skipped */
			if (ohead->oh_flags & XLOG_START_TRANS)
				xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT));
		} else {
			ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
			/* END may be combined with other flags; strip it,
			 * and WAS_CONT subsumes CONTINUE for dispatching */
			flags = ohead->oh_flags & ~XLOG_END_TRANS;
			if (flags & XLOG_WAS_CONT_TRANS)
				flags &= ~XLOG_CONTINUE_TRANS;
			switch (flags) {
			case XLOG_COMMIT_TRANS: {
				/* replay and free the finished transaction */
				error = xlog_recover_commit_trans(log, &rhash[hash],
								  trans, pass);
				break;
			}
			case XLOG_UNMOUNT_TRANS: {
				error = xlog_recover_unmount_trans(trans);
				break;
			}
			case XLOG_WAS_CONT_TRANS: {
				/* tail of a region split across records */
				error = xlog_recover_add_to_cont_trans(trans, dp,
					   INT_GET(ohead->oh_len, ARCH_CONVERT));
				break;
			}
			case XLOG_START_TRANS : {
				/* a start op for a tid we already track */
				xlog_warn("XFS: xlog_recover_process_data: bad transaction");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			}
			case 0:
			case XLOG_CONTINUE_TRANS: {
				/* a new region for the transaction */
				error = xlog_recover_add_to_trans(trans, dp,
					   INT_GET(ohead->oh_len, ARCH_CONVERT));
				break;
			}
			default: {
				xlog_warn("XFS: xlog_recover_process_data: bad flag");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			}
			} /* switch */
			if (error)
				return error;
		} /* if */
		dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
		num_logops--;
	}
	return( 0 );
}	/* xlog_recover_process_data */
1159 | ||
/*
 * Read the log from tail to head and process the log records found.
 * Handle the two cases where the tail and head are in the same cycle
 * and where the active portion of the log wraps around the end of
 * the physical log separately. The pass parameter is passed through
 * to the routines called to process the data and is not looked at
 * here.
 *
 * log      - log being recovered
 * head_blk - block number of the log head
 * tail_blk - block number of the log tail (recovery starts here)
 * pass     - recovery pass, forwarded to xlog_recover_process_data()
 *
 * Returns 0 on success, or an errno (ENOMEM, EIO, EFSCORRUPTED, or an
 * error propagated from xlog_bread/xlog_recover_process_data).
 */
int
xlog_do_recovery_pass(xlog_t	*log,
		      xfs_daddr_t head_blk,
		      xfs_daddr_t tail_blk,
		      int	pass)
{
	xlog_rec_header_t	*rhead;
	xfs_daddr_t		blk_no;
	xfs_caddr_t		bufaddr;
	xfs_buf_t		*hbp, *dbp;	/* header / data buffers */
	int			error, h_size;
	int			bblks, split_bblks;
	int			hblks, split_hblks, wrapped_hblks;
	xlog_recover_t		*rhash[XLOG_RHASH_SIZE];

	error = 0;


	/*
	 * Read the header of the tail block and get the iclog buffer size from
	 * h_size. Use this to tell how many sectors make up the log header.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/*
		 * When using variable length iclogs, read first sector of iclog
		 * header and extract the header size from it. Get a new hbp that
		 * is the correct size.
		 */
		hbp = xlog_get_bp(1, log->l_mp);
		if (!hbp)
			return ENOMEM;
		if ((error = xlog_bread(log, tail_blk, 1, hbp)))
			goto bread_err1;
		rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
		ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
			XLOG_HEADER_MAGIC_NUM);
		if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
			xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
			error = XFS_ERROR(EIO);
			goto bread_err1;
		}
		h_size = INT_GET(rhead->h_size, ARCH_CONVERT);

		if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			/* record header spans several sectors; round up and
			 * re-acquire a header buffer of the right size */
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
			xlog_put_bp(hbp);
			hbp = xlog_get_bp(hblks, log->l_mp);
		} else {
			hblks=1;
		}
	} else {
		/* v1 log: single-sector header, fixed record buffer size */
		hblks=1;
		hbp = xlog_get_bp(1, log->l_mp);
		h_size = XLOG_BIG_RECORD_BSIZE;
	}

	if (!hbp)
		return ENOMEM;
	dbp = xlog_get_bp(BTOBB(h_size),log->l_mp);
	if (!dbp) {
		xlog_put_bp(hbp);
		return ENOMEM;
	}

	bzero(rhash, sizeof(rhash));
	if (tail_blk <= head_blk) {
		/* simple case: active log region is contiguous */
		for (blk_no = tail_blk; blk_no < head_blk; ) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
			ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
			/* NOTE(review): the "<= INT_MAX" is inside BTOBB(),
			 * so this asserts BTOBB(0 or 1); it happens to behave
			 * like the intended check but the paren looks
			 * misplaced -- confirm against upstream */
			ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
			if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
				xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
				error = XFS_ERROR(EIO);
				goto bread_err2;
			}
			bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));	/* blocks in data section */
			if (bblks > 0) {
				if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
					goto bread_err2;
				xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
				if ((error = xlog_recover_process_data(log, rhash,
								rhead, XFS_BUF_PTR(dbp),
								pass)))
					goto bread_err2;
			}
			blk_no += (bblks+hblks);
		}
	} else {
		/*
		 * Perform recovery around the end of the physical log. When the head
		 * is not on the same cycle number as the tail, we can't do a sequential
		 * recovery as above.
		 */
		blk_no = tail_blk;
		while (blk_no < log->l_logBBsize) {
			/*
			 * Check for header wrapping around physical end-of-log
			 */
			wrapped_hblks = 0;
			if (blk_no+hblks <= log->l_logBBsize) {
				/* Read header in one read */
				if ((error = xlog_bread(log, blk_no, hblks, hbp)))
					goto bread_err2;
			} else {
				/* This log record is split across physical end of log */
				split_hblks = 0;
				if (blk_no != log->l_logBBsize) {
					/* some data is before physical end of log */
					ASSERT(blk_no <= INT_MAX);
					split_hblks = log->l_logBBsize - (int)blk_no;
					ASSERT(split_hblks > 0);
					if ((error = xlog_bread(log, blk_no, split_hblks, hbp)))
						goto bread_err2;
				}
				/* temporarily point hbp past the first part so
				 * the wrapped tail reads into the right offset,
				 * then restore the buffer pointer */
				bufaddr = XFS_BUF_PTR(hbp);
				XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks),
						BBTOB(hblks - split_hblks));
				wrapped_hblks = hblks - split_hblks;
				if ((error = xlog_bread(log, 0, wrapped_hblks, hbp)))
					goto bread_err2;
				XFS_BUF_SET_PTR(hbp, bufaddr, hblks);
			}
			rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
			ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
			ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
			bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));

			/* LR body must have data or it wouldn't have been written */
			ASSERT(bblks > 0);
			blk_no += hblks;			/* successfully read header */

			/* runtime re-check of the ASSERTed conditions */
			if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
			    (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
			    (bblks <= 0)) {
				error = EFSCORRUPTED;
				goto bread_err2;
			}

			/* Read in data for log record */
			if (blk_no+bblks <= log->l_logBBsize) {
				if ((error = xlog_bread(log, blk_no, bblks, dbp)))
					goto bread_err2;
			} else {
				/* This log record is split across physical end of log */
				split_bblks = 0;
				if (blk_no != log->l_logBBsize) {

					/* some data is before physical end of log */
					ASSERT(blk_no <= INT_MAX);
					split_bblks = log->l_logBBsize - (int)blk_no;
					ASSERT(split_bblks > 0);
					if ((error = xlog_bread(log, blk_no, split_bblks, dbp)))
						goto bread_err2;
				}
				/* same pointer-shuffle as for the header: read
				 * the wrapped tail (which starts right after the
				 * wrapped header blocks at block 0) */
				bufaddr = XFS_BUF_PTR(dbp);
				XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks),
						BBTOB(bblks - split_bblks));
				if ((error = xlog_bread(log, wrapped_hblks,
							bblks - split_bblks, dbp)))
					goto bread_err2;
				XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE);
			}
			xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
			if ((error = xlog_recover_process_data(log, rhash,
							rhead, XFS_BUF_PTR(dbp),
							pass)))
				goto bread_err2;
			blk_no += bblks;
		}

		ASSERT(blk_no >= log->l_logBBsize);
		blk_no -= log->l_logBBsize;

		/* read first part of physical log */
		while (blk_no < head_blk) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
			ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
			ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
			bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			ASSERT(bblks > 0);
			if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
				goto bread_err2;
			xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
			if ((error = xlog_recover_process_data(log, rhash,
							rhead, XFS_BUF_PTR(dbp),
							pass)))
				goto bread_err2;
			blk_no += (bblks+hblks);
		}
	}

 bread_err2:
	xlog_put_bp(dbp);
 bread_err1:
	xlog_put_bp(hbp);

	return error;
}