]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxlog/xfs_log_recover.c
Bump revision number of version 2 log support
[thirdparty/xfsprogs-dev.git] / libxlog / xfs_log_recover.c
CommitLineData
d321ceac 1/*
0d3e0b37 2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
d321ceac
NS
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include <libxlog.h>
34
35/*
36 * This routine finds (to an approximation) the first block in the physical
37 * log which contains the given cycle. It uses a binary search algorithm.
38 * Note that the algorithm can not be perfect because the disk will not
39 * necessarily be perfect.
40 */
41int
42xlog_find_cycle_start(xlog_t *log,
43 xfs_buf_t *bp,
44 xfs_daddr_t first_blk,
45 xfs_daddr_t *last_blk,
46 uint cycle)
47{
48 xfs_daddr_t mid_blk;
49 uint mid_cycle;
50 int error;
51
52 mid_blk = BLK_AVG(first_blk, *last_blk);
53 while (mid_blk != first_blk && mid_blk != *last_blk) {
54 if ((error = xlog_bread(log, mid_blk, 1, bp)))
55 return error;
56 mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
57 if (mid_cycle == cycle) {
58 *last_blk = mid_blk;
59 /* last_half_cycle == mid_cycle */
60 } else {
61 first_blk = mid_blk;
62 /* first_half_cycle == mid_cycle */
63 }
64 mid_blk = BLK_AVG(first_blk, *last_blk);
65 }
66 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
67 (mid_blk == *last_blk && mid_blk-1 == first_blk));
68
69 return 0;
70} /* xlog_find_cycle_start */
71
72
73/*
74 * Check that the range of blocks does not contain the cycle number
75 * given. The scan needs to occur from front to back and the ptr into the
76 * region must be updated since a later routine will need to perform another
77 * test. If the region is completely good, we end up returning the same
78 * last block number.
79 *
ce029dc1 80 * Set blkno to -1 if we encounter no errors. This is an invalid block number
d321ceac
NS
81 * since we don't ever expect logs to get this large.
82 */
83
ce029dc1 84STATIC int
d321ceac
NS
85xlog_find_verify_cycle( xlog_t *log,
86 xfs_daddr_t start_blk,
87 int nbblks,
ce029dc1
ES
88 uint stop_on_cycle_no,
89 xfs_daddr_t *new_blk)
d321ceac 90{
ce029dc1 91 xfs_daddr_t i, j;
d321ceac
NS
92 uint cycle;
93 xfs_buf_t *bp;
94 char *buf = NULL;
95 int error = 0;
96 xfs_daddr_t bufblks = nbblks;
97
98 while (!(bp = xlog_get_bp(bufblks, log->l_mp))) {
99 /* can't get enough memory to do everything in one big buffer */
100 bufblks >>= 1;
101 if (!bufblks)
ce029dc1 102 return ENOMEM;
d321ceac
NS
103 }
104
105
106 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
107 int bcount = min(bufblks, (start_blk + nbblks - i));
108
109 if ((error = xlog_bread(log, i, bcount, bp)))
110 goto out;
111
112 buf = XFS_BUF_PTR(bp);
113 for (j = 0; j < bcount; j++) {
114 cycle = GET_CYCLE(buf, ARCH_CONVERT);
115 if (cycle == stop_on_cycle_no) {
e56fcdce 116 *new_blk = i+j;
d321ceac
NS
117 goto out;
118 }
119
120 buf += BBSIZE;
121 }
122 }
123
ce029dc1 124 *new_blk = -1;
d321ceac
NS
125
126out:
127 xlog_put_bp(bp);
128
129 return error;
130} /* xlog_find_verify_cycle */
131
132
/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 *
 * Returns 0 on success, -1 if the scan hit the start of the physical log
 * without finding a record header (caller may retry at the log's end),
 * or a positive error code.  On success *last_blk may be pulled back to
 * the header of a partially-written record.
 */

STATIC int
xlog_find_verify_log_record(xlog_t	*log,
			    xfs_daddr_t	start_blk,
			    xfs_daddr_t	*last_blk,
			    int		extra_bblks)
{
	xfs_daddr_t	i;
	xfs_buf_t	*bp;
	char		*buf = NULL;
	xlog_rec_header_t *head = NULL;
	int		error = 0;
	int		smallmem = 0;	/* set when we fall back to 1-block reads */
	int		num_blks = *last_blk - start_blk;
	int		xhdrs;		/* header blocks occupied by the record */

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	/*
	 * Prefer reading the whole range at once; if that allocation
	 * fails, fall back to a single-block buffer and re-read per
	 * iteration below.
	 */
	if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
		if (!(bp = xlog_get_bp(1, log->l_mp)))
			return ENOMEM;
		smallmem = 1;
		buf = XFS_BUF_PTR(bp);
	} else {
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		/* point at the last block of the buffered range */
		buf = XFS_BUF_PTR(bp) + (num_blks - 1) * BBSIZE;
	}


	/* walk backwards from the block before *last_blk looking for a header */
	for (i=(*last_blk)-1; i>=0; i--) {
		if (i < start_blk) {
			/* legal log record not found */
			xlog_warn("XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		if (smallmem && (error = xlog_bread(log, i, 1, bp)))
			goto out;
		head = (xlog_rec_header_t*)buf;

		if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
			break;

		if (!smallmem)
			buf -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/* we have the final block of the good log (the first block
	 * of the log record _before_ the head.  So we check the uuid.
	 */

	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2 logs: record header may span multiple basic blocks */
		int h_size = INT_GET(head->h_size, ARCH_CONVERT);
		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	/* record is only partially written: back last_blk up to its header */
	if (*last_blk - i + extra_bblks
			!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);

	return error;
}	/* xlog_find_verify_log_record */
237
/*
 * Head is defined to be the point of the log where the next log write
 * write could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Also called from xfs_log_print.c
 *
 * Return: zero if normal, non-zero if error.
 */
int
xlog_find_head(xlog_t  *log,
	       xfs_daddr_t *return_head_blk)
{
	xfs_buf_t   *bp;
	xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
	int	    num_scan_bblks;
	uint	    first_half_cycle, last_half_cycle;
	uint	    stop_on_cycle;
	int	    error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;

		/* is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log\n");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;				/* get cycle # of 1st block */
	bp = xlog_get_bp(1,log->l_mp);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);

	last_blk = head_blk = log_bbnum-1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an illegal block number, but this
	 * value makes the math correct.  If head_blk doesn't changed through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have cycle
		 * number last_half_cycle.  We need to scan backwards from the
		 * end verifying that there are no holes still containing
		 * last_half_cycle - 1.  If we find such a hole, then the start
		 * of that hole will be the new head.  The simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somewhere at the end of the
		 * log, as one of the latest writes at the beginning was incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and the
		 * head has to end up at the start of the x-1 hole at the end of
		 * the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.  We
		 * don't worry about the x+1 blocks that we encounter, because
		 * we know that they cannot be the head since the log started with
		 * x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle number
		 * matching last_half_cycle.  We expect the log to be some
		 * variation on
		 *        x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will be
		 * where the new head belongs.  First we do a binary search for
		 * the first occurrence of last_half_cycle.  The binary search
		 * may not be totally accurate, so then we scan back from there
		 * looking for occurrences of last_half_cycle before us.  If
		 * that backwards scan wraps around the beginning of the log,
		 * then we look for occurrences of last_half_cycle - 1 at the
		 * end of the log.  The cases we're looking for look like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * or
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						   &head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.  First
		 * we scan the physical end of the log.  In this part of the log,
		 * we are looking for blocks with cycle number last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as we've
		 * found a hole that didn't get written in going around the end
		 * of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of the
		 * log looking for occurrences of last_half_cycle.  If we find one,
		 * then our current estimate for the location of the first
		 * occurrence of last_half_cycle is wrong and we move back to the
		 * hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the x+1
		 * blocks.  We need to skip past those since that is certainly not
		 * the head of the log.  By searching for last_half_cycle-1 we
		 * accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks-(int)head_blk, (stop_on_cycle - 1),
					&new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical log
		 * is good.  This scan needs to verify that it doesn't find the
		 * last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log,
							 start_blk,
							 &head_blk,
							 0)) == -1) {
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log,
							 start_blk,
							 &head_blk,
							 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
								 start_blk,
								 &new_blk,
								 (int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

bp_err:
	xlog_put_bp(bp);

	if (error)
		xlog_warn("XFS: failed to find log head");

	return error;
}	/* xlog_find_head */
495
496/*
497 * Find the sync block number or the tail of the log.
498 *
499 * This will be the block number of the last record to have its
500 * associated buffers synced to disk. Every log record header has
501 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
502 * to get a sync block number. The only concern is to figure out which
503 * log record header to believe.
504 *
505 * The following algorithm uses the log record header with the largest
506 * lsn. The entire log record does not need to be valid. We only care
507 * that the header is valid.
508 *
509 * We could speed up search by using current head_blk buffer, but it is not
510 * available.
511 */
512int
513xlog_find_tail(xlog_t *log,
514 xfs_daddr_t *head_blk,
515 xfs_daddr_t *tail_blk,
516 int readonly)
517{
518 xlog_rec_header_t *rhead;
519 xlog_op_header_t *op_head;
520 xfs_buf_t *bp;
521 int error, i, found;
522 xfs_daddr_t umount_data_blk;
523 xfs_daddr_t after_umount_blk;
524 xfs_lsn_t tail_lsn;
73bf5988 525 int hblks;
d321ceac
NS
526
527 found = error = 0;
528
529 /*
530 * Find previous log record
531 */
532 if ((error = xlog_find_head(log, head_blk)))
533 return error;
534
535 bp = xlog_get_bp(1,log->l_mp);
536 if (!bp)
ce029dc1 537 return ENOMEM;
d321ceac
NS
538 if (*head_blk == 0) { /* special case */
539 if ((error = xlog_bread(log, 0, 1, bp)))
540 goto bread_err;
541 if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) {
542 *tail_blk = 0;
543 /* leave all other log inited values alone */
544 goto exit;
545 }
546 }
547
548 /*
549 * Search backwards looking for log record header block
550 */
551 ASSERT(*head_blk < INT_MAX);
552 for (i=(int)(*head_blk)-1; i>=0; i--) {
553 if ((error = xlog_bread(log, i, 1, bp)))
554 goto bread_err;
555 if (INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
556 found = 1;
557 break;
558 }
559 }
560 /*
561 * If we haven't found the log record header block, start looking
562 * again from the end of the physical log. XXXmiken: There should be
563 * a check here to make sure we didn't search more than N blocks in
564 * the previous code.
565 */
566 if (!found) {
567 for (i=log->l_logBBsize-1; i>=(int)(*head_blk); i--) {
568 if ((error = xlog_bread(log, i, 1, bp)))
569 goto bread_err;
570 if (INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
571 found = 2;
572 break;
573 }
574 }
575 }
576 if (!found) {
577 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
578 ASSERT(0);
579 return XFS_ERROR(EIO);
580 }
581
582 /* find blk_no of tail of log */
583 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
584 *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT);
585
586 /*
587 * Reset log values according to the state of the log when we
588 * crashed. In the case where head_blk == 0, we bump curr_cycle
589 * one because the next write starts a new cycle rather than
590 * continuing the cycle of the last good log record. At this
591 * point we have guaranteed that all partial log records have been
592 * accounted for. Therefore, we know that the last good log record
593 * written was complete and ended exactly on the end boundary
594 * of the physical log.
595 */
596 log->l_prev_block = i;
597 log->l_curr_block = (int)*head_blk;
598 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
599 if (found == 2)
600 log->l_curr_cycle++;
601 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
602 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
603 log->l_grant_reserve_cycle = log->l_curr_cycle;
604 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
605 log->l_grant_write_cycle = log->l_curr_cycle;
606 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
607
608 /*
609 * Look for unmount record. If we find it, then we know there
610 * was a clean unmount. Since 'i' could be the last block in
611 * the physical log, we convert to a log block before comparing
612 * to the head_blk.
613 *
614 * Save the current tail lsn to use to pass to
615 * xlog_clear_stale_blocks() below. We won't want to clear the
616 * unmount record if there is one, so we pass the lsn of the
617 * unmount record rather than the block after it.
618 */
73bf5988
SL
619 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
620 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
621 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
622 if ((h_version && XLOG_VERSION_2) &&
623 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
624 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
625 if (h_size % XLOG_HEADER_CYCLE_SIZE)
626 hblks++;
627 } else {
628 hblks = 1;
629 }
630 } else {
631 hblks = 1;
632 }
633 after_umount_blk = (i + hblks +
634 (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
d321ceac
NS
635 tail_lsn = log->l_tail_lsn;
636 if (*head_blk == after_umount_blk && INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
73bf5988 637 umount_data_blk = (i + hblks) % log->l_logBBsize;
d321ceac
NS
638 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
639 goto bread_err;
640 }
641 op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp);
642 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
643 /*
644 * Set tail and last sync so that newly written
645 * log records will point recovery to after the
646 * current unmount record.
647 */
648 ASSIGN_ANY_LSN(log->l_tail_lsn, log->l_curr_cycle,
649 after_umount_blk, ARCH_NOCONVERT);
650 ASSIGN_ANY_LSN(log->l_last_sync_lsn, log->l_curr_cycle,
651 after_umount_blk, ARCH_NOCONVERT);
652 *tail_blk = after_umount_blk;
653 }
654 }
655
656#ifdef __KERNEL__
657 /*
658 * Make sure that there are no blocks in front of the head
659 * with the same cycle number as the head. This can happen
660 * because we allow multiple outstanding log writes concurrently,
661 * and the later writes might make it out before earlier ones.
662 *
663 * We use the lsn from before modifying it so that we'll never
664 * overwrite the unmount record after a clean unmount.
665 *
666 * Do this only if we are going to recover the filesystem
667 */
668 if (!readonly)
669 error = xlog_clear_stale_blocks(log, tail_lsn);
670#endif
671
672bread_err:
673exit:
674 xlog_put_bp(bp);
675
676 if (error)
677 xlog_warn("XFS: failed to locate log tail");
678
679 return error;
680} /* xlog_find_tail */
681
d321ceac
NS
682/*
683 * Is the log zeroed at all?
684 *
685 * The last binary search should be changed to perform an X block read
686 * once X becomes small enough. You can then search linearly through
687 * the X blocks. This will cut down on the number of reads we need to do.
688 *
689 * If the log is partially zeroed, this routine will pass back the blkno
690 * of the first block with cycle number 0. It won't have a complete LR
691 * preceding it.
692 *
693 * Return:
694 * 0 => the log is completely written to
695 * -1 => use *blk_no as the first block of the log
696 * >0 => error has occurred
697 */
698int
699xlog_find_zeroed(struct log *log,
700 xfs_daddr_t *blk_no)
701{
702 xfs_buf_t *bp;
703 uint first_cycle, last_cycle;
704 xfs_daddr_t new_blk, last_blk, start_blk;
705 xfs_daddr_t num_scan_bblks;
706 int error, log_bbnum = log->l_logBBsize;
707
708 error = 0;
709 /* check totally zeroed log */
710 bp = xlog_get_bp(1,log->l_mp);
711 if (!bp)
ce029dc1 712 return ENOMEM;
d321ceac
NS
713 if ((error = xlog_bread(log, 0, 1, bp)))
714 goto bp_err;
715 first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
716 if (first_cycle == 0) { /* completely zeroed log */
717 *blk_no = 0;
718 xlog_put_bp(bp);
719 return -1;
720 }
721
722 /* check partially zeroed log */
723 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
724 goto bp_err;
725 last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
726 if (last_cycle != 0) { /* log completely written to */
727 xlog_put_bp(bp);
728 return 0;
729 } else if (first_cycle != 1) {
730 /*
731 * If the cycle of the last block is zero, the cycle of
732 * the first block must be 1. If it's not, maybe we're
733 * not looking at a log... Bail out.
734 */
735 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
736 return XFS_ERROR(EINVAL);
737 }
738
739 /* we have a partially zeroed log */
740 last_blk = log_bbnum-1;
741 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
742 goto bp_err;
743
744 /*
745 * Validate the answer. Because there is no way to guarantee that
746 * the entire log is made up of log records which are the same size,
747 * we scan over the defined maximum blocks. At this point, the maximum
748 * is not chosen to mean anything special. XXXmiken
749 */
73bf5988 750 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
d321ceac
NS
751 ASSERT(num_scan_bblks <= INT_MAX);
752
753 if (last_blk < num_scan_bblks)
754 num_scan_bblks = last_blk;
755 start_blk = last_blk - num_scan_bblks;
756
757 /*
758 * We search for any instances of cycle number 0 that occur before
759 * our current estimate of the head. What we're trying to detect is
760 * 1 ... | 0 | 1 | 0...
761 * ^ binary search ends here
762 */
ce029dc1
ES
763 if ((error = xlog_find_verify_cycle(log, start_blk,
764 (int)num_scan_bblks, 0, &new_blk)))
606d804d 765 goto bp_err;
ce029dc1
ES
766 if (new_blk != -1)
767 last_blk = new_blk;
d321ceac
NS
768
769 /*
770 * Potentially backup over partial log record write. We don't need
771 * to search the end of the log because we know it is zero.
772 */
773 if ((error = xlog_find_verify_log_record(log, start_blk,
79c48ada
ES
774 &last_blk, 0)) == -1) {
775 error = XFS_ERROR(EIO);
776 goto bp_err;
777 } else if (error)
d321ceac
NS
778 goto bp_err;
779
780 *blk_no = last_blk;
781bp_err:
782 xlog_put_bp(bp);
783 if (error)
784 return error;
785 return -1;
786} /* xlog_find_zeroed */
787
/* stuff for transactional view */

/*
 * Restore a log record's data to its on-disk layout.  When a record is
 * written, the first 4 bytes of each basic block are replaced by the
 * cycle number (stashed in h_cycle_data / the extended headers); this
 * routine copies those saved words back over the data blocks.
 *
 * rhead - record header (followed by extended headers for v2 logs)
 * dp    - start of the record's data blocks (modified in place)
 * log   - in-core log, used to detect v2 (multi-header) log format
 */
STATIC void
xlog_unpack_data(xlog_rec_header_t *rhead,
		 xfs_caddr_t	   dp,
		 xlog_t		   *log)
{
	int i, j, k;
	/* overlay for addressing the extended headers of a v2 record */
	union ich {
		xlog_rec_header_t      hic_header;
		xlog_rec_ext_header_t  hic_xheader;
		char		       hic_sector[XLOG_HEADER_SIZE];
	} *xhdr;

#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
	uint *up = (uint *)dp;
	uint chksum = 0;
#endif

	/* restore the cycle words saved in the main header (first 32k worth) */
	for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
		i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	/* v2 logs: remaining cycle words live in the extended headers */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		xhdr = (union ich*)rhead;
		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
	/* divide length by 4 to get # words */
	for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
		chksum ^= INT_GET(*up, ARCH_CONVERT);
		up++;
	}
	/* warn (once per log) on checksum mismatch; non-fatal */
	if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
	    if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) ||
		((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
		    cmn_err(CE_DEBUG,
			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
			    INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
		    cmn_err(CE_DEBUG,
"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
			if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
				cmn_err(CE_DEBUG,
					"XFS: LogR this is a LogV2 filesystem\n");
			}
		    log->l_flags |= XLOG_CHKSUM_MISMATCH;
	    }
	}
#endif /* DEBUG && XFS_LOUD_RECOVERY */
}	/* xlog_unpack_data */
845
d321ceac
NS
846STATIC xlog_recover_t *
847xlog_recover_find_tid(xlog_recover_t *q,
848 xlog_tid_t tid)
849{
850 xlog_recover_t *p = q;
851
852 while (p != NULL) {
853 if (p->r_log_tid == tid)
854 break;
855 p = p->r_next;
856 }
857 return p;
858} /* xlog_recover_find_tid */
859
/*
 * Push trans onto the front of the hash chain headed by *q.
 */
STATIC void
xlog_recover_put_hashq(xlog_recover_t **q,
		       xlog_recover_t *trans)
{
	trans->r_next = *q;
	*q = trans;
}	/* xlog_recover_put_hashq */
867
868STATIC void
869xlog_recover_new_tid(xlog_recover_t **q,
870 xlog_tid_t tid,
871 xfs_lsn_t lsn)
872{
873 xlog_recover_t *trans;
874
875 trans = kmem_zalloc(sizeof(xlog_recover_t), 0);
876 trans->r_log_tid = tid;
877 trans->r_lsn = lsn;
878 xlog_recover_put_hashq(q, trans);
879} /* xlog_recover_new_tid */
880
881
882STATIC int
883xlog_recover_unlink_tid(xlog_recover_t **q,
884 xlog_recover_t *trans)
885{
886 xlog_recover_t *tp;
887 int found = 0;
888
889 ASSERT(trans != 0);
890 if (trans == *q) {
891 *q = (*q)->r_next;
892 } else {
893 tp = *q;
894 while (tp != 0) {
895 if (tp->r_next == trans) {
896 found = 1;
897 break;
898 }
899 tp = tp->r_next;
900 }
901 if (!found) {
902 xlog_warn(
903 "XFS: xlog_recover_unlink_tid: trans not found");
904 ASSERT(0);
905 return XFS_ERROR(EIO);
906 }
907 tp->r_next = tp->r_next->r_next;
908 }
909 return 0;
910} /* xlog_recover_unlink_tid */
911
912/*
913 * Free up any resources allocated by the transaction
914 *
915 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
916 */
917STATIC void
918xlog_recover_free_trans(xlog_recover_t *trans)
919{
920 xlog_recover_item_t *first_item, *item, *free_item;
921 int i;
922
923 item = first_item = trans->r_itemq;
924 do {
925 free_item = item;
926 item = item->ri_next;
927 /* Free the regions in the item. */
928 for (i = 0; i < free_item->ri_cnt; i++) {
929 kmem_free(free_item->ri_buf[i].i_addr,
930 free_item->ri_buf[i].i_len);
931 }
932 /* Free the item itself */
933 kmem_free(free_item->ri_buf,
934 (free_item->ri_total * sizeof(xfs_log_iovec_t)));
935 kmem_free(free_item, sizeof(xlog_recover_item_t));
936 } while (first_item != item);
937 /* Free the transaction recover structure */
938 kmem_free(trans, sizeof(xlog_recover_t));
939} /* xlog_recover_free_trans */
940
941
942STATIC int
943xlog_recover_commit_trans(xlog_t *log,
944 xlog_recover_t **q,
945 xlog_recover_t *trans,
946 int pass)
947{
948 int error;
949
950 if ((error = xlog_recover_unlink_tid(q, trans)))
951 return error;
952 if ((error = xlog_recover_do_trans(log, trans, pass)))
953 return error;
954 xlog_recover_free_trans(trans); /* no error */
955 return 0;
956} /* xlog_recover_commit_trans */
957
958STATIC void
959xlog_recover_insert_item_backq(xlog_recover_item_t **q,
960 xlog_recover_item_t *item)
961{
962 if (*q == 0) {
963 item->ri_prev = item->ri_next = item;
964 *q = item;
965 } else {
966 item->ri_next = *q;
967 item->ri_prev = (*q)->ri_prev;
968 (*q)->ri_prev = item;
969 item->ri_prev->ri_next = item;
970 }
971} /* xlog_recover_insert_item_backq */
972
973STATIC void
974xlog_recover_add_item(xlog_recover_item_t **itemq)
975{
976 xlog_recover_item_t *item;
977
978 item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
979 xlog_recover_insert_item_backq(itemq, item);
980} /* xlog_recover_add_item */
981
982/* The next region to add is the start of a new region. It could be
983 * a whole region or it could be the first part of a new region. Because
984 * of this, the assumption here is that the type and size fields of all
985 * format structures fit into the first 32 bits of the structure.
986 *
987 * This works because all regions must be 32 bit aligned. Therefore, we
988 * either have both fields or we have neither field. In the case we have
989 * neither field, the data part of the region is zero length. We only have
990 * a log_op_header and can throw away the header since a new one will appear
991 * later. If we have at least 4 bytes, then we can determine how many regions
992 * will appear in the current log item.
993 */
994STATIC int
995xlog_recover_add_to_trans(xlog_recover_t *trans,
996 xfs_caddr_t dp,
997 int len)
998{
999 xfs_inode_log_format_t *in_f; /* any will do */
1000 xlog_recover_item_t *item;
1001 xfs_caddr_t ptr;
1002
1003 if (!len)
1004 return 0;
1005 ptr = kmem_zalloc(len, 0);
1006 bcopy(dp, ptr, len);
1007
1008 in_f = (xfs_inode_log_format_t *)ptr;
1009 item = trans->r_itemq;
1010 if (item == 0) {
1011 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
1012 if (len == sizeof(xfs_trans_header_t))
1013 xlog_recover_add_item(&trans->r_itemq);
1014 bcopy(dp, &trans->r_theader, len); /* s, d, l */
1015 return 0;
1016 }
1017 if (item->ri_prev->ri_total != 0 &&
1018 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1019 xlog_recover_add_item(&trans->r_itemq);
1020 }
1021 item = trans->r_itemq;
1022 item = item->ri_prev;
1023
1024 if (item->ri_total == 0) { /* first region to be added */
1025 item->ri_total = in_f->ilf_size;
1026 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1027 item->ri_buf = kmem_zalloc((item->ri_total *
1028 sizeof(xfs_log_iovec_t)), 0);
1029 }
1030 ASSERT(item->ri_total > item->ri_cnt);
1031 /* Description region is ri_buf[0] */
1032 item->ri_buf[item->ri_cnt].i_addr = ptr;
1033 item->ri_buf[item->ri_cnt].i_len = len;
1034 item->ri_cnt++;
1035 return 0;
1036} /* xlog_recover_add_to_trans */
1037
1038STATIC int
1039xlog_recover_add_to_cont_trans(xlog_recover_t *trans,
1040 xfs_caddr_t dp,
1041 int len)
1042{
1043 xlog_recover_item_t *item;
1044 xfs_caddr_t ptr, old_ptr;
1045 int old_len;
1046
1047 item = trans->r_itemq;
1048 if (item == 0) {
1049 /* finish copying rest of trans header */
1050 xlog_recover_add_item(&trans->r_itemq);
1051 ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
1052 bcopy(dp, ptr, len); /* s, d, l */
1053 return 0;
1054 }
1055 item = item->ri_prev;
1056
1057 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1058 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1059
1060 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
1061 bcopy(dp , &ptr[old_len], len); /* s, d, l */
1062 item->ri_buf[item->ri_cnt-1].i_len += len;
1063 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1064 return 0;
1065} /* xlog_recover_add_to_cont_trans */
1066
1067STATIC int
1068xlog_recover_unmount_trans(xlog_recover_t *trans)
1069{
1070 /* Do nothing now */
1071 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
1072 return( 0 );
1073} /* xlog_recover_unmount_trans */
1074
1075
1076STATIC int
1077xlog_recover_process_data(xlog_t *log,
1078 xlog_recover_t *rhash[],
1079 xlog_rec_header_t *rhead,
1080 xfs_caddr_t dp,
1081 int pass)
1082{
1083 xfs_caddr_t lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT);
1084 int num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
1085 xlog_op_header_t *ohead;
1086 xlog_recover_t *trans;
1087 xlog_tid_t tid;
1088 int error;
1089 unsigned long hash;
1090 uint flags;
1091
1092 /* check the log format matches our own - else we can't recover */
1093 if (xlog_header_check_recover(log->l_mp, rhead))
1094 return (XFS_ERROR(EIO));
1095
73bf5988 1096 while ((dp < lp) && num_logops) {
d321ceac
NS
1097 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
1098 ohead = (xlog_op_header_t *)dp;
1099 dp += sizeof(xlog_op_header_t);
1100 if (ohead->oh_clientid != XFS_TRANSACTION &&
1101 ohead->oh_clientid != XFS_LOG) {
1102 xlog_warn("XFS: xlog_recover_process_data: bad clientid");
1103 ASSERT(0);
1104 return (XFS_ERROR(EIO));
1105 }
1106 tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
1107 hash = XLOG_RHASH(tid);
1108 trans = xlog_recover_find_tid(rhash[hash], tid);
1109 if (trans == NULL) { /* not found; add new tid */
1110 if (ohead->oh_flags & XLOG_START_TRANS)
1111 xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT));
1112 } else {
1113 ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
1114 flags = ohead->oh_flags & ~XLOG_END_TRANS;
1115 if (flags & XLOG_WAS_CONT_TRANS)
1116 flags &= ~XLOG_CONTINUE_TRANS;
1117 switch (flags) {
1118 case XLOG_COMMIT_TRANS: {
1119 error = xlog_recover_commit_trans(log, &rhash[hash],
1120 trans, pass);
1121 break;
1122 }
1123 case XLOG_UNMOUNT_TRANS: {
1124 error = xlog_recover_unmount_trans(trans);
1125 break;
1126 }
1127 case XLOG_WAS_CONT_TRANS: {
1128 error = xlog_recover_add_to_cont_trans(trans, dp,
1129 INT_GET(ohead->oh_len, ARCH_CONVERT));
1130 break;
1131 }
1132 case XLOG_START_TRANS : {
1133 xlog_warn("XFS: xlog_recover_process_data: bad transaction");
1134 ASSERT(0);
1135 error = XFS_ERROR(EIO);
1136 break;
1137 }
1138 case 0:
1139 case XLOG_CONTINUE_TRANS: {
1140 error = xlog_recover_add_to_trans(trans, dp,
1141 INT_GET(ohead->oh_len, ARCH_CONVERT));
1142 break;
1143 }
1144 default: {
1145 xlog_warn("XFS: xlog_recover_process_data: bad flag");
1146 ASSERT(0);
1147 error = XFS_ERROR(EIO);
1148 break;
1149 }
1150 } /* switch */
1151 if (error)
1152 return error;
1153 } /* if */
1154 dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
1155 num_logops--;
1156 }
1157 return( 0 );
1158} /* xlog_recover_process_data */
1159
1160/*
1161 * Read the log from tail to head and process the log records found.
1162 * Handle the two cases where the tail and head are in the same cycle
1163 * and where the active portion of the log wraps around the end of
1164 * the physical log separately. The pass parameter is passed through
1165 * to the routines called to process the data and is not looked at
1166 * here.
1167 */
1168int
1169xlog_do_recovery_pass(xlog_t *log,
1170 xfs_daddr_t head_blk,
1171 xfs_daddr_t tail_blk,
1172 int pass)
1173{
1174 xlog_rec_header_t *rhead;
1175 xfs_daddr_t blk_no;
1176 xfs_caddr_t bufaddr;
1177 xfs_buf_t *hbp, *dbp;
73bf5988 1178 int error, h_size;
d321ceac 1179 int bblks, split_bblks;
73bf5988 1180 int hblks, split_hblks, wrapped_hblks;
d321ceac
NS
1181 xlog_recover_t *rhash[XLOG_RHASH_SIZE];
1182
1183 error = 0;
73bf5988
SL
1184
1185
1186 /*
1187 * Read the header of the tail block and get the iclog buffer size from
1188 * h_size. Use this to tell how many sectors make up the log header.
1189 */
1190 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
1191 /*
1192 * When using variable length iclogs, read first sector of iclog
1193 * header and extract the header size from it. Get a new hbp that
1194 * is the correct size.
1195 */
1196 hbp = xlog_get_bp(1, log->l_mp);
1197 if (!hbp)
1198 return ENOMEM;
1199 if ((error = xlog_bread(log, tail_blk, 1, hbp)))
1200 goto bread_err1;
1201 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1202 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
1203 XLOG_HEADER_MAGIC_NUM);
1204 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
1205 xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
1206 error = XFS_ERROR(EIO);
1207 goto bread_err1;
1208 }
1209 h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
1210
1211 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) &&
1212 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1213 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1214 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1215 hblks++;
1216 xlog_put_bp(hbp);
1217 hbp = xlog_get_bp(hblks, log->l_mp);
1218 } else {
1219 hblks=1;
1220 }
1221 } else {
1222 hblks=1;
1223 hbp = xlog_get_bp(1, log->l_mp);
1224 h_size = XLOG_BIG_RECORD_BSIZE;
1225 }
1226
d321ceac 1227 if (!hbp)
ce029dc1 1228 return ENOMEM;
73bf5988 1229 dbp = xlog_get_bp(BTOBB(h_size),log->l_mp);
d321ceac
NS
1230 if (!dbp) {
1231 xlog_put_bp(hbp);
ce029dc1 1232 return ENOMEM;
d321ceac 1233 }
73bf5988 1234
d321ceac
NS
1235 bzero(rhash, sizeof(rhash));
1236 if (tail_blk <= head_blk) {
1237 for (blk_no = tail_blk; blk_no < head_blk; ) {
73bf5988
SL
1238 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1239 goto bread_err2;
d321ceac
NS
1240 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1241 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1242 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
73bf5988
SL
1243 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
1244 xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
1245 error = XFS_ERROR(EIO);
1246 goto bread_err2;
1247 }
d321ceac
NS
1248 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */
1249 if (bblks > 0) {
73bf5988
SL
1250 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1251 goto bread_err2;
d321ceac
NS
1252 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1253 if ((error = xlog_recover_process_data(log, rhash,
1254 rhead, XFS_BUF_PTR(dbp),
1255 pass)))
73bf5988 1256 goto bread_err2;
d321ceac 1257 }
73bf5988 1258 blk_no += (bblks+hblks);
d321ceac
NS
1259 }
1260 } else {
1261 /*
1262 * Perform recovery around the end of the physical log. When the head
1263 * is not on the same cycle number as the tail, we can't do a sequential
1264 * recovery as above.
1265 */
1266 blk_no = tail_blk;
1267 while (blk_no < log->l_logBBsize) {
73bf5988
SL
1268 /*
1269 * Check for header wrapping around physical end-of-log
1270 */
1271 wrapped_hblks = 0;
1272 if (blk_no+hblks <= log->l_logBBsize) {
1273 /* Read header in one read */
1274 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1275 goto bread_err2;
1276 } else {
1277 /* This log record is split across physical end of log */
1278 split_hblks = 0;
1279 if (blk_no != log->l_logBBsize) {
1280 /* some data is before physical end of log */
1281 ASSERT(blk_no <= INT_MAX);
1282 split_hblks = log->l_logBBsize - (int)blk_no;
1283 ASSERT(split_hblks > 0);
1284 if ((error = xlog_bread(log, blk_no, split_hblks, hbp)))
1285 goto bread_err2;
1286 }
1287 bufaddr = XFS_BUF_PTR(hbp);
1288 XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks),
1289 BBTOB(hblks - split_hblks));
1290 wrapped_hblks = hblks - split_hblks;
1291 if ((error = xlog_bread(log, 0, wrapped_hblks, hbp)))
1292 goto bread_err2;
1293 XFS_BUF_SET_PTR(hbp, bufaddr, hblks);
1294 }
d321ceac
NS
1295 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1296 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1297 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1298 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1299
1300 /* LR body must have data or it wouldn't have been written */
1301 ASSERT(bblks > 0);
73bf5988 1302 blk_no += hblks; /* successfully read header */
d321ceac
NS
1303
1304 if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
1305 (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
73bf5988 1306 (bblks <= 0)) {
d321ceac 1307 error = EFSCORRUPTED;
73bf5988 1308 goto bread_err2;
d321ceac
NS
1309 }
1310
1311 /* Read in data for log record */
1312 if (blk_no+bblks <= log->l_logBBsize) {
1313 if ((error = xlog_bread(log, blk_no, bblks, dbp)))
73bf5988 1314 goto bread_err2;
d321ceac
NS
1315 } else {
1316 /* This log record is split across physical end of log */
1317 split_bblks = 0;
1318 if (blk_no != log->l_logBBsize) {
1319
1320 /* some data is before physical end of log */
1321 ASSERT(blk_no <= INT_MAX);
1322 split_bblks = log->l_logBBsize - (int)blk_no;
1323 ASSERT(split_bblks > 0);
1324 if ((error = xlog_bread(log, blk_no, split_bblks, dbp)))
73bf5988 1325 goto bread_err2;
d321ceac
NS
1326 }
1327 bufaddr = XFS_BUF_PTR(dbp);
1328 XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks),
1329 BBTOB(bblks - split_bblks));
73bf5988
SL
1330 if ((error = xlog_bread(log, wrapped_hblks,
1331 bblks - split_bblks, dbp)))
1332 goto bread_err2;
1333 XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE);
d321ceac
NS
1334 }
1335 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1336 if ((error = xlog_recover_process_data(log, rhash,
1337 rhead, XFS_BUF_PTR(dbp),
1338 pass)))
73bf5988 1339 goto bread_err2;
d321ceac
NS
1340 blk_no += bblks;
1341 }
1342
1343 ASSERT(blk_no >= log->l_logBBsize);
1344 blk_no -= log->l_logBBsize;
1345
1346 /* read first part of physical log */
1347 while (blk_no < head_blk) {
73bf5988
SL
1348 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1349 goto bread_err2;
d321ceac
NS
1350 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1351 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1352 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1353 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1354 ASSERT(bblks > 0);
73bf5988
SL
1355 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1356 goto bread_err2;
d321ceac
NS
1357 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1358 if ((error = xlog_recover_process_data(log, rhash,
1359 rhead, XFS_BUF_PTR(dbp),
1360 pass)))
73bf5988
SL
1361 goto bread_err2;
1362 blk_no += (bblks+hblks);
d321ceac
NS
1363 }
1364 }
1365
73bf5988 1366bread_err2:
d321ceac 1367 xlog_put_bp(dbp);
73bf5988 1368bread_err1:
d321ceac
NS
1369 xlog_put_bp(hbp);
1370
1371 return error;
1372}