]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxlog/xfs_log_recover.c
Fix libxfs device flush ioctl, sync with kernel source.
[thirdparty/xfsprogs-dev.git] / libxlog / xfs_log_recover.c
1 /*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33 #include <libxlog.h>
34
35 /*
36 * This routine finds (to an approximation) the first block in the physical
37 * log which contains the given cycle. It uses a binary search algorithm.
38 * Note that the algorithm can not be perfect because the disk will not
39 * necessarily be perfect.
40 */
41 int
42 xlog_find_cycle_start(xlog_t *log,
43 xfs_buf_t *bp,
44 xfs_daddr_t first_blk,
45 xfs_daddr_t *last_blk,
46 uint cycle)
47 {
48 xfs_daddr_t mid_blk;
49 uint mid_cycle;
50 int error;
51
52 mid_blk = BLK_AVG(first_blk, *last_blk);
53 while (mid_blk != first_blk && mid_blk != *last_blk) {
54 if ((error = xlog_bread(log, mid_blk, 1, bp)))
55 return error;
56 mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
57 if (mid_cycle == cycle) {
58 *last_blk = mid_blk;
59 /* last_half_cycle == mid_cycle */
60 } else {
61 first_blk = mid_blk;
62 /* first_half_cycle == mid_cycle */
63 }
64 mid_blk = BLK_AVG(first_blk, *last_blk);
65 }
66 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
67 (mid_blk == *last_blk && mid_blk-1 == first_blk));
68
69 return 0;
70 } /* xlog_find_cycle_start */
71
72
73 /*
74 * Check that the range of blocks does not contain the cycle number
75 * given. The scan needs to occur from front to back and the ptr into the
76 * region must be updated since a later routine will need to perform another
77 * test. If the region is completely good, we end up returning the same
78 * last block number.
79 *
80 * Set blkno to -1 if we encounter no errors. This is an invalid block number
81 * since we don't ever expect logs to get this large.
82 */
83
84 STATIC int
85 xlog_find_verify_cycle( xlog_t *log,
86 xfs_daddr_t start_blk,
87 int nbblks,
88 uint stop_on_cycle_no,
89 xfs_daddr_t *new_blk)
90 {
91 xfs_daddr_t i, j;
92 uint cycle;
93 xfs_buf_t *bp;
94 char *buf = NULL;
95 int error = 0;
96 xfs_daddr_t bufblks;
97
98 bufblks = 1 << ffs(nbblks);
99
100 while (!(bp = xlog_get_bp(bufblks, log->l_mp))) {
101 /* can't get enough memory to do everything in one big buffer */
102 bufblks >>= 1;
103 if (!bufblks)
104 return ENOMEM;
105 }
106
107
108 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
109 int bcount = min(bufblks, (start_blk + nbblks - i));
110
111 if ((error = xlog_bread(log, i, bcount, bp)))
112 goto out;
113
114 buf = XFS_BUF_PTR(bp);
115 for (j = 0; j < bcount; j++) {
116 cycle = GET_CYCLE(buf, ARCH_CONVERT);
117 if (cycle == stop_on_cycle_no) {
118 *new_blk = i+j;
119 goto out;
120 }
121
122 buf += BBSIZE;
123 }
124 }
125
126 *new_blk = -1;
127
128 out:
129 xlog_put_bp(bp);
130
131 return error;
132 } /* xlog_find_verify_cycle */
133
134
135 /*
136 * Potentially backup over partial log record write.
137 *
138 * In the typical case, last_blk is the number of the block directly after
139 * a good log record. Therefore, we subtract one to get the block number
140 * of the last block in the given buffer. extra_bblks contains the number
141 * of blocks we would have read on a previous read. This happens when the
142 * last log record is split over the end of the physical log.
143 *
144 * extra_bblks is the number of blocks potentially verified on a previous
145 * call to this routine.
146 */
147
148 STATIC int
149 xlog_find_verify_log_record(xlog_t *log,
150 xfs_daddr_t start_blk,
151 xfs_daddr_t *last_blk,
152 int extra_bblks)
153 {
154 xfs_daddr_t i;
155 xfs_buf_t *bp;
156 char *buf = NULL;
157 xlog_rec_header_t *head = NULL;
158 int error = 0;
159 int smallmem = 0;
160 int num_blks = *last_blk - start_blk;
161 int xhdrs;
162
163 ASSERT(start_blk != 0 || *last_blk != start_blk);
164
165 if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
166 if (!(bp = xlog_get_bp(1, log->l_mp)))
167 return ENOMEM;
168 smallmem = 1;
169 buf = XFS_BUF_PTR(bp);
170 } else {
171 if ((error = xlog_bread(log, start_blk, num_blks, bp)))
172 goto out;
173 buf = XFS_BUF_PTR(bp) + (num_blks - 1) * BBSIZE;
174 }
175
176
177 for (i=(*last_blk)-1; i>=0; i--) {
178 if (i < start_blk) {
179 /* legal log record not found */
180 xlog_warn("XFS: Log inconsistent (didn't find previous header)");
181 ASSERT(0);
182 error = XFS_ERROR(EIO);
183 goto out;
184 }
185
186 if (smallmem && (error = xlog_bread(log, i, 1, bp)))
187 goto out;
188 head = (xlog_rec_header_t*)buf;
189
190 if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
191 break;
192
193 if (!smallmem)
194 buf -= BBSIZE;
195 }
196
197 /*
198 * We hit the beginning of the physical log & still no header. Return
199 * to caller. If caller can handle a return of -1, then this routine
200 * will be called again for the end of the physical log.
201 */
202 if (i == -1) {
203 error = -1;
204 goto out;
205 }
206
207 /* we have the final block of the good log (the first block
208 * of the log record _before_ the head. So we check the uuid.
209 */
210
211 if ((error = xlog_header_check_mount(log->l_mp, head)))
212 goto out;
213
214 /*
215 * We may have found a log record header before we expected one.
216 * last_blk will be the 1st block # with a given cycle #. We may end
217 * up reading an entire log record. In this case, we don't want to
218 * reset last_blk. Only when last_blk points in the middle of a log
219 * record do we update last_blk.
220 */
221 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
222 int h_size = INT_GET(head->h_size, ARCH_CONVERT);
223 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
224 if (h_size % XLOG_HEADER_CYCLE_SIZE)
225 xhdrs++;
226 } else {
227 xhdrs = 1;
228 }
229
230 if (*last_blk - i + extra_bblks
231 != BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs)
232 *last_blk = i;
233
234 out:
235 xlog_put_bp(bp);
236
237 return error;
238 } /* xlog_find_verify_log_record */
239
240 /*
241 * Head is defined to be the point of the log where the next log write
242 * write could go. This means that incomplete LR writes at the end are
243 * eliminated when calculating the head. We aren't guaranteed that previous
244 * LR have complete transactions. We only know that a cycle number of
245 * current cycle number -1 won't be present in the log if we start writing
246 * from our current block number.
247 *
248 * last_blk contains the block number of the first block with a given
249 * cycle number.
250 *
251 * Also called from xfs_log_print.c
252 *
253 * Return: zero if normal, non-zero if error.
254 */
255 int
256 xlog_find_head(xlog_t *log,
257 xfs_daddr_t *return_head_blk)
258 {
259 xfs_buf_t *bp;
260 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
261 int num_scan_bblks;
262 uint first_half_cycle, last_half_cycle;
263 uint stop_on_cycle;
264 int error, log_bbnum = log->l_logBBsize;
265
266 /* Is the end of the log device zeroed? */
267 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
268 *return_head_blk = first_blk;
269
270 /* is the whole lot zeroed? */
271 if (!first_blk) {
272 /* Linux XFS shouldn't generate totally zeroed logs -
273 * mkfs etc write a dummy unmount record to a fresh
274 * log so we can store the uuid in there
275 */
276 xlog_warn("XFS: totally zeroed log\n");
277 }
278
279 return 0;
280 } else if (error) {
281 xlog_warn("XFS: empty log check failed");
282 return error;
283 }
284
285 first_blk = 0; /* get cycle # of 1st block */
286 bp = xlog_get_bp(1,log->l_mp);
287 if (!bp)
288 return ENOMEM;
289 if ((error = xlog_bread(log, 0, 1, bp)))
290 goto bp_err;
291 first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
292
293 last_blk = head_blk = log_bbnum-1; /* get cycle # of last block */
294 if ((error = xlog_bread(log, last_blk, 1, bp)))
295 goto bp_err;
296 last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
297 ASSERT(last_half_cycle != 0);
298
299 /*
300 * If the 1st half cycle number is equal to the last half cycle number,
301 * then the entire log is stamped with the same cycle number. In this
302 * case, head_blk can't be set to zero (which makes sense). The below
303 * math doesn't work out properly with head_blk equal to zero. Instead,
304 * we set it to log_bbnum which is an illegal block number, but this
305 * value makes the math correct. If head_blk doesn't changed through
306 * all the tests below, *head_blk is set to zero at the very end rather
307 * than log_bbnum. In a sense, log_bbnum and zero are the same block
308 * in a circular file.
309 */
310 if (first_half_cycle == last_half_cycle) {
311 /*
312 * In this case we believe that the entire log should have cycle
313 * number last_half_cycle. We need to scan backwards from the
314 * end verifying that there are no holes still containing
315 * last_half_cycle - 1. If we find such a hole, then the start
316 * of that hole will be the new head. The simple case looks like
317 * x | x ... | x - 1 | x
318 * Another case that fits this picture would be
319 * x | x + 1 | x ... | x
320 * In this case the head really is somwhere at the end of the
321 * log, as one of the latest writes at the beginning was incomplete.
322 * One more case is
323 * x | x + 1 | x ... | x - 1 | x
324 * This is really the combination of the above two cases, and the
325 * head has to end up at the start of the x-1 hole at the end of
326 * the log.
327 *
328 * In the 256k log case, we will read from the beginning to the
329 * end of the log and search for cycle numbers equal to x-1. We
330 * don't worry about the x+1 blocks that we encounter, because
331 * we know that they cannot be the head since the log started with
332 * x.
333 */
334 head_blk = log_bbnum;
335 stop_on_cycle = last_half_cycle - 1;
336 } else {
337 /*
338 * In this case we want to find the first block with cycle number
339 * matching last_half_cycle. We expect the log to be some
340 * variation on
341 * x + 1 ... | x ...
342 * The first block with cycle number x (last_half_cycle) will be
343 * where the new head belongs. First we do a binary search for
344 * the first occurrence of last_half_cycle. The binary search
345 * may not be totally accurate, so then we scan back from there
346 * looking for occurrences of last_half_cycle before us. If
347 * that backwards scan wraps around the beginning of the log,
348 * then we look for occurrences of last_half_cycle - 1 at the
349 * end of the log. The cases we're looking for look like
350 * x + 1 ... | x | x + 1 | x ...
351 * ^ binary search stopped here
352 * or
353 * x + 1 ... | x ... | x - 1 | x
354 * <---------> less than scan distance
355 */
356 stop_on_cycle = last_half_cycle;
357 if ((error = xlog_find_cycle_start(log, bp, first_blk,
358 &head_blk, last_half_cycle)))
359 goto bp_err;
360 }
361
362 /*
363 * Now validate the answer. Scan back some number of maximum possible
364 * blocks and make sure each one has the expected cycle number. The
365 * maximum is determined by the total possible amount of buffering
366 * in the in-core log. The following number can be made tighter if
367 * we actually look at the block size of the filesystem.
368 */
369 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
370 if (head_blk >= num_scan_bblks) {
371 /*
372 * We are guaranteed that the entire check can be performed
373 * in one buffer.
374 */
375 start_blk = head_blk - num_scan_bblks;
376 if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
377 stop_on_cycle, &new_blk)))
378 goto bp_err;
379 if (new_blk != -1)
380 head_blk = new_blk;
381 } else { /* need to read 2 parts of log */
382 /*
383 * We are going to scan backwards in the log in two parts. First
384 * we scan the physical end of the log. In this part of the log,
385 * we are looking for blocks with cycle number last_half_cycle - 1.
386 * If we find one, then we know that the log starts there, as we've
387 * found a hole that didn't get written in going around the end
388 * of the physical log. The simple case for this is
389 * x + 1 ... | x ... | x - 1 | x
390 * <---------> less than scan distance
391 * If all of the blocks at the end of the log have cycle number
392 * last_half_cycle, then we check the blocks at the start of the
393 * log looking for occurrences of last_half_cycle. If we find one,
394 * then our current estimate for the location of the first
395 * occurrence of last_half_cycle is wrong and we move back to the
396 * hole we've found. This case looks like
397 * x + 1 ... | x | x + 1 | x ...
398 * ^ binary search stopped here
399 * Another case we need to handle that only occurs in 256k logs is
400 * x + 1 ... | x ... | x+1 | x ...
401 * ^ binary search stops here
402 * In a 256k log, the scan at the end of the log will see the x+1
403 * blocks. We need to skip past those since that is certainly not
404 * the head of the log. By searching for last_half_cycle-1 we
405 * accomplish that.
406 */
407 start_blk = log_bbnum - num_scan_bblks + head_blk;
408 ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
409 if ((error = xlog_find_verify_cycle(log, start_blk,
410 num_scan_bblks-(int)head_blk, (stop_on_cycle - 1),
411 &new_blk)))
412 goto bp_err;
413 if (new_blk != -1) {
414 head_blk = new_blk;
415 goto bad_blk;
416 }
417
418 /*
419 * Scan beginning of log now. The last part of the physical log
420 * is good. This scan needs to verify that it doesn't find the
421 * last_half_cycle.
422 */
423 start_blk = 0;
424 ASSERT(head_blk <= INT_MAX);
425 if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
426 stop_on_cycle, &new_blk)))
427 goto bp_err;
428 if (new_blk != -1)
429 head_blk = new_blk;
430 }
431
432 bad_blk:
433 /*
434 * Now we need to make sure head_blk is not pointing to a block in
435 * the middle of a log record.
436 */
437 num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
438 if (head_blk >= num_scan_bblks) {
439 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
440
441 /* start ptr at last block ptr before head_blk */
442 if ((error = xlog_find_verify_log_record(log,
443 start_blk,
444 &head_blk,
445 0)) == -1) {
446 error = XFS_ERROR(EIO);
447 goto bp_err;
448 } else if (error)
449 goto bp_err;
450 } else {
451 start_blk = 0;
452 ASSERT(head_blk <= INT_MAX);
453 if ((error = xlog_find_verify_log_record(log,
454 start_blk,
455 &head_blk,
456 0)) == -1) {
457 /* We hit the beginning of the log during our search */
458 start_blk = log_bbnum - num_scan_bblks + head_blk;
459 new_blk = log_bbnum;
460 ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
461 ASSERT(head_blk <= INT_MAX);
462 if ((error = xlog_find_verify_log_record(log,
463 start_blk,
464 &new_blk,
465 (int)head_blk)) == -1) {
466 error = XFS_ERROR(EIO);
467 goto bp_err;
468 } else if (error)
469 goto bp_err;
470 if (new_blk != log_bbnum)
471 head_blk = new_blk;
472 } else if (error)
473 goto bp_err;
474 }
475
476 xlog_put_bp(bp);
477 if (head_blk == log_bbnum)
478 *return_head_blk = 0;
479 else
480 *return_head_blk = head_blk;
481 /*
482 * When returning here, we have a good block number. Bad block
483 * means that during a previous crash, we didn't have a clean break
484 * from cycle number N to cycle number N-1. In this case, we need
485 * to find the first block with cycle number N-1.
486 */
487 return 0;
488
489 bp_err:
490 xlog_put_bp(bp);
491
492 if (error)
493 xlog_warn("XFS: failed to find log head");
494
495 return error;
496 } /* xlog_find_head */
497
498 /*
499 * Find the sync block number or the tail of the log.
500 *
501 * This will be the block number of the last record to have its
502 * associated buffers synced to disk. Every log record header has
503 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
504 * to get a sync block number. The only concern is to figure out which
505 * log record header to believe.
506 *
507 * The following algorithm uses the log record header with the largest
508 * lsn. The entire log record does not need to be valid. We only care
509 * that the header is valid.
510 *
511 * We could speed up search by using current head_blk buffer, but it is not
512 * available.
513 */
514 int
515 xlog_find_tail(xlog_t *log,
516 xfs_daddr_t *head_blk,
517 xfs_daddr_t *tail_blk,
518 int readonly)
519 {
520 xlog_rec_header_t *rhead;
521 xlog_op_header_t *op_head;
522 xfs_buf_t *bp;
523 int error, i, found;
524 xfs_daddr_t umount_data_blk;
525 xfs_daddr_t after_umount_blk;
526 xfs_lsn_t tail_lsn;
527 int hblks;
528
529 found = 0;
530
531 /*
532 * Find previous log record
533 */
534 if ((error = xlog_find_head(log, head_blk)))
535 return error;
536
537 bp = xlog_get_bp(1,log->l_mp);
538 if (!bp)
539 return ENOMEM;
540 if (*head_blk == 0) { /* special case */
541 if ((error = xlog_bread(log, 0, 1, bp)))
542 goto bread_err;
543 if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) {
544 *tail_blk = 0;
545 /* leave all other log inited values alone */
546 goto exit;
547 }
548 }
549
550 /*
551 * Search backwards looking for log record header block
552 */
553 ASSERT(*head_blk < INT_MAX);
554 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
555 if ((error = xlog_bread(log, i, 1, bp)))
556 goto bread_err;
557 if (XLOG_HEADER_MAGIC_NUM ==
558 INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT)) {
559 found = 1;
560 break;
561 }
562 }
563 /*
564 * If we haven't found the log record header block, start looking
565 * again from the end of the physical log. XXXmiken: There should be
566 * a check here to make sure we didn't search more than N blocks in
567 * the previous code.
568 */
569 if (!found) {
570 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
571 if ((error = xlog_bread(log, i, 1, bp)))
572 goto bread_err;
573 if (XLOG_HEADER_MAGIC_NUM ==
574 INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT)) {
575 found = 2;
576 break;
577 }
578 }
579 }
580 if (!found) {
581 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
582 ASSERT(0);
583 return XFS_ERROR(EIO);
584 }
585
586 /* find blk_no of tail of log */
587 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
588 *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT);
589
590 /*
591 * Reset log values according to the state of the log when we
592 * crashed. In the case where head_blk == 0, we bump curr_cycle
593 * one because the next write starts a new cycle rather than
594 * continuing the cycle of the last good log record. At this
595 * point we have guaranteed that all partial log records have been
596 * accounted for. Therefore, we know that the last good log record
597 * written was complete and ended exactly on the end boundary
598 * of the physical log.
599 */
600 log->l_prev_block = i;
601 log->l_curr_block = (int)*head_blk;
602 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
603 if (found == 2)
604 log->l_curr_cycle++;
605 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
606 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
607 log->l_grant_reserve_cycle = log->l_curr_cycle;
608 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
609 log->l_grant_write_cycle = log->l_curr_cycle;
610 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
611
612 /*
613 * Look for unmount record. If we find it, then we know there
614 * was a clean unmount. Since 'i' could be the last block in
615 * the physical log, we convert to a log block before comparing
616 * to the head_blk.
617 *
618 * Save the current tail lsn to use to pass to
619 * xlog_clear_stale_blocks() below. We won't want to clear the
620 * unmount record if there is one, so we pass the lsn of the
621 * unmount record rather than the block after it.
622 */
623 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
624 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
625 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
626
627 if ((h_version & XLOG_VERSION_2) &&
628 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
629 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
630 if (h_size % XLOG_HEADER_CYCLE_SIZE)
631 hblks++;
632 } else {
633 hblks = 1;
634 }
635 } else {
636 hblks = 1;
637 }
638 after_umount_blk = (i + hblks + (int)
639 BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
640 tail_lsn = log->l_tail_lsn;
641 if (*head_blk == after_umount_blk &&
642 INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
643 umount_data_blk = (i + hblks) % log->l_logBBsize;
644 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
645 goto bread_err;
646 }
647 op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp);
648 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
649 /*
650 * Set tail and last sync so that newly written
651 * log records will point recovery to after the
652 * current unmount record.
653 */
654 ASSIGN_ANY_LSN(log->l_tail_lsn, log->l_curr_cycle,
655 after_umount_blk, ARCH_NOCONVERT);
656 ASSIGN_ANY_LSN(log->l_last_sync_lsn, log->l_curr_cycle,
657 after_umount_blk, ARCH_NOCONVERT);
658 *tail_blk = after_umount_blk;
659 }
660 }
661
662 #ifdef __KERNEL__
663 /*
664 * Make sure that there are no blocks in front of the head
665 * with the same cycle number as the head. This can happen
666 * because we allow multiple outstanding log writes concurrently,
667 * and the later writes might make it out before earlier ones.
668 *
669 * We use the lsn from before modifying it so that we'll never
670 * overwrite the unmount record after a clean unmount.
671 *
672 * Do this only if we are going to recover the filesystem
673 *
674 * NOTE: This used to say "if (!readonly)"
675 * However on Linux, we can & do recover a read-only filesystem.
676 * We only skip recovery if NORECOVERY is specified on mount,
677 * in which case we would not be here.
678 *
679 * But... if the -device- itself is readonly, just skip this.
680 * We can't recover this device anyway, so it won't matter.
681 */
682
683 if (!is_read_only(log->l_mp->m_logdev_targp->pbr_kdev)) {
684 error = xlog_clear_stale_blocks(log, tail_lsn);
685 }
686 #endif
687
688 bread_err:
689 exit:
690 xlog_put_bp(bp);
691
692 if (error)
693 xlog_warn("XFS: failed to locate log tail");
694
695 return error;
696 } /* xlog_find_tail */
697
698 /*
699 * Is the log zeroed at all?
700 *
701 * The last binary search should be changed to perform an X block read
702 * once X becomes small enough. You can then search linearly through
703 * the X blocks. This will cut down on the number of reads we need to do.
704 *
705 * If the log is partially zeroed, this routine will pass back the blkno
706 * of the first block with cycle number 0. It won't have a complete LR
707 * preceding it.
708 *
709 * Return:
710 * 0 => the log is completely written to
711 * -1 => use *blk_no as the first block of the log
712 * >0 => error has occurred
713 */
714 int
715 xlog_find_zeroed(struct log *log,
716 xfs_daddr_t *blk_no)
717 {
718 xfs_buf_t *bp;
719 uint first_cycle, last_cycle;
720 xfs_daddr_t new_blk, last_blk, start_blk;
721 xfs_daddr_t num_scan_bblks;
722 int error, log_bbnum = log->l_logBBsize;
723
724 error = 0;
725 /* check totally zeroed log */
726 bp = xlog_get_bp(1,log->l_mp);
727 if (!bp)
728 return ENOMEM;
729 if ((error = xlog_bread(log, 0, 1, bp)))
730 goto bp_err;
731 first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
732 if (first_cycle == 0) { /* completely zeroed log */
733 *blk_no = 0;
734 xlog_put_bp(bp);
735 return -1;
736 }
737
738 /* check partially zeroed log */
739 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
740 goto bp_err;
741 last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
742 if (last_cycle != 0) { /* log completely written to */
743 xlog_put_bp(bp);
744 return 0;
745 } else if (first_cycle != 1) {
746 /*
747 * If the cycle of the last block is zero, the cycle of
748 * the first block must be 1. If it's not, maybe we're
749 * not looking at a log... Bail out.
750 */
751 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
752 return XFS_ERROR(EINVAL);
753 }
754
755 /* we have a partially zeroed log */
756 last_blk = log_bbnum-1;
757 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
758 goto bp_err;
759
760 /*
761 * Validate the answer. Because there is no way to guarantee that
762 * the entire log is made up of log records which are the same size,
763 * we scan over the defined maximum blocks. At this point, the maximum
764 * is not chosen to mean anything special. XXXmiken
765 */
766 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
767 ASSERT(num_scan_bblks <= INT_MAX);
768
769 if (last_blk < num_scan_bblks)
770 num_scan_bblks = last_blk;
771 start_blk = last_blk - num_scan_bblks;
772
773 /*
774 * We search for any instances of cycle number 0 that occur before
775 * our current estimate of the head. What we're trying to detect is
776 * 1 ... | 0 | 1 | 0...
777 * ^ binary search ends here
778 */
779 if ((error = xlog_find_verify_cycle(log, start_blk,
780 (int)num_scan_bblks, 0, &new_blk)))
781 goto bp_err;
782 if (new_blk != -1)
783 last_blk = new_blk;
784
785 /*
786 * Potentially backup over partial log record write. We don't need
787 * to search the end of the log because we know it is zero.
788 */
789 if ((error = xlog_find_verify_log_record(log, start_blk,
790 &last_blk, 0)) == -1) {
791 error = XFS_ERROR(EIO);
792 goto bp_err;
793 } else if (error)
794 goto bp_err;
795
796 *blk_no = last_blk;
797 bp_err:
798 xlog_put_bp(bp);
799 if (error)
800 return error;
801 return -1;
802 } /* xlog_find_zeroed */
803
804 /* stuff for transactional view */
805 STATIC void
806 xlog_unpack_data(xlog_rec_header_t *rhead,
807 xfs_caddr_t dp,
808 xlog_t *log)
809 {
810 int i, j, k;
811 union ich {
812 xlog_rec_header_t hic_header;
813 xlog_rec_ext_header_t hic_xheader;
814 char hic_sector[XLOG_HEADER_SIZE];
815 } *xhdr;
816
817 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
818 uint *up = (uint *)dp;
819 uint chksum = 0;
820 #endif
821
822 for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
823 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
824 *(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
825 dp += BBSIZE;
826 }
827
828 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
829 xhdr = (union ich*)rhead;
830 for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
831 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
832 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
833 *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
834 dp += BBSIZE;
835 }
836 }
837
838 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
839 /* divide length by 4 to get # words */
840 for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
841 chksum ^= INT_GET(*up, ARCH_CONVERT);
842 up++;
843 }
844 if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
845 if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) ||
846 ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
847 cmn_err(CE_DEBUG,
848 "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
849 INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
850 cmn_err(CE_DEBUG,
851 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
852 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
853 cmn_err(CE_DEBUG,
854 "XFS: LogR this is a LogV2 filesystem\n");
855 }
856 log->l_flags |= XLOG_CHKSUM_MISMATCH;
857 }
858 }
859 #endif /* DEBUG && XFS_LOUD_RECOVERY */
860 } /* xlog_unpack_data */
861
862 STATIC xlog_recover_t *
863 xlog_recover_find_tid(xlog_recover_t *q,
864 xlog_tid_t tid)
865 {
866 xlog_recover_t *p = q;
867
868 while (p != NULL) {
869 if (p->r_log_tid == tid)
870 break;
871 p = p->r_next;
872 }
873 return p;
874 } /* xlog_recover_find_tid */
875
876 STATIC void
877 xlog_recover_put_hashq(xlog_recover_t **q,
878 xlog_recover_t *trans)
879 {
880 trans->r_next = *q;
881 *q = trans;
882 } /* xlog_recover_put_hashq */
883
884 STATIC void
885 xlog_recover_new_tid(xlog_recover_t **q,
886 xlog_tid_t tid,
887 xfs_lsn_t lsn)
888 {
889 xlog_recover_t *trans;
890
891 trans = kmem_zalloc(sizeof(xlog_recover_t), 0);
892 trans->r_log_tid = tid;
893 trans->r_lsn = lsn;
894 xlog_recover_put_hashq(q, trans);
895 } /* xlog_recover_new_tid */
896
897
898 STATIC int
899 xlog_recover_unlink_tid(xlog_recover_t **q,
900 xlog_recover_t *trans)
901 {
902 xlog_recover_t *tp;
903 int found = 0;
904
905 ASSERT(trans != 0);
906 if (trans == *q) {
907 *q = (*q)->r_next;
908 } else {
909 tp = *q;
910 while (tp != 0) {
911 if (tp->r_next == trans) {
912 found = 1;
913 break;
914 }
915 tp = tp->r_next;
916 }
917 if (!found) {
918 xlog_warn(
919 "XFS: xlog_recover_unlink_tid: trans not found");
920 ASSERT(0);
921 return XFS_ERROR(EIO);
922 }
923 tp->r_next = tp->r_next->r_next;
924 }
925 return 0;
926 } /* xlog_recover_unlink_tid */
927
928 /*
929 * Free up any resources allocated by the transaction
930 *
931 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
932 */
933 STATIC void
934 xlog_recover_free_trans(xlog_recover_t *trans)
935 {
936 xlog_recover_item_t *first_item, *item, *free_item;
937 int i;
938
939 item = first_item = trans->r_itemq;
940 do {
941 free_item = item;
942 item = item->ri_next;
943 /* Free the regions in the item. */
944 for (i = 0; i < free_item->ri_cnt; i++) {
945 kmem_free(free_item->ri_buf[i].i_addr,
946 free_item->ri_buf[i].i_len);
947 }
948 /* Free the item itself */
949 kmem_free(free_item->ri_buf,
950 (free_item->ri_total * sizeof(xfs_log_iovec_t)));
951 kmem_free(free_item, sizeof(xlog_recover_item_t));
952 } while (first_item != item);
953 /* Free the transaction recover structure */
954 kmem_free(trans, sizeof(xlog_recover_t));
955 } /* xlog_recover_free_trans */
956
957
958 STATIC int
959 xlog_recover_commit_trans(xlog_t *log,
960 xlog_recover_t **q,
961 xlog_recover_t *trans,
962 int pass)
963 {
964 int error;
965
966 if ((error = xlog_recover_unlink_tid(q, trans)))
967 return error;
968 if ((error = xlog_recover_do_trans(log, trans, pass)))
969 return error;
970 xlog_recover_free_trans(trans); /* no error */
971 return 0;
972 } /* xlog_recover_commit_trans */
973
974 STATIC void
975 xlog_recover_insert_item_backq(xlog_recover_item_t **q,
976 xlog_recover_item_t *item)
977 {
978 if (*q == 0) {
979 item->ri_prev = item->ri_next = item;
980 *q = item;
981 } else {
982 item->ri_next = *q;
983 item->ri_prev = (*q)->ri_prev;
984 (*q)->ri_prev = item;
985 item->ri_prev->ri_next = item;
986 }
987 } /* xlog_recover_insert_item_backq */
988
989 STATIC void
990 xlog_recover_add_item(xlog_recover_item_t **itemq)
991 {
992 xlog_recover_item_t *item;
993
994 item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
995 xlog_recover_insert_item_backq(itemq, item);
996 } /* xlog_recover_add_item */
997
998 /* The next region to add is the start of a new region. It could be
999 * a whole region or it could be the first part of a new region. Because
1000 * of this, the assumption here is that the type and size fields of all
1001 * format structures fit into the first 32 bits of the structure.
1002 *
1003 * This works because all regions must be 32 bit aligned. Therefore, we
1004 * either have both fields or we have neither field. In the case we have
1005 * neither field, the data part of the region is zero length. We only have
1006 * a log_op_header and can throw away the header since a new one will appear
1007 * later. If we have at least 4 bytes, then we can determine how many regions
1008 * will appear in the current log item.
1009 */
1010 STATIC int
1011 xlog_recover_add_to_trans(xlog_recover_t *trans,
1012 xfs_caddr_t dp,
1013 int len)
1014 {
1015 xfs_inode_log_format_t *in_f; /* any will do */
1016 xlog_recover_item_t *item;
1017 xfs_caddr_t ptr;
1018
1019 if (!len)
1020 return 0;
1021 ptr = kmem_zalloc(len, 0);
1022 memcpy(ptr, dp, len);
1023
1024 in_f = (xfs_inode_log_format_t *)ptr;
1025 item = trans->r_itemq;
1026 if (item == 0) {
1027 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
1028 if (len == sizeof(xfs_trans_header_t))
1029 xlog_recover_add_item(&trans->r_itemq);
1030 memcpy(&trans->r_theader, dp, len); /* d, s, l */
1031 return 0;
1032 }
1033 if (item->ri_prev->ri_total != 0 &&
1034 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1035 xlog_recover_add_item(&trans->r_itemq);
1036 }
1037 item = trans->r_itemq;
1038 item = item->ri_prev;
1039
1040 if (item->ri_total == 0) { /* first region to be added */
1041 item->ri_total = in_f->ilf_size;
1042 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1043 item->ri_buf = kmem_zalloc((item->ri_total *
1044 sizeof(xfs_log_iovec_t)), 0);
1045 }
1046 ASSERT(item->ri_total > item->ri_cnt);
1047 /* Description region is ri_buf[0] */
1048 item->ri_buf[item->ri_cnt].i_addr = ptr;
1049 item->ri_buf[item->ri_cnt].i_len = len;
1050 item->ri_cnt++;
1051 return 0;
1052 } /* xlog_recover_add_to_trans */
1053
1054 STATIC int
1055 xlog_recover_add_to_cont_trans(xlog_recover_t *trans,
1056 xfs_caddr_t dp,
1057 int len)
1058 {
1059 xlog_recover_item_t *item;
1060 xfs_caddr_t ptr, old_ptr;
1061 int old_len;
1062
1063 item = trans->r_itemq;
1064 if (item == 0) {
1065 /* finish copying rest of trans header */
1066 xlog_recover_add_item(&trans->r_itemq);
1067 ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
1068 memcpy(ptr, dp, len); /* d, s, l */
1069 return 0;
1070 }
1071 item = item->ri_prev;
1072
1073 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1074 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1075
1076 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
1077 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1078 item->ri_buf[item->ri_cnt-1].i_len += len;
1079 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1080 return 0;
1081 } /* xlog_recover_add_to_cont_trans */
1082
1083 STATIC int
1084 xlog_recover_unmount_trans(xlog_recover_t *trans)
1085 {
1086 /* Do nothing now */
1087 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
1088 return( 0 );
1089 } /* xlog_recover_unmount_trans */
1090
1091
1092 STATIC int
1093 xlog_recover_process_data(xlog_t *log,
1094 xlog_recover_t *rhash[],
1095 xlog_rec_header_t *rhead,
1096 xfs_caddr_t dp,
1097 int pass)
1098 {
1099 xfs_caddr_t lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT);
1100 int num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
1101 xlog_op_header_t *ohead;
1102 xlog_recover_t *trans;
1103 xlog_tid_t tid;
1104 int error;
1105 unsigned long hash;
1106 uint flags;
1107
1108 /* check the log format matches our own - else we can't recover */
1109 if (xlog_header_check_recover(log->l_mp, rhead))
1110 return (XFS_ERROR(EIO));
1111
1112 while ((dp < lp) && num_logops) {
1113 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
1114 ohead = (xlog_op_header_t *)dp;
1115 dp += sizeof(xlog_op_header_t);
1116 if (ohead->oh_clientid != XFS_TRANSACTION &&
1117 ohead->oh_clientid != XFS_LOG) {
1118 xlog_warn("XFS: xlog_recover_process_data: bad clientid");
1119 ASSERT(0);
1120 return (XFS_ERROR(EIO));
1121 }
1122 tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
1123 hash = XLOG_RHASH(tid);
1124 trans = xlog_recover_find_tid(rhash[hash], tid);
1125 if (trans == NULL) { /* not found; add new tid */
1126 if (ohead->oh_flags & XLOG_START_TRANS)
1127 xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT));
1128 } else {
1129 ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
1130 flags = ohead->oh_flags & ~XLOG_END_TRANS;
1131 if (flags & XLOG_WAS_CONT_TRANS)
1132 flags &= ~XLOG_CONTINUE_TRANS;
1133 switch (flags) {
1134 case XLOG_COMMIT_TRANS: {
1135 error = xlog_recover_commit_trans(log, &rhash[hash],
1136 trans, pass);
1137 break;
1138 }
1139 case XLOG_UNMOUNT_TRANS: {
1140 error = xlog_recover_unmount_trans(trans);
1141 break;
1142 }
1143 case XLOG_WAS_CONT_TRANS: {
1144 error = xlog_recover_add_to_cont_trans(trans, dp,
1145 INT_GET(ohead->oh_len, ARCH_CONVERT));
1146 break;
1147 }
1148 case XLOG_START_TRANS : {
1149 xlog_warn("XFS: xlog_recover_process_data: bad transaction");
1150 ASSERT(0);
1151 error = XFS_ERROR(EIO);
1152 break;
1153 }
1154 case 0:
1155 case XLOG_CONTINUE_TRANS: {
1156 error = xlog_recover_add_to_trans(trans, dp,
1157 INT_GET(ohead->oh_len, ARCH_CONVERT));
1158 break;
1159 }
1160 default: {
1161 xlog_warn("XFS: xlog_recover_process_data: bad flag");
1162 ASSERT(0);
1163 error = XFS_ERROR(EIO);
1164 break;
1165 }
1166 } /* switch */
1167 if (error)
1168 return error;
1169 } /* if */
1170 dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
1171 num_logops--;
1172 }
1173 return( 0 );
1174 } /* xlog_recover_process_data */
1175
1176 /*
1177 * Read the log from tail to head and process the log records found.
1178 * Handle the two cases where the tail and head are in the same cycle
1179 * and where the active portion of the log wraps around the end of
1180 * the physical log separately. The pass parameter is passed through
1181 * to the routines called to process the data and is not looked at
1182 * here.
1183 */
1184 int
1185 xlog_do_recovery_pass(xlog_t *log,
1186 xfs_daddr_t head_blk,
1187 xfs_daddr_t tail_blk,
1188 int pass)
1189 {
1190 xlog_rec_header_t *rhead;
1191 xfs_daddr_t blk_no;
1192 xfs_caddr_t bufaddr;
1193 xfs_buf_t *hbp, *dbp;
1194 int error, h_size;
1195 int bblks, split_bblks;
1196 int hblks, split_hblks, wrapped_hblks;
1197 xlog_recover_t *rhash[XLOG_RHASH_SIZE];
1198
1199 error = 0;
1200
1201
1202 /*
1203 * Read the header of the tail block and get the iclog buffer size from
1204 * h_size. Use this to tell how many sectors make up the log header.
1205 */
1206 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
1207 /*
1208 * When using variable length iclogs, read first sector of iclog
1209 * header and extract the header size from it. Get a new hbp that
1210 * is the correct size.
1211 */
1212 hbp = xlog_get_bp(1, log->l_mp);
1213 if (!hbp)
1214 return ENOMEM;
1215 if ((error = xlog_bread(log, tail_blk, 1, hbp)))
1216 goto bread_err1;
1217 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1218 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
1219 XLOG_HEADER_MAGIC_NUM);
1220 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
1221 xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
1222 error = XFS_ERROR(EIO);
1223 goto bread_err1;
1224 }
1225 h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
1226
1227 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) &&
1228 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1229 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1230 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1231 hblks++;
1232 xlog_put_bp(hbp);
1233 hbp = xlog_get_bp(hblks, log->l_mp);
1234 } else {
1235 hblks=1;
1236 }
1237 } else {
1238 hblks=1;
1239 hbp = xlog_get_bp(1, log->l_mp);
1240 h_size = XLOG_BIG_RECORD_BSIZE;
1241 }
1242
1243 if (!hbp)
1244 return ENOMEM;
1245 dbp = xlog_get_bp(BTOBB(h_size),log->l_mp);
1246 if (!dbp) {
1247 xlog_put_bp(hbp);
1248 return ENOMEM;
1249 }
1250
1251 memset(rhash, 0, sizeof(rhash));
1252 if (tail_blk <= head_blk) {
1253 for (blk_no = tail_blk; blk_no < head_blk; ) {
1254 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1255 goto bread_err2;
1256 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1257 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1258 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1259 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */
1260
1261 if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
1262 (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
1263 (bblks <= 0) ||
1264 (blk_no > log->l_logBBsize)) {
1265 error = EFSCORRUPTED;
1266 goto bread_err2;
1267 }
1268
1269 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
1270 xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
1271 error = XFS_ERROR(EIO);
1272 goto bread_err2;
1273 }
1274 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */
1275 if (bblks > 0) {
1276 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1277 goto bread_err2;
1278 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1279 if ((error = xlog_recover_process_data(log, rhash,
1280 rhead, XFS_BUF_PTR(dbp),
1281 pass)))
1282 goto bread_err2;
1283 }
1284 blk_no += (bblks+hblks);
1285 }
1286 } else {
1287 /*
1288 * Perform recovery around the end of the physical log. When the head
1289 * is not on the same cycle number as the tail, we can't do a sequential
1290 * recovery as above.
1291 */
1292 blk_no = tail_blk;
1293 while (blk_no < log->l_logBBsize) {
1294 /*
1295 * Check for header wrapping around physical end-of-log
1296 */
1297 wrapped_hblks = 0;
1298 if (blk_no+hblks <= log->l_logBBsize) {
1299 /* Read header in one read */
1300 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1301 goto bread_err2;
1302 } else {
1303 /* This log record is split across physical end of log */
1304 split_hblks = 0;
1305 if (blk_no != log->l_logBBsize) {
1306 /* some data is before physical end of log */
1307 ASSERT(blk_no <= INT_MAX);
1308 split_hblks = log->l_logBBsize - (int)blk_no;
1309 ASSERT(split_hblks > 0);
1310 if ((error = xlog_bread(log, blk_no, split_hblks, hbp)))
1311 goto bread_err2;
1312 }
1313 bufaddr = XFS_BUF_PTR(hbp);
1314 XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks),
1315 BBTOB(hblks - split_hblks));
1316 wrapped_hblks = hblks - split_hblks;
1317 if ((error = xlog_bread(log, 0, wrapped_hblks, hbp)))
1318 goto bread_err2;
1319 XFS_BUF_SET_PTR(hbp, bufaddr, hblks);
1320 }
1321 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1322 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1323 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1324 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1325
1326 /* LR body must have data or it wouldn't have been written */
1327 ASSERT(bblks > 0);
1328 blk_no += hblks; /* successfully read header */
1329
1330 if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
1331 (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
1332 (bblks <= 0)) {
1333 error = EFSCORRUPTED;
1334 goto bread_err2;
1335 }
1336
1337 /* Read in data for log record */
1338 if (blk_no+bblks <= log->l_logBBsize) {
1339 if ((error = xlog_bread(log, blk_no, bblks, dbp)))
1340 goto bread_err2;
1341 } else {
1342 /* This log record is split across physical end of log */
1343 split_bblks = 0;
1344 if (blk_no != log->l_logBBsize) {
1345
1346 /* some data is before physical end of log */
1347 ASSERT(blk_no <= INT_MAX);
1348 split_bblks = log->l_logBBsize - (int)blk_no;
1349 ASSERT(split_bblks > 0);
1350 if ((error = xlog_bread(log, blk_no, split_bblks, dbp)))
1351 goto bread_err2;
1352 }
1353 bufaddr = XFS_BUF_PTR(dbp);
1354 XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks),
1355 BBTOB(bblks - split_bblks));
1356 if ((error = xlog_bread(log, wrapped_hblks,
1357 bblks - split_bblks, dbp)))
1358 goto bread_err2;
1359 XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE);
1360 }
1361 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1362 if ((error = xlog_recover_process_data(log, rhash,
1363 rhead, XFS_BUF_PTR(dbp),
1364 pass)))
1365 goto bread_err2;
1366 blk_no += bblks;
1367 }
1368
1369 ASSERT(blk_no >= log->l_logBBsize);
1370 blk_no -= log->l_logBBsize;
1371
1372 /* read first part of physical log */
1373 while (blk_no < head_blk) {
1374 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1375 goto bread_err2;
1376 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1377 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1378 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1379 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1380 ASSERT(bblks > 0);
1381 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1382 goto bread_err2;
1383 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1384 if ((error = xlog_recover_process_data(log, rhash,
1385 rhead, XFS_BUF_PTR(dbp),
1386 pass)))
1387 goto bread_err2;
1388 blk_no += (bblks+hblks);
1389 }
1390 }
1391
1392 bread_err2:
1393 xlog_put_bp(dbp);
1394 bread_err1:
1395 xlog_put_bp(hbp);
1396
1397 return error;
1398 }