]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxlog/xfs_log_recover.c
white space cleanup
[thirdparty/xfsprogs-dev.git] / libxlog / xfs_log_recover.c
CommitLineData
d321ceac 1/*
0d3e0b37 2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
5000d01d 3 *
d321ceac
NS
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
5000d01d 7 *
d321ceac
NS
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5000d01d 11 *
d321ceac
NS
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
5000d01d 14 * or the like. Any license provided herein, whether implied or
d321ceac
NS
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
5000d01d 18 *
d321ceac
NS
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
5000d01d 22 *
d321ceac
NS
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
5000d01d
SL
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
d321ceac
NS
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33#include <libxlog.h>
34
35/*
36 * This routine finds (to an approximation) the first block in the physical
5000d01d 37 * log which contains the given cycle. It uses a binary search algorithm.
d321ceac
NS
38 * Note that the algorithm can not be perfect because the disk will not
39 * necessarily be perfect.
40 */
41int
42xlog_find_cycle_start(xlog_t *log,
5000d01d 43 xfs_buf_t *bp,
d321ceac
NS
44 xfs_daddr_t first_blk,
45 xfs_daddr_t *last_blk,
46 uint cycle)
47{
48 xfs_daddr_t mid_blk;
49 uint mid_cycle;
50 int error;
51
52 mid_blk = BLK_AVG(first_blk, *last_blk);
53 while (mid_blk != first_blk && mid_blk != *last_blk) {
54 if ((error = xlog_bread(log, mid_blk, 1, bp)))
55 return error;
56 mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
57 if (mid_cycle == cycle) {
58 *last_blk = mid_blk;
59 /* last_half_cycle == mid_cycle */
60 } else {
61 first_blk = mid_blk;
62 /* first_half_cycle == mid_cycle */
63 }
64 mid_blk = BLK_AVG(first_blk, *last_blk);
65 }
66 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
67 (mid_blk == *last_blk && mid_blk-1 == first_blk));
68
69 return 0;
70} /* xlog_find_cycle_start */
71
72
73/*
74 * Check that the range of blocks does not contain the cycle number
75 * given. The scan needs to occur from front to back and the ptr into the
76 * region must be updated since a later routine will need to perform another
77 * test. If the region is completely good, we end up returning the same
78 * last block number.
79 *
ce029dc1 80 * Set blkno to -1 if we encounter no errors. This is an invalid block number
d321ceac
NS
81 * since we don't ever expect logs to get this large.
82 */
83
ce029dc1 84STATIC int
5000d01d
SL
85xlog_find_verify_cycle( xlog_t *log,
86 xfs_daddr_t start_blk,
87 int nbblks,
88 uint stop_on_cycle_no,
ce029dc1 89 xfs_daddr_t *new_blk)
d321ceac 90{
ce029dc1 91 xfs_daddr_t i, j;
d321ceac 92 uint cycle;
5000d01d
SL
93 xfs_buf_t *bp;
94 char *buf = NULL;
95 int error = 0;
85a875e9
ES
96 xfs_daddr_t bufblks;
97
98 bufblks = 1 << ffs(nbblks);
d321ceac
NS
99
100 while (!(bp = xlog_get_bp(bufblks, log->l_mp))) {
5000d01d 101 /* can't get enough memory to do everything in one big buffer */
d321ceac 102 bufblks >>= 1;
5000d01d
SL
103 if (!bufblks)
104 return ENOMEM;
105 }
106
d321ceac
NS
107
108 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
109 int bcount = min(bufblks, (start_blk + nbblks - i));
110
5000d01d
SL
111 if ((error = xlog_bread(log, i, bcount, bp)))
112 goto out;
d321ceac
NS
113
114 buf = XFS_BUF_PTR(bp);
115 for (j = 0; j < bcount; j++) {
116 cycle = GET_CYCLE(buf, ARCH_CONVERT);
117 if (cycle == stop_on_cycle_no) {
e56fcdce 118 *new_blk = i+j;
d321ceac
NS
119 goto out;
120 }
5000d01d
SL
121
122 buf += BBSIZE;
d321ceac
NS
123 }
124 }
125
ce029dc1 126 *new_blk = -1;
d321ceac
NS
127
128out:
129 xlog_put_bp(bp);
130
131 return error;
132} /* xlog_find_verify_cycle */
133
134
135/*
136 * Potentially backup over partial log record write.
137 *
138 * In the typical case, last_blk is the number of the block directly after
139 * a good log record. Therefore, we subtract one to get the block number
140 * of the last block in the given buffer. extra_bblks contains the number
141 * of blocks we would have read on a previous read. This happens when the
142 * last log record is split over the end of the physical log.
143 *
144 * extra_bblks is the number of blocks potentially verified on a previous
145 * call to this routine.
146 */
147
148STATIC int
149xlog_find_verify_log_record(xlog_t *log,
5000d01d
SL
150 xfs_daddr_t start_blk,
151 xfs_daddr_t *last_blk,
d321ceac
NS
152 int extra_bblks)
153{
5000d01d 154 xfs_daddr_t i;
d321ceac 155 xfs_buf_t *bp;
5000d01d
SL
156 char *buf = NULL;
157 xlog_rec_header_t *head = NULL;
158 int error = 0;
159 int smallmem = 0;
160 int num_blks = *last_blk - start_blk;
73bf5988 161 int xhdrs;
d321ceac
NS
162
163 ASSERT(start_blk != 0 || *last_blk != start_blk);
164
165 if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
5000d01d
SL
166 if (!(bp = xlog_get_bp(1, log->l_mp)))
167 return ENOMEM;
168 smallmem = 1;
169 buf = XFS_BUF_PTR(bp);
d321ceac
NS
170 } else {
171 if ((error = xlog_bread(log, start_blk, num_blks, bp)))
172 goto out;
5000d01d 173 buf = XFS_BUF_PTR(bp) + (num_blks - 1) * BBSIZE;
d321ceac 174 }
5000d01d 175
d321ceac
NS
176
177 for (i=(*last_blk)-1; i>=0; i--) {
178 if (i < start_blk) {
179 /* legal log record not found */
180 xlog_warn("XFS: Log inconsistent (didn't find previous header)");
d321ceac 181 ASSERT(0);
d321ceac
NS
182 error = XFS_ERROR(EIO);
183 goto out;
184 }
185
186 if (smallmem && (error = xlog_bread(log, i, 1, bp)))
187 goto out;
5000d01d
SL
188 head = (xlog_rec_header_t*)buf;
189
d321ceac
NS
190 if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
191 break;
5000d01d
SL
192
193 if (!smallmem)
194 buf -= BBSIZE;
d321ceac
NS
195 }
196
197 /*
198 * We hit the beginning of the physical log & still no header. Return
199 * to caller. If caller can handle a return of -1, then this routine
200 * will be called again for the end of the physical log.
201 */
202 if (i == -1) {
5000d01d 203 error = -1;
d321ceac
NS
204 goto out;
205 }
206
207 /* we have the final block of the good log (the first block
208 * of the log record _before_ the head. So we check the uuid.
209 */
5000d01d 210
d321ceac 211 if ((error = xlog_header_check_mount(log->l_mp, head)))
5000d01d
SL
212 goto out;
213
d321ceac
NS
214 /*
215 * We may have found a log record header before we expected one.
216 * last_blk will be the 1st block # with a given cycle #. We may end
5000d01d
SL
217 * up reading an entire log record. In this case, we don't want to
218 * reset last_blk. Only when last_blk points in the middle of a log
d321ceac
NS
219 * record do we update last_blk.
220 */
73bf5988
SL
221 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
222 int h_size = INT_GET(head->h_size, ARCH_CONVERT);
223 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
224 if (h_size % XLOG_HEADER_CYCLE_SIZE)
225 xhdrs++;
226 } else {
227 xhdrs = 1;
228 }
229
5000d01d
SL
230 if (*last_blk - i + extra_bblks
231 != BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs)
d321ceac
NS
232 *last_blk = i;
233
234out:
235 xlog_put_bp(bp);
236
237 return error;
238} /* xlog_find_verify_log_record */
239
240/*
241 * Head is defined to be the point of the log where the next log write
242 * write could go. This means that incomplete LR writes at the end are
243 * eliminated when calculating the head. We aren't guaranteed that previous
5000d01d 244 * LR have complete transactions. We only know that a cycle number of
d321ceac
NS
245 * current cycle number -1 won't be present in the log if we start writing
246 * from our current block number.
247 *
248 * last_blk contains the block number of the first block with a given
249 * cycle number.
250 *
251 * Also called from xfs_log_print.c
252 *
253 * Return: zero if normal, non-zero if error.
254 */
255int
256xlog_find_head(xlog_t *log,
257 xfs_daddr_t *return_head_blk)
258{
5000d01d 259 xfs_buf_t *bp;
d321ceac 260 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
5000d01d 261 int num_scan_bblks;
d321ceac
NS
262 uint first_half_cycle, last_half_cycle;
263 uint stop_on_cycle;
5000d01d 264 int error, log_bbnum = log->l_logBBsize;
d321ceac
NS
265
266 /* Is the end of the log device zeroed? */
267 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
268 *return_head_blk = first_blk;
5000d01d
SL
269
270 /* is the whole lot zeroed? */
271 if (!first_blk) {
272 /* Linux XFS shouldn't generate totally zeroed logs -
273 * mkfs etc write a dummy unmount record to a fresh
274 * log so we can store the uuid in there
275 */
276 xlog_warn("XFS: totally zeroed log\n");
277 }
278
d321ceac
NS
279 return 0;
280 } else if (error) {
5000d01d 281 xlog_warn("XFS: empty log check failed");
d321ceac
NS
282 return error;
283 }
284
285 first_blk = 0; /* get cycle # of 1st block */
286 bp = xlog_get_bp(1,log->l_mp);
287 if (!bp)
ce029dc1 288 return ENOMEM;
d321ceac
NS
289 if ((error = xlog_bread(log, 0, 1, bp)))
290 goto bp_err;
291 first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
292
293 last_blk = head_blk = log_bbnum-1; /* get cycle # of last block */
294 if ((error = xlog_bread(log, last_blk, 1, bp)))
295 goto bp_err;
296 last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
297 ASSERT(last_half_cycle != 0);
298
299 /*
300 * If the 1st half cycle number is equal to the last half cycle number,
301 * then the entire log is stamped with the same cycle number. In this
5000d01d 302 * case, head_blk can't be set to zero (which makes sense). The below
d321ceac
NS
303 * math doesn't work out properly with head_blk equal to zero. Instead,
304 * we set it to log_bbnum which is an illegal block number, but this
305 * value makes the math correct. If head_blk doesn't changed through
306 * all the tests below, *head_blk is set to zero at the very end rather
5000d01d 307 * than log_bbnum. In a sense, log_bbnum and zero are the same block
d321ceac
NS
308 * in a circular file.
309 */
310 if (first_half_cycle == last_half_cycle) {
311 /*
312 * In this case we believe that the entire log should have cycle
313 * number last_half_cycle. We need to scan backwards from the
314 * end verifying that there are no holes still containing
5000d01d 315 * last_half_cycle - 1. If we find such a hole, then the start
d321ceac 316 * of that hole will be the new head. The simple case looks like
5000d01d 317 * x | x ... | x - 1 | x
d321ceac 318 * Another case that fits this picture would be
5000d01d 319 * x | x + 1 | x ... | x
d321ceac
NS
320 * In this case the head really is somwhere at the end of the
321 * log, as one of the latest writes at the beginning was incomplete.
322 * One more case is
5000d01d 323 * x | x + 1 | x ... | x - 1 | x
d321ceac
NS
324 * This is really the combination of the above two cases, and the
325 * head has to end up at the start of the x-1 hole at the end of
326 * the log.
5000d01d 327 *
d321ceac
NS
328 * In the 256k log case, we will read from the beginning to the
329 * end of the log and search for cycle numbers equal to x-1. We
330 * don't worry about the x+1 blocks that we encounter, because
331 * we know that they cannot be the head since the log started with
332 * x.
333 */
334 head_blk = log_bbnum;
335 stop_on_cycle = last_half_cycle - 1;
336 } else {
337 /*
338 * In this case we want to find the first block with cycle number
339 * matching last_half_cycle. We expect the log to be some
340 * variation on
5000d01d 341 * x + 1 ... | x ...
d321ceac 342 * The first block with cycle number x (last_half_cycle) will be
5000d01d 343 * where the new head belongs. First we do a binary search for
d321ceac
NS
344 * the first occurrence of last_half_cycle. The binary search
345 * may not be totally accurate, so then we scan back from there
346 * looking for occurrences of last_half_cycle before us. If
347 * that backwards scan wraps around the beginning of the log,
348 * then we look for occurrences of last_half_cycle - 1 at the
349 * end of the log. The cases we're looking for look like
5000d01d
SL
350 * x + 1 ... | x | x + 1 | x ...
351 * ^ binary search stopped here
d321ceac 352 * or
5000d01d
SL
353 * x + 1 ... | x ... | x - 1 | x
354 * <---------> less than scan distance
d321ceac
NS
355 */
356 stop_on_cycle = last_half_cycle;
357 if ((error = xlog_find_cycle_start(log, bp, first_blk,
358 &head_blk, last_half_cycle)))
359 goto bp_err;
360 }
361
362 /*
5000d01d 363 * Now validate the answer. Scan back some number of maximum possible
d321ceac
NS
364 * blocks and make sure each one has the expected cycle number. The
365 * maximum is determined by the total possible amount of buffering
366 * in the in-core log. The following number can be made tighter if
367 * we actually look at the block size of the filesystem.
368 */
73bf5988 369 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
d321ceac
NS
370 if (head_blk >= num_scan_bblks) {
371 /*
372 * We are guaranteed that the entire check can be performed
373 * in one buffer.
374 */
375 start_blk = head_blk - num_scan_bblks;
ce029dc1
ES
376 if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
377 stop_on_cycle, &new_blk)))
606d804d 378 goto bp_err;
ce029dc1
ES
379 if (new_blk != -1)
380 head_blk = new_blk;
d321ceac 381 } else { /* need to read 2 parts of log */
5000d01d 382 /*
d321ceac 383 * We are going to scan backwards in the log in two parts. First
5000d01d 384 * we scan the physical end of the log. In this part of the log,
d321ceac
NS
385 * we are looking for blocks with cycle number last_half_cycle - 1.
386 * If we find one, then we know that the log starts there, as we've
387 * found a hole that didn't get written in going around the end
5000d01d
SL
388 * of the physical log. The simple case for this is
389 * x + 1 ... | x ... | x - 1 | x
390 * <---------> less than scan distance
d321ceac
NS
391 * If all of the blocks at the end of the log have cycle number
392 * last_half_cycle, then we check the blocks at the start of the
393 * log looking for occurrences of last_half_cycle. If we find one,
394 * then our current estimate for the location of the first
395 * occurrence of last_half_cycle is wrong and we move back to the
396 * hole we've found. This case looks like
5000d01d
SL
397 * x + 1 ... | x | x + 1 | x ...
398 * ^ binary search stopped here
d321ceac 399 * Another case we need to handle that only occurs in 256k logs is
5000d01d
SL
400 * x + 1 ... | x ... | x+1 | x ...
401 * ^ binary search stops here
d321ceac
NS
402 * In a 256k log, the scan at the end of the log will see the x+1
403 * blocks. We need to skip past those since that is certainly not
5000d01d 404 * the head of the log. By searching for last_half_cycle-1 we
d321ceac
NS
405 * accomplish that.
406 */
407 start_blk = log_bbnum - num_scan_bblks + head_blk;
408 ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
ce029dc1
ES
409 if ((error = xlog_find_verify_cycle(log, start_blk,
410 num_scan_bblks-(int)head_blk, (stop_on_cycle - 1),
411 &new_blk)))
5000d01d 412 goto bp_err;
ce029dc1 413 if (new_blk != -1) {
d321ceac
NS
414 head_blk = new_blk;
415 goto bad_blk;
416 }
417
418 /*
419 * Scan beginning of log now. The last part of the physical log
420 * is good. This scan needs to verify that it doesn't find the
421 * last_half_cycle.
422 */
423 start_blk = 0;
424 ASSERT(head_blk <= INT_MAX);
ce029dc1
ES
425 if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
426 stop_on_cycle, &new_blk)))
427 goto bp_err;
428 if (new_blk != -1)
d321ceac
NS
429 head_blk = new_blk;
430 }
431
432bad_blk:
433 /*
434 * Now we need to make sure head_blk is not pointing to a block in
435 * the middle of a log record.
436 */
437 num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
438 if (head_blk >= num_scan_bblks) {
5000d01d 439 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
d321ceac
NS
440
441 /* start ptr at last block ptr before head_blk */
442 if ((error = xlog_find_verify_log_record(log,
443 start_blk,
444 &head_blk,
445 0)) == -1) {
446 error = XFS_ERROR(EIO);
447 goto bp_err;
448 } else if (error)
449 goto bp_err;
450 } else {
451 start_blk = 0;
452 ASSERT(head_blk <= INT_MAX);
453 if ((error = xlog_find_verify_log_record(log,
454 start_blk,
455 &head_blk,
456 0)) == -1) {
457 /* We hit the beginning of the log during our search */
458 start_blk = log_bbnum - num_scan_bblks + head_blk;
459 new_blk = log_bbnum;
460 ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
461 ASSERT(head_blk <= INT_MAX);
462 if ((error = xlog_find_verify_log_record(log,
463 start_blk,
464 &new_blk,
465 (int)head_blk)) == -1) {
466 error = XFS_ERROR(EIO);
467 goto bp_err;
468 } else if (error)
469 goto bp_err;
470 if (new_blk != log_bbnum)
471 head_blk = new_blk;
472 } else if (error)
473 goto bp_err;
474 }
475
476 xlog_put_bp(bp);
477 if (head_blk == log_bbnum)
478 *return_head_blk = 0;
479 else
480 *return_head_blk = head_blk;
481 /*
482 * When returning here, we have a good block number. Bad block
483 * means that during a previous crash, we didn't have a clean break
5000d01d 484 * from cycle number N to cycle number N-1. In this case, we need
d321ceac
NS
485 * to find the first block with cycle number N-1.
486 */
487 return 0;
488
489bp_err:
490 xlog_put_bp(bp);
491
5000d01d
SL
492 if (error)
493 xlog_warn("XFS: failed to find log head");
494
d321ceac
NS
495 return error;
496} /* xlog_find_head */
497
498/*
499 * Find the sync block number or the tail of the log.
500 *
501 * This will be the block number of the last record to have its
502 * associated buffers synced to disk. Every log record header has
503 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
5000d01d 504 * to get a sync block number. The only concern is to figure out which
d321ceac
NS
505 * log record header to believe.
506 *
507 * The following algorithm uses the log record header with the largest
5000d01d 508 * lsn. The entire log record does not need to be valid. We only care
d321ceac
NS
509 * that the header is valid.
510 *
511 * We could speed up search by using current head_blk buffer, but it is not
512 * available.
513 */
514int
515xlog_find_tail(xlog_t *log,
516 xfs_daddr_t *head_blk,
517 xfs_daddr_t *tail_blk,
518 int readonly)
519{
520 xlog_rec_header_t *rhead;
521 xlog_op_header_t *op_head;
522 xfs_buf_t *bp;
523 int error, i, found;
524 xfs_daddr_t umount_data_blk;
525 xfs_daddr_t after_umount_blk;
526 xfs_lsn_t tail_lsn;
73bf5988 527 int hblks;
5000d01d 528
d321ceac
NS
529 found = error = 0;
530
531 /*
5000d01d 532 * Find previous log record
d321ceac
NS
533 */
534 if ((error = xlog_find_head(log, head_blk)))
535 return error;
536
537 bp = xlog_get_bp(1,log->l_mp);
538 if (!bp)
ce029dc1 539 return ENOMEM;
d321ceac
NS
540 if (*head_blk == 0) { /* special case */
541 if ((error = xlog_bread(log, 0, 1, bp)))
542 goto bread_err;
543 if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) {
544 *tail_blk = 0;
545 /* leave all other log inited values alone */
546 goto exit;
547 }
548 }
549
550 /*
551 * Search backwards looking for log record header block
552 */
553 ASSERT(*head_blk < INT_MAX);
554 for (i=(int)(*head_blk)-1; i>=0; i--) {
555 if ((error = xlog_bread(log, i, 1, bp)))
556 goto bread_err;
557 if (INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
558 found = 1;
559 break;
560 }
561 }
562 /*
563 * If we haven't found the log record header block, start looking
564 * again from the end of the physical log. XXXmiken: There should be
565 * a check here to make sure we didn't search more than N blocks in
566 * the previous code.
567 */
568 if (!found) {
569 for (i=log->l_logBBsize-1; i>=(int)(*head_blk); i--) {
570 if ((error = xlog_bread(log, i, 1, bp)))
571 goto bread_err;
572 if (INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
573 found = 2;
574 break;
575 }
576 }
577 }
578 if (!found) {
579 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
580 ASSERT(0);
581 return XFS_ERROR(EIO);
582 }
583
584 /* find blk_no of tail of log */
585 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
586 *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT);
587
588 /*
589 * Reset log values according to the state of the log when we
590 * crashed. In the case where head_blk == 0, we bump curr_cycle
591 * one because the next write starts a new cycle rather than
592 * continuing the cycle of the last good log record. At this
593 * point we have guaranteed that all partial log records have been
594 * accounted for. Therefore, we know that the last good log record
595 * written was complete and ended exactly on the end boundary
596 * of the physical log.
597 */
598 log->l_prev_block = i;
599 log->l_curr_block = (int)*head_blk;
600 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
601 if (found == 2)
602 log->l_curr_cycle++;
603 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
604 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
605 log->l_grant_reserve_cycle = log->l_curr_cycle;
606 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
607 log->l_grant_write_cycle = log->l_curr_cycle;
608 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
609
610 /*
611 * Look for unmount record. If we find it, then we know there
5000d01d 612 * was a clean unmount. Since 'i' could be the last block in
d321ceac
NS
613 * the physical log, we convert to a log block before comparing
614 * to the head_blk.
615 *
616 * Save the current tail lsn to use to pass to
617 * xlog_clear_stale_blocks() below. We won't want to clear the
618 * unmount record if there is one, so we pass the lsn of the
619 * unmount record rather than the block after it.
620 */
73bf5988
SL
621 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
622 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
623 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
5000d01d 624 if ((h_version && XLOG_VERSION_2) &&
73bf5988
SL
625 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
626 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
627 if (h_size % XLOG_HEADER_CYCLE_SIZE)
628 hblks++;
629 } else {
630 hblks = 1;
631 }
632 } else {
633 hblks = 1;
634 }
635 after_umount_blk = (i + hblks +
636 (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
d321ceac
NS
637 tail_lsn = log->l_tail_lsn;
638 if (*head_blk == after_umount_blk && INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
73bf5988 639 umount_data_blk = (i + hblks) % log->l_logBBsize;
d321ceac
NS
640 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
641 goto bread_err;
642 }
643 op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp);
644 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
645 /*
646 * Set tail and last sync so that newly written
647 * log records will point recovery to after the
648 * current unmount record.
649 */
650 ASSIGN_ANY_LSN(log->l_tail_lsn, log->l_curr_cycle,
651 after_umount_blk, ARCH_NOCONVERT);
652 ASSIGN_ANY_LSN(log->l_last_sync_lsn, log->l_curr_cycle,
653 after_umount_blk, ARCH_NOCONVERT);
654 *tail_blk = after_umount_blk;
655 }
656 }
657
658#ifdef __KERNEL__
659 /*
660 * Make sure that there are no blocks in front of the head
661 * with the same cycle number as the head. This can happen
662 * because we allow multiple outstanding log writes concurrently,
663 * and the later writes might make it out before earlier ones.
664 *
665 * We use the lsn from before modifying it so that we'll never
666 * overwrite the unmount record after a clean unmount.
667 *
668 * Do this only if we are going to recover the filesystem
669 */
670 if (!readonly)
671 error = xlog_clear_stale_blocks(log, tail_lsn);
672#endif
673
674bread_err:
675exit:
676 xlog_put_bp(bp);
677
5000d01d
SL
678 if (error)
679 xlog_warn("XFS: failed to locate log tail");
d321ceac
NS
680
681 return error;
682} /* xlog_find_tail */
683
d321ceac
NS
684/*
685 * Is the log zeroed at all?
686 *
687 * The last binary search should be changed to perform an X block read
5000d01d 688 * once X becomes small enough. You can then search linearly through
d321ceac
NS
689 * the X blocks. This will cut down on the number of reads we need to do.
690 *
691 * If the log is partially zeroed, this routine will pass back the blkno
692 * of the first block with cycle number 0. It won't have a complete LR
693 * preceding it.
694 *
695 * Return:
696 * 0 => the log is completely written to
697 * -1 => use *blk_no as the first block of the log
698 * >0 => error has occurred
699 */
700int
701xlog_find_zeroed(struct log *log,
5000d01d 702 xfs_daddr_t *blk_no)
d321ceac
NS
703{
704 xfs_buf_t *bp;
5000d01d 705 uint first_cycle, last_cycle;
d321ceac 706 xfs_daddr_t new_blk, last_blk, start_blk;
5000d01d
SL
707 xfs_daddr_t num_scan_bblks;
708 int error, log_bbnum = log->l_logBBsize;
d321ceac
NS
709
710 error = 0;
711 /* check totally zeroed log */
712 bp = xlog_get_bp(1,log->l_mp);
713 if (!bp)
ce029dc1 714 return ENOMEM;
d321ceac
NS
715 if ((error = xlog_bread(log, 0, 1, bp)))
716 goto bp_err;
717 first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
718 if (first_cycle == 0) { /* completely zeroed log */
719 *blk_no = 0;
720 xlog_put_bp(bp);
721 return -1;
722 }
723
724 /* check partially zeroed log */
725 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
726 goto bp_err;
727 last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
728 if (last_cycle != 0) { /* log completely written to */
729 xlog_put_bp(bp);
730 return 0;
731 } else if (first_cycle != 1) {
732 /*
733 * If the cycle of the last block is zero, the cycle of
5000d01d
SL
734 * the first block must be 1. If it's not, maybe we're
735 * not looking at a log... Bail out.
d321ceac 736 */
5000d01d 737 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
d321ceac
NS
738 return XFS_ERROR(EINVAL);
739 }
5000d01d 740
d321ceac
NS
741 /* we have a partially zeroed log */
742 last_blk = log_bbnum-1;
743 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
744 goto bp_err;
745
746 /*
5000d01d 747 * Validate the answer. Because there is no way to guarantee that
d321ceac
NS
748 * the entire log is made up of log records which are the same size,
749 * we scan over the defined maximum blocks. At this point, the maximum
750 * is not chosen to mean anything special. XXXmiken
751 */
73bf5988 752 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
d321ceac 753 ASSERT(num_scan_bblks <= INT_MAX);
5000d01d 754
d321ceac
NS
755 if (last_blk < num_scan_bblks)
756 num_scan_bblks = last_blk;
757 start_blk = last_blk - num_scan_bblks;
5000d01d 758
d321ceac
NS
759 /*
760 * We search for any instances of cycle number 0 that occur before
761 * our current estimate of the head. What we're trying to detect is
5000d01d
SL
762 * 1 ... | 0 | 1 | 0...
763 * ^ binary search ends here
d321ceac 764 */
ce029dc1
ES
765 if ((error = xlog_find_verify_cycle(log, start_blk,
766 (int)num_scan_bblks, 0, &new_blk)))
606d804d 767 goto bp_err;
ce029dc1
ES
768 if (new_blk != -1)
769 last_blk = new_blk;
d321ceac
NS
770
771 /*
772 * Potentially backup over partial log record write. We don't need
773 * to search the end of the log because we know it is zero.
774 */
5000d01d 775 if ((error = xlog_find_verify_log_record(log, start_blk,
79c48ada
ES
776 &last_blk, 0)) == -1) {
777 error = XFS_ERROR(EIO);
778 goto bp_err;
779 } else if (error)
d321ceac
NS
780 goto bp_err;
781
782 *blk_no = last_blk;
783bp_err:
784 xlog_put_bp(bp);
785 if (error)
786 return error;
787 return -1;
788} /* xlog_find_zeroed */
789
790/* stuff for transactional view */
791STATIC void
792xlog_unpack_data(xlog_rec_header_t *rhead,
793 xfs_caddr_t dp,
794 xlog_t *log)
795{
73bf5988
SL
796 int i, j, k;
797 union ich {
798 xlog_rec_header_t hic_header;
799 xlog_rec_ext_header_t hic_xheader;
800 char hic_sector[XLOG_HEADER_SIZE];
801 } *xhdr;
802
d321ceac
NS
803#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
804 uint *up = (uint *)dp;
805 uint chksum = 0;
806#endif
807
73bf5988
SL
808 for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
809 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
5ce1d1f7 810 *(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
d321ceac
NS
811 dp += BBSIZE;
812 }
73bf5988
SL
813
814 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
815 xhdr = (union ich*)rhead;
816 for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
817 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
818 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
819 *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
820 dp += BBSIZE;
821 }
822 }
823
d321ceac
NS
824#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
825 /* divide length by 4 to get # words */
826 for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
827 chksum ^= INT_GET(*up, ARCH_CONVERT);
828 up++;
829 }
830 if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
831 if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) ||
832 ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
833 cmn_err(CE_DEBUG,
5000d01d 834 "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
d321ceac
NS
835 INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
836 cmn_err(CE_DEBUG,
837"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
73bf5988 838 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
5000d01d 839 cmn_err(CE_DEBUG,
73bf5988
SL
840 "XFS: LogR this is a LogV2 filesystem\n");
841 }
d321ceac
NS
842 log->l_flags |= XLOG_CHKSUM_MISMATCH;
843 }
5000d01d 844 }
d321ceac
NS
845#endif /* DEBUG && XFS_LOUD_RECOVERY */
846} /* xlog_unpack_data */
847
d321ceac
NS
848STATIC xlog_recover_t *
849xlog_recover_find_tid(xlog_recover_t *q,
850 xlog_tid_t tid)
851{
852 xlog_recover_t *p = q;
853
854 while (p != NULL) {
855 if (p->r_log_tid == tid)
856 break;
857 p = p->r_next;
858 }
859 return p;
860} /* xlog_recover_find_tid */
861
862STATIC void
863xlog_recover_put_hashq(xlog_recover_t **q,
864 xlog_recover_t *trans)
865{
866 trans->r_next = *q;
867 *q = trans;
868} /* xlog_recover_put_hashq */
869
870STATIC void
871xlog_recover_new_tid(xlog_recover_t **q,
872 xlog_tid_t tid,
873 xfs_lsn_t lsn)
874{
875 xlog_recover_t *trans;
876
877 trans = kmem_zalloc(sizeof(xlog_recover_t), 0);
878 trans->r_log_tid = tid;
879 trans->r_lsn = lsn;
880 xlog_recover_put_hashq(q, trans);
881} /* xlog_recover_new_tid */
882
883
884STATIC int
885xlog_recover_unlink_tid(xlog_recover_t **q,
886 xlog_recover_t *trans)
887{
888 xlog_recover_t *tp;
889 int found = 0;
890
891 ASSERT(trans != 0);
892 if (trans == *q) {
893 *q = (*q)->r_next;
894 } else {
895 tp = *q;
896 while (tp != 0) {
897 if (tp->r_next == trans) {
898 found = 1;
899 break;
900 }
901 tp = tp->r_next;
902 }
903 if (!found) {
904 xlog_warn(
905 "XFS: xlog_recover_unlink_tid: trans not found");
906 ASSERT(0);
907 return XFS_ERROR(EIO);
908 }
909 tp->r_next = tp->r_next->r_next;
910 }
911 return 0;
912} /* xlog_recover_unlink_tid */
913
914/*
915 * Free up any resources allocated by the transaction
916 *
917 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
918 */
919STATIC void
5000d01d 920xlog_recover_free_trans(xlog_recover_t *trans)
d321ceac
NS
921{
922 xlog_recover_item_t *first_item, *item, *free_item;
923 int i;
924
925 item = first_item = trans->r_itemq;
926 do {
927 free_item = item;
928 item = item->ri_next;
929 /* Free the regions in the item. */
930 for (i = 0; i < free_item->ri_cnt; i++) {
931 kmem_free(free_item->ri_buf[i].i_addr,
932 free_item->ri_buf[i].i_len);
933 }
934 /* Free the item itself */
935 kmem_free(free_item->ri_buf,
936 (free_item->ri_total * sizeof(xfs_log_iovec_t)));
937 kmem_free(free_item, sizeof(xlog_recover_item_t));
938 } while (first_item != item);
939 /* Free the transaction recover structure */
940 kmem_free(trans, sizeof(xlog_recover_t));
941} /* xlog_recover_free_trans */
942
943
944STATIC int
945xlog_recover_commit_trans(xlog_t *log,
946 xlog_recover_t **q,
947 xlog_recover_t *trans,
948 int pass)
949{
950 int error;
951
952 if ((error = xlog_recover_unlink_tid(q, trans)))
953 return error;
954 if ((error = xlog_recover_do_trans(log, trans, pass)))
955 return error;
956 xlog_recover_free_trans(trans); /* no error */
957 return 0;
958} /* xlog_recover_commit_trans */
959
960STATIC void
961xlog_recover_insert_item_backq(xlog_recover_item_t **q,
962 xlog_recover_item_t *item)
963{
964 if (*q == 0) {
965 item->ri_prev = item->ri_next = item;
966 *q = item;
967 } else {
968 item->ri_next = *q;
969 item->ri_prev = (*q)->ri_prev;
970 (*q)->ri_prev = item;
971 item->ri_prev->ri_next = item;
972 }
973} /* xlog_recover_insert_item_backq */
974
975STATIC void
976xlog_recover_add_item(xlog_recover_item_t **itemq)
977{
978 xlog_recover_item_t *item;
979
980 item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
981 xlog_recover_insert_item_backq(itemq, item);
982} /* xlog_recover_add_item */
983
5000d01d 984/* The next region to add is the start of a new region. It could be
d321ceac
NS
985 * a whole region or it could be the first part of a new region. Because
986 * of this, the assumption here is that the type and size fields of all
987 * format structures fit into the first 32 bits of the structure.
988 *
989 * This works because all regions must be 32 bit aligned. Therefore, we
990 * either have both fields or we have neither field. In the case we have
991 * neither field, the data part of the region is zero length. We only have
992 * a log_op_header and can throw away the header since a new one will appear
993 * later. If we have at least 4 bytes, then we can determine how many regions
994 * will appear in the current log item.
995 */
996STATIC int
997xlog_recover_add_to_trans(xlog_recover_t *trans,
998 xfs_caddr_t dp,
999 int len)
1000{
1001 xfs_inode_log_format_t *in_f; /* any will do */
1002 xlog_recover_item_t *item;
1003 xfs_caddr_t ptr;
1004
1005 if (!len)
1006 return 0;
1007 ptr = kmem_zalloc(len, 0);
1008 bcopy(dp, ptr, len);
5000d01d 1009
d321ceac
NS
1010 in_f = (xfs_inode_log_format_t *)ptr;
1011 item = trans->r_itemq;
1012 if (item == 0) {
1013 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
1014 if (len == sizeof(xfs_trans_header_t))
1015 xlog_recover_add_item(&trans->r_itemq);
1016 bcopy(dp, &trans->r_theader, len); /* s, d, l */
1017 return 0;
1018 }
1019 if (item->ri_prev->ri_total != 0 &&
1020 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1021 xlog_recover_add_item(&trans->r_itemq);
1022 }
1023 item = trans->r_itemq;
1024 item = item->ri_prev;
1025
1026 if (item->ri_total == 0) { /* first region to be added */
1027 item->ri_total = in_f->ilf_size;
1028 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1029 item->ri_buf = kmem_zalloc((item->ri_total *
1030 sizeof(xfs_log_iovec_t)), 0);
1031 }
1032 ASSERT(item->ri_total > item->ri_cnt);
1033 /* Description region is ri_buf[0] */
1034 item->ri_buf[item->ri_cnt].i_addr = ptr;
1035 item->ri_buf[item->ri_cnt].i_len = len;
1036 item->ri_cnt++;
1037 return 0;
1038} /* xlog_recover_add_to_trans */
1039
1040STATIC int
1041xlog_recover_add_to_cont_trans(xlog_recover_t *trans,
1042 xfs_caddr_t dp,
1043 int len)
1044{
1045 xlog_recover_item_t *item;
1046 xfs_caddr_t ptr, old_ptr;
1047 int old_len;
5000d01d 1048
d321ceac
NS
1049 item = trans->r_itemq;
1050 if (item == 0) {
1051 /* finish copying rest of trans header */
1052 xlog_recover_add_item(&trans->r_itemq);
1053 ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
1054 bcopy(dp, ptr, len); /* s, d, l */
1055 return 0;
1056 }
1057 item = item->ri_prev;
1058
1059 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1060 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1061
5000d01d
SL
1062 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
1063 bcopy(dp , &ptr[old_len], len); /* s, d, l */
d321ceac
NS
1064 item->ri_buf[item->ri_cnt-1].i_len += len;
1065 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1066 return 0;
1067} /* xlog_recover_add_to_cont_trans */
1068
1069STATIC int
1070xlog_recover_unmount_trans(xlog_recover_t *trans)
1071{
1072 /* Do nothing now */
1073 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
1074 return( 0 );
1075} /* xlog_recover_unmount_trans */
1076
1077
1078STATIC int
1079xlog_recover_process_data(xlog_t *log,
1080 xlog_recover_t *rhash[],
1081 xlog_rec_header_t *rhead,
1082 xfs_caddr_t dp,
1083 int pass)
1084{
1085 xfs_caddr_t lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT);
1086 int num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
1087 xlog_op_header_t *ohead;
1088 xlog_recover_t *trans;
1089 xlog_tid_t tid;
1090 int error;
1091 unsigned long hash;
1092 uint flags;
5000d01d 1093
d321ceac
NS
1094 /* check the log format matches our own - else we can't recover */
1095 if (xlog_header_check_recover(log->l_mp, rhead))
1096 return (XFS_ERROR(EIO));
5000d01d 1097
73bf5988 1098 while ((dp < lp) && num_logops) {
d321ceac
NS
1099 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
1100 ohead = (xlog_op_header_t *)dp;
1101 dp += sizeof(xlog_op_header_t);
1102 if (ohead->oh_clientid != XFS_TRANSACTION &&
1103 ohead->oh_clientid != XFS_LOG) {
1104 xlog_warn("XFS: xlog_recover_process_data: bad clientid");
1105 ASSERT(0);
1106 return (XFS_ERROR(EIO));
5000d01d 1107 }
d321ceac
NS
1108 tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
1109 hash = XLOG_RHASH(tid);
1110 trans = xlog_recover_find_tid(rhash[hash], tid);
1111 if (trans == NULL) { /* not found; add new tid */
1112 if (ohead->oh_flags & XLOG_START_TRANS)
1113 xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT));
1114 } else {
1115 ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
1116 flags = ohead->oh_flags & ~XLOG_END_TRANS;
1117 if (flags & XLOG_WAS_CONT_TRANS)
1118 flags &= ~XLOG_CONTINUE_TRANS;
1119 switch (flags) {
1120 case XLOG_COMMIT_TRANS: {
1121 error = xlog_recover_commit_trans(log, &rhash[hash],
1122 trans, pass);
1123 break;
1124 }
1125 case XLOG_UNMOUNT_TRANS: {
1126 error = xlog_recover_unmount_trans(trans);
1127 break;
1128 }
1129 case XLOG_WAS_CONT_TRANS: {
1130 error = xlog_recover_add_to_cont_trans(trans, dp,
1131 INT_GET(ohead->oh_len, ARCH_CONVERT));
1132 break;
1133 }
1134 case XLOG_START_TRANS : {
1135 xlog_warn("XFS: xlog_recover_process_data: bad transaction");
1136 ASSERT(0);
1137 error = XFS_ERROR(EIO);
1138 break;
1139 }
1140 case 0:
1141 case XLOG_CONTINUE_TRANS: {
1142 error = xlog_recover_add_to_trans(trans, dp,
1143 INT_GET(ohead->oh_len, ARCH_CONVERT));
1144 break;
1145 }
1146 default: {
1147 xlog_warn("XFS: xlog_recover_process_data: bad flag");
1148 ASSERT(0);
1149 error = XFS_ERROR(EIO);
1150 break;
1151 }
1152 } /* switch */
1153 if (error)
1154 return error;
1155 } /* if */
1156 dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
1157 num_logops--;
1158 }
1159 return( 0 );
1160} /* xlog_recover_process_data */
1161
1162/*
1163 * Read the log from tail to head and process the log records found.
1164 * Handle the two cases where the tail and head are in the same cycle
1165 * and where the active portion of the log wraps around the end of
5000d01d 1166 * the physical log separately. The pass parameter is passed through
d321ceac
NS
1167 * to the routines called to process the data and is not looked at
1168 * here.
1169 */
1170int
1171xlog_do_recovery_pass(xlog_t *log,
1172 xfs_daddr_t head_blk,
1173 xfs_daddr_t tail_blk,
1174 int pass)
1175{
1176 xlog_rec_header_t *rhead;
1177 xfs_daddr_t blk_no;
1178 xfs_caddr_t bufaddr;
1179 xfs_buf_t *hbp, *dbp;
73bf5988 1180 int error, h_size;
5000d01d
SL
1181 int bblks, split_bblks;
1182 int hblks, split_hblks, wrapped_hblks;
d321ceac
NS
1183 xlog_recover_t *rhash[XLOG_RHASH_SIZE];
1184
1185 error = 0;
73bf5988
SL
1186
1187
1188 /*
1189 * Read the header of the tail block and get the iclog buffer size from
5000d01d 1190 * h_size. Use this to tell how many sectors make up the log header.
73bf5988
SL
1191 */
1192 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
1193 /*
1194 * When using variable length iclogs, read first sector of iclog
5000d01d 1195 * header and extract the header size from it. Get a new hbp that
73bf5988
SL
1196 * is the correct size.
1197 */
1198 hbp = xlog_get_bp(1, log->l_mp);
1199 if (!hbp)
1200 return ENOMEM;
1201 if ((error = xlog_bread(log, tail_blk, 1, hbp)))
1202 goto bread_err1;
1203 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1204 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
1205 XLOG_HEADER_MAGIC_NUM);
1206 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
1207 xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
1208 error = XFS_ERROR(EIO);
1209 goto bread_err1;
1210 }
1211 h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
1212
1213 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) &&
1214 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1215 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1216 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1217 hblks++;
1218 xlog_put_bp(hbp);
1219 hbp = xlog_get_bp(hblks, log->l_mp);
1220 } else {
1221 hblks=1;
1222 }
1223 } else {
1224 hblks=1;
1225 hbp = xlog_get_bp(1, log->l_mp);
1226 h_size = XLOG_BIG_RECORD_BSIZE;
1227 }
1228
d321ceac 1229 if (!hbp)
ce029dc1 1230 return ENOMEM;
73bf5988 1231 dbp = xlog_get_bp(BTOBB(h_size),log->l_mp);
d321ceac
NS
1232 if (!dbp) {
1233 xlog_put_bp(hbp);
ce029dc1 1234 return ENOMEM;
d321ceac 1235 }
73bf5988 1236
d321ceac
NS
1237 bzero(rhash, sizeof(rhash));
1238 if (tail_blk <= head_blk) {
1239 for (blk_no = tail_blk; blk_no < head_blk; ) {
73bf5988
SL
1240 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1241 goto bread_err2;
d321ceac
NS
1242 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1243 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1244 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
5f651f11
NS
1245 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */
1246
1247 if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
1248 (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
1249 (bblks <= 0) ||
1250 (blk_no > log->l_logBBsize)) {
1251 error = EFSCORRUPTED;
1252 goto bread_err2;
1253 }
1254
73bf5988
SL
1255 if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
1256 xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
1257 error = XFS_ERROR(EIO);
1258 goto bread_err2;
1259 }
d321ceac
NS
1260 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */
1261 if (bblks > 0) {
73bf5988
SL
1262 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1263 goto bread_err2;
d321ceac
NS
1264 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1265 if ((error = xlog_recover_process_data(log, rhash,
1266 rhead, XFS_BUF_PTR(dbp),
1267 pass)))
73bf5988 1268 goto bread_err2;
d321ceac 1269 }
73bf5988 1270 blk_no += (bblks+hblks);
d321ceac
NS
1271 }
1272 } else {
1273 /*
5000d01d 1274 * Perform recovery around the end of the physical log. When the head
d321ceac
NS
1275 * is not on the same cycle number as the tail, we can't do a sequential
1276 * recovery as above.
1277 */
1278 blk_no = tail_blk;
1279 while (blk_no < log->l_logBBsize) {
73bf5988
SL
1280 /*
1281 * Check for header wrapping around physical end-of-log
1282 */
1283 wrapped_hblks = 0;
1284 if (blk_no+hblks <= log->l_logBBsize) {
1285 /* Read header in one read */
1286 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1287 goto bread_err2;
1288 } else {
1289 /* This log record is split across physical end of log */
1290 split_hblks = 0;
1291 if (blk_no != log->l_logBBsize) {
1292 /* some data is before physical end of log */
1293 ASSERT(blk_no <= INT_MAX);
1294 split_hblks = log->l_logBBsize - (int)blk_no;
1295 ASSERT(split_hblks > 0);
1296 if ((error = xlog_bread(log, blk_no, split_hblks, hbp)))
1297 goto bread_err2;
1298 }
1299 bufaddr = XFS_BUF_PTR(hbp);
1300 XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks),
1301 BBTOB(hblks - split_hblks));
1302 wrapped_hblks = hblks - split_hblks;
1303 if ((error = xlog_bread(log, 0, wrapped_hblks, hbp)))
1304 goto bread_err2;
1305 XFS_BUF_SET_PTR(hbp, bufaddr, hblks);
1306 }
d321ceac
NS
1307 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1308 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
5000d01d 1309 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
d321ceac
NS
1310 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1311
1312 /* LR body must have data or it wouldn't have been written */
1313 ASSERT(bblks > 0);
73bf5988 1314 blk_no += hblks; /* successfully read header */
d321ceac
NS
1315
1316 if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
1317 (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
73bf5988 1318 (bblks <= 0)) {
d321ceac 1319 error = EFSCORRUPTED;
73bf5988 1320 goto bread_err2;
d321ceac 1321 }
5000d01d 1322
d321ceac
NS
1323 /* Read in data for log record */
1324 if (blk_no+bblks <= log->l_logBBsize) {
1325 if ((error = xlog_bread(log, blk_no, bblks, dbp)))
73bf5988 1326 goto bread_err2;
d321ceac
NS
1327 } else {
1328 /* This log record is split across physical end of log */
1329 split_bblks = 0;
1330 if (blk_no != log->l_logBBsize) {
1331
1332 /* some data is before physical end of log */
1333 ASSERT(blk_no <= INT_MAX);
1334 split_bblks = log->l_logBBsize - (int)blk_no;
1335 ASSERT(split_bblks > 0);
1336 if ((error = xlog_bread(log, blk_no, split_bblks, dbp)))
73bf5988 1337 goto bread_err2;
d321ceac
NS
1338 }
1339 bufaddr = XFS_BUF_PTR(dbp);
1340 XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks),
1341 BBTOB(bblks - split_bblks));
73bf5988
SL
1342 if ((error = xlog_bread(log, wrapped_hblks,
1343 bblks - split_bblks, dbp)))
1344 goto bread_err2;
1345 XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE);
d321ceac
NS
1346 }
1347 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1348 if ((error = xlog_recover_process_data(log, rhash,
1349 rhead, XFS_BUF_PTR(dbp),
1350 pass)))
73bf5988 1351 goto bread_err2;
d321ceac
NS
1352 blk_no += bblks;
1353 }
1354
1355 ASSERT(blk_no >= log->l_logBBsize);
1356 blk_no -= log->l_logBBsize;
1357
1358 /* read first part of physical log */
1359 while (blk_no < head_blk) {
73bf5988
SL
1360 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1361 goto bread_err2;
d321ceac
NS
1362 rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
1363 ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
1364 ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
1365 bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1366 ASSERT(bblks > 0);
73bf5988
SL
1367 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1368 goto bread_err2;
d321ceac
NS
1369 xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
1370 if ((error = xlog_recover_process_data(log, rhash,
1371 rhead, XFS_BUF_PTR(dbp),
1372 pass)))
73bf5988
SL
1373 goto bread_err2;
1374 blk_no += (bblks+hblks);
5000d01d 1375 }
d321ceac
NS
1376 }
1377
73bf5988 1378bread_err2:
d321ceac 1379 xlog_put_bp(dbp);
73bf5988 1380bread_err1:
d321ceac
NS
1381 xlog_put_bp(hbp);
1382
1383 return error;
1384}