]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxlog/xfs_log_recover.c
Update copyright/license notices to match SGI legal prefered boilerplate.
[thirdparty/xfsprogs-dev.git] / libxlog / xfs_log_recover.c
CommitLineData
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
18
1d7e80ee 19#include <xfs/libxlog.h>
d321ceac 20
a562a63b
NS
21#define xlog_unpack_data_checksum(rhead, dp, log) ((void)0)
22#define xlog_clear_stale_blocks(log, tail_lsn) (0)
23#define xfs_readonly_buftarg(buftarg) (0)
24
d321ceac
NS
25/*
26 * This routine finds (to an approximation) the first block in the physical
4ed50f8a 27 * log which contains the given cycle. It uses a binary search algorithm.
d321ceac
NS
28 * Note that the algorithm can not be perfect because the disk will not
29 * necessarily be perfect.
30 */
31int
a562a63b
NS
32xlog_find_cycle_start(
33 xlog_t *log,
34 xfs_buf_t *bp,
35 xfs_daddr_t first_blk,
36 xfs_daddr_t *last_blk,
37 uint cycle)
d321ceac 38{
a562a63b 39 xfs_caddr_t offset;
ffe29fb5
NS
40 xfs_daddr_t mid_blk;
41 uint mid_cycle;
42 int error;
d321ceac
NS
43
44 mid_blk = BLK_AVG(first_blk, *last_blk);
45 while (mid_blk != first_blk && mid_blk != *last_blk) {
46 if ((error = xlog_bread(log, mid_blk, 1, bp)))
47 return error;
a562a63b
NS
48 offset = xlog_align(log, mid_blk, 1, bp);
49 mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
d321ceac
NS
50 if (mid_cycle == cycle) {
51 *last_blk = mid_blk;
52 /* last_half_cycle == mid_cycle */
53 } else {
54 first_blk = mid_blk;
55 /* first_half_cycle == mid_cycle */
56 }
57 mid_blk = BLK_AVG(first_blk, *last_blk);
58 }
59 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
60 (mid_blk == *last_blk && mid_blk-1 == first_blk));
61
62 return 0;
a562a63b 63}
d321ceac
NS
64
/*
 * Check that the range of blocks does not contain the cycle number
 * given.  The scan needs to occur from front to back and the ptr into the
 * region must be updated since a later routine will need to perform another
 * test.  If the region is completely good, we end up returning the same
 * last block number.
 *
 * Set blkno to -1 if we encounter no errors.  This is an invalid block number
 * since we don't ever expect logs to get this large.
 */
STATIC int
xlog_find_verify_cycle(
	xlog_t		*log,
	xfs_daddr_t	start_blk,
	int		nbblks,
	uint		stop_on_cycle_no,
	xfs_daddr_t	*new_blk)
{
	xfs_daddr_t	i, j;
	uint		cycle;
	xfs_buf_t	*bp;
	xfs_daddr_t	bufblks;	/* read-chunk size, shrunk on alloc failure */
	xfs_caddr_t	buf = NULL;
	int		error = 0;

	/* round the chunk size up past nbblks (ffs is 1-based, so this
	 * yields at least 2*nbblks rounded to a power of two) */
	bufblks = 1 << ffs(nbblks);

	while (!(bp = xlog_get_bp(log, bufblks))) {
		/* can't get enough memory to do everything in one big buffer */
		bufblks >>= 1;
		/* refuse to go below the sector granularity of the log device */
		if (bufblks <= log->l_sectbb_log)
			return ENOMEM;
	}

	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
		int	bcount;

		/* last chunk may be shorter than bufblks */
		bcount = min(bufblks, (start_blk + nbblks - i));

		if ((error = xlog_bread(log, i, bcount, bp)))
			goto out;

		buf = xlog_align(log, i, bcount, bp);
		/* walk each basic block of the chunk looking for the cycle */
		for (j = 0; j < bcount; j++) {
			cycle = GET_CYCLE(buf, ARCH_CONVERT);
			if (cycle == stop_on_cycle_no) {
				/* found it: report the offending block */
				*new_blk = i+j;
				goto out;
			}

			buf += BBSIZE;
		}
	}

	/* whole range clean: -1 signals "no block with that cycle" */
	*new_blk = -1;

out:
	xlog_put_bp(bp);
	return error;
}
d321ceac
NS
125
/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 *
 * Returns 0 on success, -1 if no record header was found before hitting
 * block 0 (caller may retry against the end of the physical log), or a
 * positive error code.
 */
STATIC int
xlog_find_verify_log_record(
	xlog_t			*log,
	xfs_daddr_t		start_blk,
	xfs_daddr_t		*last_blk,
	int			extra_bblks)
{
	xfs_daddr_t		i;
	xfs_buf_t		*bp;
	xfs_caddr_t		offset = NULL;
	xlog_rec_header_t	*head = NULL;
	int			error = 0;
	int			smallmem = 0;	/* set when only a 1-block buffer fits */
	int			num_blks = *last_blk - start_blk;
	int			xhdrs;

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	/* try to read the whole range at once; fall back to block-at-a-time */
	if (!(bp = xlog_get_bp(log, num_blks))) {
		if (!(bp = xlog_get_bp(log, 1)))
			return ENOMEM;
		smallmem = 1;
	} else {
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		/* point at the LAST block of the buffer; scan moves backwards */
		offset = xlog_align(log, start_blk, num_blks, bp);
		offset += ((num_blks - 1) << BBSHIFT);
	}

	/* walk backwards from *last_blk - 1 looking for a record header */
	for (i = (*last_blk) - 1; i >= 0; i--) {
		if (i < start_blk) {
			/* valid log record not found */
			xlog_warn(
		"XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		if (smallmem) {
			if ((error = xlog_bread(log, i, 1, bp)))
				goto out;
			offset = xlog_align(log, i, 1, bp);
		}

		head = (xlog_rec_header_t *)offset;

		if (XLOG_HEADER_MAGIC_NUM ==
		    INT_GET(head->h_magicno, ARCH_CONVERT))
			break;

		/* in the big-buffer case just step back within the buffer */
		if (!smallmem)
			offset -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/*
	 * We have the final block of the good log (the first block
	 * of the log record _before_ the head. So we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		uint	h_size = INT_GET(head->h_size, ARCH_CONVERT);

		/* v2 logs: a record may need multiple header blocks */
		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	if (*last_blk - i + extra_bblks
			!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);
	return error;
}
d321ceac
NS
235
/*
 * Head is defined to be the point of the log where the next log write
 * write could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Return: zero if normal, non-zero if error.
 *
 * On success *return_head_blk is the block number of the log head.  The
 * temporary buffer bp is owned by this routine and released on all paths.
 */
int
xlog_find_head(
	xlog_t		*log,
	xfs_daddr_t	*return_head_blk)
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	xfs_daddr_t	new_blk, first_blk, start_blk, last_blk, head_blk;
	int		num_scan_bblks;
	uint		first_half_cycle, last_half_cycle;
	uint		stop_on_cycle;
	int		error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;

		/* Is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;			/* get cycle # of 1st block */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, 0, 1, bp);
	first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);

	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, last_blk, 1, bp);
	last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an invalid block number, but this
	 * value makes the math correct.  If head_blk doesn't changed through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have
		 * cycle number last_half_cycle.  We need to scan backwards
		 * from the end verifying that there are no holes still
		 * containing last_half_cycle - 1.  If we find such a hole,
		 * then the start of that hole will be the new head.  The
		 * simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somwhere at the end of the
		 * log, as one of the latest writes at the beginning was
		 * incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and
		 * the head has to end up at the start of the x-1 hole at the
		 * end of the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.
		 * We don't worry about the x+1 blocks that we encounter,
		 * because we know that they cannot be the head since the log
		 * started with x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle
		 * number matching last_half_cycle.  We expect the log to be
		 * some variation on
		 *        x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will
		 * be where the new head belongs.  First we do a binary search
		 * for the first occurrence of last_half_cycle.  The binary
		 * search may not be totally accurate, so then we scan back
		 * from there looking for occurrences of last_half_cycle before
		 * us.  If that backwards scan wraps around the beginning of
		 * the log, then we look for occurrences of last_half_cycle - 1
		 * at the end of the log.  The cases we're looking for look
		 * like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * or
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						&head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log,
						start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.
		 * First we scan the physical end of the log.  In this part
		 * of the log, we are looking for blocks with cycle number
		 * last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as
		 * we've found a hole that didn't get written in going around
		 * the end of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of
		 * the log looking for occurrences of last_half_cycle.  If we
		 * find one, then our current estimate for the location of the
		 * first occurrence of last_half_cycle is wrong and we move
		 * back to the hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k
		 * logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the
		 * x + 1 blocks.  We need to skip past those since that is
		 * certainly not the head of the log.  By searching for
		 * last_half_cycle-1 we accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX &&
			(xfs_daddr_t) num_scan_bblks - head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks - (int)head_blk,
					(stop_on_cycle - 1), &new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical
		 * log is good.  This scan needs to verify that it doesn't find
		 * the last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log,
					start_blk, (int)head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

 bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = XLOG_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* -1 from the helper means "no header found" -> corrupt */
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX &&
				(xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
							start_blk, &new_blk,
							(int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

 bp_err:
	xlog_put_bp(bp);

	if (error)
		xlog_warn("XFS: failed to find log head");
	return error;
}
d321ceac
NS
498
499/*
500 * Find the sync block number or the tail of the log.
501 *
502 * This will be the block number of the last record to have its
503 * associated buffers synced to disk. Every log record header has
504 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
4ed50f8a 505 * to get a sync block number. The only concern is to figure out which
d321ceac
NS
506 * log record header to believe.
507 *
508 * The following algorithm uses the log record header with the largest
4ed50f8a 509 * lsn. The entire log record does not need to be valid. We only care
d321ceac
NS
510 * that the header is valid.
511 *
512 * We could speed up search by using current head_blk buffer, but it is not
513 * available.
514 */
515int
a562a63b
NS
516xlog_find_tail(
517 xlog_t *log,
518 xfs_daddr_t *head_blk,
519 xfs_daddr_t *tail_blk,
520 int readonly)
d321ceac
NS
521{
522 xlog_rec_header_t *rhead;
523 xlog_op_header_t *op_head;
a562a63b 524 xfs_caddr_t offset = NULL;
d321ceac
NS
525 xfs_buf_t *bp;
526 int error, i, found;
527 xfs_daddr_t umount_data_blk;
528 xfs_daddr_t after_umount_blk;
529 xfs_lsn_t tail_lsn;
73bf5988 530 int hblks;
5000d01d 531
1b6a0044 532 found = 0;
d321ceac
NS
533
534 /*
5000d01d 535 * Find previous log record
d321ceac
NS
536 */
537 if ((error = xlog_find_head(log, head_blk)))
538 return error;
539
a562a63b 540 bp = xlog_get_bp(log, 1);
d321ceac 541 if (!bp)
ce029dc1 542 return ENOMEM;
d321ceac
NS
543 if (*head_blk == 0) { /* special case */
544 if ((error = xlog_bread(log, 0, 1, bp)))
545 goto bread_err;
a562a63b
NS
546 offset = xlog_align(log, 0, 1, bp);
547 if (GET_CYCLE(offset, ARCH_CONVERT) == 0) {
d321ceac
NS
548 *tail_blk = 0;
549 /* leave all other log inited values alone */
550 goto exit;
551 }
552 }
553
554 /*
555 * Search backwards looking for log record header block
556 */
557 ASSERT(*head_blk < INT_MAX);
1b6a0044 558 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
d321ceac
NS
559 if ((error = xlog_bread(log, i, 1, bp)))
560 goto bread_err;
a562a63b 561 offset = xlog_align(log, i, 1, bp);
1b6a0044 562 if (XLOG_HEADER_MAGIC_NUM ==
a562a63b 563 INT_GET(*(uint *)offset, ARCH_CONVERT)) {
d321ceac
NS
564 found = 1;
565 break;
566 }
567 }
568 /*
569 * If we haven't found the log record header block, start looking
570 * again from the end of the physical log. XXXmiken: There should be
571 * a check here to make sure we didn't search more than N blocks in
572 * the previous code.
573 */
574 if (!found) {
1b6a0044 575 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
d321ceac
NS
576 if ((error = xlog_bread(log, i, 1, bp)))
577 goto bread_err;
a562a63b 578 offset = xlog_align(log, i, 1, bp);
1b6a0044 579 if (XLOG_HEADER_MAGIC_NUM ==
a562a63b 580 INT_GET(*(uint*)offset, ARCH_CONVERT)) {
d321ceac
NS
581 found = 2;
582 break;
583 }
584 }
585 }
586 if (!found) {
587 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
588 ASSERT(0);
589 return XFS_ERROR(EIO);
590 }
591
592 /* find blk_no of tail of log */
a562a63b 593 rhead = (xlog_rec_header_t *)offset;
46eca962 594 *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT));
d321ceac
NS
595
596 /*
597 * Reset log values according to the state of the log when we
598 * crashed. In the case where head_blk == 0, we bump curr_cycle
599 * one because the next write starts a new cycle rather than
600 * continuing the cycle of the last good log record. At this
601 * point we have guaranteed that all partial log records have been
602 * accounted for. Therefore, we know that the last good log record
603 * written was complete and ended exactly on the end boundary
604 * of the physical log.
605 */
606 log->l_prev_block = i;
607 log->l_curr_block = (int)*head_blk;
608 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
609 if (found == 2)
610 log->l_curr_cycle++;
611 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
612 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
613 log->l_grant_reserve_cycle = log->l_curr_cycle;
614 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
615 log->l_grant_write_cycle = log->l_curr_cycle;
616 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
617
618 /*
619 * Look for unmount record. If we find it, then we know there
4ed50f8a 620 * was a clean unmount. Since 'i' could be the last block in
d321ceac
NS
621 * the physical log, we convert to a log block before comparing
622 * to the head_blk.
623 *
624 * Save the current tail lsn to use to pass to
625 * xlog_clear_stale_blocks() below. We won't want to clear the
626 * unmount record if there is one, so we pass the lsn of the
627 * unmount record rather than the block after it.
628 */
73bf5988
SL
629 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
630 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
631 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
1b6a0044
NS
632
633 if ((h_version & XLOG_VERSION_2) &&
73bf5988
SL
634 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
635 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
636 if (h_size % XLOG_HEADER_CYCLE_SIZE)
637 hblks++;
638 } else {
639 hblks = 1;
640 }
641 } else {
642 hblks = 1;
643 }
1b6a0044
NS
644 after_umount_blk = (i + hblks + (int)
645 BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
d321ceac 646 tail_lsn = log->l_tail_lsn;
1b6a0044
NS
647 if (*head_blk == after_umount_blk &&
648 INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
73bf5988 649 umount_data_blk = (i + hblks) % log->l_logBBsize;
d321ceac
NS
650 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
651 goto bread_err;
652 }
a562a63b
NS
653 offset = xlog_align(log, umount_data_blk, 1, bp);
654 op_head = (xlog_op_header_t *)offset;
d321ceac
NS
655 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
656 /*
657 * Set tail and last sync so that newly written
658 * log records will point recovery to after the
659 * current unmount record.
660 */
46eca962
NS
661 ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle,
662 after_umount_blk);
663 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
664 after_umount_blk);
d321ceac 665 *tail_blk = after_umount_blk;
46eca962
NS
666
667 /*
668 * Note that the unmount was clean. If the unmount
669 * was not clean, we need to know this to rebuild the
670 * superblock counters from the perag headers if we
671 * have a filesystem using non-persistent counters.
672 */
673 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
d321ceac
NS
674 }
675 }
676
d321ceac
NS
677 /*
678 * Make sure that there are no blocks in front of the head
679 * with the same cycle number as the head. This can happen
680 * because we allow multiple outstanding log writes concurrently,
681 * and the later writes might make it out before earlier ones.
682 *
683 * We use the lsn from before modifying it so that we'll never
684 * overwrite the unmount record after a clean unmount.
685 *
686 * Do this only if we are going to recover the filesystem
32181a02
NS
687 *
688 * NOTE: This used to say "if (!readonly)"
689 * However on Linux, we can & do recover a read-only filesystem.
690 * We only skip recovery if NORECOVERY is specified on mount,
691 * in which case we would not be here.
692 *
693 * But... if the -device- itself is readonly, just skip this.
694 * We can't recover this device anyway, so it won't matter.
d321ceac 695 */
a562a63b 696 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
d321ceac 697 error = xlog_clear_stale_blocks(log, tail_lsn);
32181a02 698 }
d321ceac
NS
699
700bread_err:
701exit:
702 xlog_put_bp(bp);
703
5000d01d
SL
704 if (error)
705 xlog_warn("XFS: failed to locate log tail");
d321ceac 706 return error;
a562a63b 707}
4ed50f8a 708
d321ceac
NS
709/*
710 * Is the log zeroed at all?
711 *
712 * The last binary search should be changed to perform an X block read
4ed50f8a 713 * once X becomes small enough. You can then search linearly through
d321ceac
NS
714 * the X blocks. This will cut down on the number of reads we need to do.
715 *
716 * If the log is partially zeroed, this routine will pass back the blkno
717 * of the first block with cycle number 0. It won't have a complete LR
718 * preceding it.
719 *
720 * Return:
721 * 0 => the log is completely written to
722 * -1 => use *blk_no as the first block of the log
723 * >0 => error has occurred
724 */
725int
a562a63b
NS
726xlog_find_zeroed(
727 xlog_t *log,
728 xfs_daddr_t *blk_no)
d321ceac
NS
729{
730 xfs_buf_t *bp;
a562a63b 731 xfs_caddr_t offset;
4ed50f8a 732 uint first_cycle, last_cycle;
d321ceac 733 xfs_daddr_t new_blk, last_blk, start_blk;
4ed50f8a
RC
734 xfs_daddr_t num_scan_bblks;
735 int error, log_bbnum = log->l_logBBsize;
d321ceac 736
d321ceac 737 /* check totally zeroed log */
a562a63b 738 bp = xlog_get_bp(log, 1);
d321ceac 739 if (!bp)
ce029dc1 740 return ENOMEM;
d321ceac
NS
741 if ((error = xlog_bread(log, 0, 1, bp)))
742 goto bp_err;
a562a63b
NS
743 offset = xlog_align(log, 0, 1, bp);
744 first_cycle = GET_CYCLE(offset, ARCH_CONVERT);
d321ceac
NS
745 if (first_cycle == 0) { /* completely zeroed log */
746 *blk_no = 0;
747 xlog_put_bp(bp);
748 return -1;
749 }
750
751 /* check partially zeroed log */
752 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
753 goto bp_err;
a562a63b
NS
754 offset = xlog_align(log, log_bbnum-1, 1, bp);
755 last_cycle = GET_CYCLE(offset, ARCH_CONVERT);
d321ceac
NS
756 if (last_cycle != 0) { /* log completely written to */
757 xlog_put_bp(bp);
758 return 0;
759 } else if (first_cycle != 1) {
760 /*
761 * If the cycle of the last block is zero, the cycle of
5000d01d
SL
762 * the first block must be 1. If it's not, maybe we're
763 * not looking at a log... Bail out.
d321ceac 764 */
5000d01d 765 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
d321ceac
NS
766 return XFS_ERROR(EINVAL);
767 }
5000d01d 768
d321ceac
NS
769 /* we have a partially zeroed log */
770 last_blk = log_bbnum-1;
771 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
772 goto bp_err;
773
774 /*
4ed50f8a 775 * Validate the answer. Because there is no way to guarantee that
d321ceac
NS
776 * the entire log is made up of log records which are the same size,
777 * we scan over the defined maximum blocks. At this point, the maximum
778 * is not chosen to mean anything special. XXXmiken
779 */
73bf5988 780 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
d321ceac 781 ASSERT(num_scan_bblks <= INT_MAX);
5000d01d 782
d321ceac
NS
783 if (last_blk < num_scan_bblks)
784 num_scan_bblks = last_blk;
785 start_blk = last_blk - num_scan_bblks;
5000d01d 786
d321ceac
NS
787 /*
788 * We search for any instances of cycle number 0 that occur before
789 * our current estimate of the head. What we're trying to detect is
4ed50f8a
RC
790 * 1 ... | 0 | 1 | 0...
791 * ^ binary search ends here
d321ceac 792 */
ce029dc1
ES
793 if ((error = xlog_find_verify_cycle(log, start_blk,
794 (int)num_scan_bblks, 0, &new_blk)))
606d804d 795 goto bp_err;
ce029dc1
ES
796 if (new_blk != -1)
797 last_blk = new_blk;
d321ceac
NS
798
799 /*
800 * Potentially backup over partial log record write. We don't need
801 * to search the end of the log because we know it is zero.
802 */
5000d01d 803 if ((error = xlog_find_verify_log_record(log, start_blk,
79c48ada
ES
804 &last_blk, 0)) == -1) {
805 error = XFS_ERROR(EIO);
806 goto bp_err;
807 } else if (error)
d321ceac
NS
808 goto bp_err;
809
810 *blk_no = last_blk;
811bp_err:
812 xlog_put_bp(bp);
813 if (error)
814 return error;
815 return -1;
a562a63b 816}
d321ceac 817
/*
 * Restore the original cycle numbers into the data blocks of a log record.
 * On disk, the first word of every basic block of a record's payload is
 * overwritten with the record's cycle number; the displaced words are
 * stashed in the record header (h_cycle_data) and, for v2 logs with large
 * records, in the extended headers that follow.  Copy them back so 'dp'
 * holds the payload as originally written.
 */
STATIC void
xlog_unpack_data(
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	xlog_t			*log)
{
	int			i, j, k;
	xlog_in_core_2_t	*xhdr;

	/* first XLOG_HEADER_CYCLE_SIZE worth comes from the main header */
	for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2: remaining words live in the extended headers; j picks
		 * the extended header, k the slot within it */
		xhdr = (xlog_in_core_2_t *)rhead;
		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

	/* stubbed to a no-op in this userspace build (see #define above) */
	xlog_unpack_data_checksum(rhead, dp, log);
}
d321ceac 845
d321ceac 846STATIC xlog_recover_t *
a562a63b
NS
847xlog_recover_find_tid(
848 xlog_recover_t *q,
849 xlog_tid_t tid)
d321ceac 850{
a562a63b 851 xlog_recover_t *p = q;
d321ceac
NS
852
853 while (p != NULL) {
854 if (p->r_log_tid == tid)
855 break;
856 p = p->r_next;
857 }
858 return p;
a562a63b 859}
4ed50f8a 860
/*
 * Push 'trans' onto the front of the hash chain 'q'.
 * O(1); the chain is a simple singly-linked list threaded via r_next.
 */
STATIC void
xlog_recover_put_hashq(
	xlog_recover_t	**q,
	xlog_recover_t	*trans)
{
	trans->r_next = *q;
	*q = trans;
}
4ed50f8a 869
d321ceac 870STATIC void
a562a63b
NS
871xlog_recover_new_tid(
872 xlog_recover_t **q,
873 xlog_tid_t tid,
874 xfs_lsn_t lsn)
d321ceac 875{
a562a63b 876 xlog_recover_t *trans;
d321ceac 877
a562a63b 878 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
d321ceac
NS
879 trans->r_log_tid = tid;
880 trans->r_lsn = lsn;
881 xlog_recover_put_hashq(q, trans);
a562a63b 882}
d321ceac
NS
883
884STATIC int
a562a63b
NS
885xlog_recover_unlink_tid(
886 xlog_recover_t **q,
887 xlog_recover_t *trans)
d321ceac 888{
a562a63b
NS
889 xlog_recover_t *tp;
890 int found = 0;
d321ceac
NS
891
892 ASSERT(trans != 0);
893 if (trans == *q) {
894 *q = (*q)->r_next;
895 } else {
896 tp = *q;
897 while (tp != 0) {
898 if (tp->r_next == trans) {
899 found = 1;
900 break;
901 }
902 tp = tp->r_next;
903 }
904 if (!found) {
905 xlog_warn(
906 "XFS: xlog_recover_unlink_tid: trans not found");
907 ASSERT(0);
908 return XFS_ERROR(EIO);
909 }
910 tp->r_next = tp->r_next->r_next;
911 }
912 return 0;
a562a63b 913}
d321ceac
NS
914
915/*
916 * Free up any resources allocated by the transaction
917 *
918 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
919 */
STATIC void
xlog_recover_free_trans(
	xlog_recover_t		*trans)
{
	xlog_recover_item_t	*first_item, *item, *free_item;
	int			i;

	/* r_itemq is a circular doubly-linked list; walk it once around.
	 * NOTE(review): assumes r_itemq is non-NULL here — callers appear to
	 * only commit transactions that have items; confirm before reuse. */
	item = first_item = trans->r_itemq;
	do {
		/* advance before freeing so the walk survives the free */
		free_item = item;
		item = item->ri_next;
		/* Free the regions in the item. */
		for (i = 0; i < free_item->ri_cnt; i++) {
			kmem_free(free_item->ri_buf[i].i_addr,
				  free_item->ri_buf[i].i_len);
		}
		/* Free the item itself */
		kmem_free(free_item->ri_buf,
			  (free_item->ri_total * sizeof(xfs_log_iovec_t)));
		kmem_free(free_item, sizeof(xlog_recover_item_t));
	} while (first_item != item);
	/* Free the transaction recover structure */
	kmem_free(trans, sizeof(xlog_recover_t));
}
d321ceac
NS
944
945STATIC int
a562a63b
NS
946xlog_recover_commit_trans(
947 xlog_t *log,
948 xlog_recover_t **q,
949 xlog_recover_t *trans,
950 int pass)
d321ceac 951{
a562a63b 952 int error;
d321ceac
NS
953
954 if ((error = xlog_recover_unlink_tid(q, trans)))
955 return error;
956 if ((error = xlog_recover_do_trans(log, trans, pass)))
957 return error;
958 xlog_recover_free_trans(trans); /* no error */
959 return 0;
a562a63b 960}
d321ceac
NS
961
962STATIC void
a562a63b
NS
963xlog_recover_insert_item_backq(
964 xlog_recover_item_t **q,
965 xlog_recover_item_t *item)
d321ceac
NS
966{
967 if (*q == 0) {
968 item->ri_prev = item->ri_next = item;
969 *q = item;
970 } else {
971 item->ri_next = *q;
972 item->ri_prev = (*q)->ri_prev;
973 (*q)->ri_prev = item;
974 item->ri_prev->ri_next = item;
975 }
a562a63b 976}
d321ceac
NS
977
978STATIC void
a562a63b
NS
979xlog_recover_add_item(
980 xlog_recover_item_t **itemq)
d321ceac 981{
a562a63b 982 xlog_recover_item_t *item;
d321ceac 983
2b288ccf 984 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
d321ceac 985 xlog_recover_insert_item_backq(itemq, item);
a562a63b 986}
d321ceac 987
a562a63b
NS
988STATIC int
989xlog_recover_add_to_cont_trans(
990 xlog_recover_t *trans,
991 xfs_caddr_t dp,
992 int len)
993{
994 xlog_recover_item_t *item;
995 xfs_caddr_t ptr, old_ptr;
996 int old_len;
997
998 item = trans->r_itemq;
999 if (item == 0) {
1000 /* finish copying rest of trans header */
1001 xlog_recover_add_item(&trans->r_itemq);
1002 ptr = (xfs_caddr_t) &trans->r_theader +
1003 sizeof(xfs_trans_header_t) - len;
1004 memcpy(ptr, dp, len); /* d, s, l */
1005 return 0;
1006 }
1007 item = item->ri_prev;
1008
1009 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1010 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1011
6239071d 1012 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
a562a63b
NS
1013 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1014 item->ri_buf[item->ri_cnt-1].i_len += len;
1015 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1016 return 0;
1017}
1018
1019/*
1020 * The next region to add is the start of a new region. It could be
d321ceac
NS
1021 * a whole region or it could be the first part of a new region. Because
1022 * of this, the assumption here is that the type and size fields of all
1023 * format structures fit into the first 32 bits of the structure.
1024 *
1025 * This works because all regions must be 32 bit aligned. Therefore, we
1026 * either have both fields or we have neither field. In the case we have
1027 * neither field, the data part of the region is zero length. We only have
1028 * a log_op_header and can throw away the header since a new one will appear
1029 * later. If we have at least 4 bytes, then we can determine how many regions
1030 * will appear in the current log item.
1031 */
1032STATIC int
a562a63b
NS
1033xlog_recover_add_to_trans(
1034 xlog_recover_t *trans,
1035 xfs_caddr_t dp,
1036 int len)
d321ceac 1037{
a562a63b
NS
1038 xfs_inode_log_format_t *in_f; /* any will do */
1039 xlog_recover_item_t *item;
1040 xfs_caddr_t ptr;
d321ceac
NS
1041
1042 if (!len)
1043 return 0;
d321ceac
NS
1044 item = trans->r_itemq;
1045 if (item == 0) {
1046 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
1047 if (len == sizeof(xfs_trans_header_t))
1048 xlog_recover_add_item(&trans->r_itemq);
32181a02 1049 memcpy(&trans->r_theader, dp, len); /* d, s, l */
d321ceac
NS
1050 return 0;
1051 }
a562a63b
NS
1052
1053 ptr = kmem_alloc(len, KM_SLEEP);
1054 memcpy(ptr, dp, len);
1055 in_f = (xfs_inode_log_format_t *)ptr;
1056
d321ceac
NS
1057 if (item->ri_prev->ri_total != 0 &&
1058 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1059 xlog_recover_add_item(&trans->r_itemq);
1060 }
1061 item = trans->r_itemq;
1062 item = item->ri_prev;
1063
1064 if (item->ri_total == 0) { /* first region to be added */
1065 item->ri_total = in_f->ilf_size;
1066 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1067 item->ri_buf = kmem_zalloc((item->ri_total *
2b288ccf 1068 sizeof(xfs_log_iovec_t)), KM_SLEEP);
d321ceac
NS
1069 }
1070 ASSERT(item->ri_total > item->ri_cnt);
1071 /* Description region is ri_buf[0] */
1072 item->ri_buf[item->ri_cnt].i_addr = ptr;
1073 item->ri_buf[item->ri_cnt].i_len = len;
1074 item->ri_cnt++;
1075 return 0;
a562a63b 1076}
d321ceac
NS
1077
1078STATIC int
a562a63b
NS
1079xlog_recover_unmount_trans(
1080 xlog_recover_t *trans)
d321ceac
NS
1081{
1082 /* Do nothing now */
1083 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
a562a63b
NS
1084 return 0;
1085}
d321ceac 1086
a562a63b
NS
1087/*
1088 * There are two valid states of the r_state field. 0 indicates that the
1089 * transaction structure is in a normal state. We have either seen the
1090 * start of the transaction or the last operation we added was not a partial
1091 * operation. If the last operation we added to the transaction was a
1092 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
1093 *
1094 * NOTE: skip LRs with 0 data length.
1095 */
d321ceac 1096STATIC int
a562a63b
NS
1097xlog_recover_process_data(
1098 xlog_t *log,
1099 xlog_recover_t *rhash[],
1100 xlog_rec_header_t *rhead,
1101 xfs_caddr_t dp,
1102 int pass)
d321ceac 1103{
a562a63b
NS
1104 xfs_caddr_t lp;
1105 int num_logops;
1106 xlog_op_header_t *ohead;
1107 xlog_recover_t *trans;
1108 xlog_tid_t tid;
1109 int error;
1110 unsigned long hash;
1111 uint flags;
1112
1113 lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
1114 num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
1115
1116 /* check the log format matches our own - else we can't recover */
1117 if (xlog_header_check_recover(log->l_mp, rhead))
1118 return (XFS_ERROR(EIO));
1119
1120 while ((dp < lp) && num_logops) {
1121 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
1122 ohead = (xlog_op_header_t *)dp;
1123 dp += sizeof(xlog_op_header_t);
1124 if (ohead->oh_clientid != XFS_TRANSACTION &&
1125 ohead->oh_clientid != XFS_LOG) {
1126 xlog_warn(
1127 "XFS: xlog_recover_process_data: bad clientid");
1128 ASSERT(0);
1129 return (XFS_ERROR(EIO));
d321ceac 1130 }
a562a63b
NS
1131 tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
1132 hash = XLOG_RHASH(tid);
1133 trans = xlog_recover_find_tid(rhash[hash], tid);
1134 if (trans == NULL) { /* not found; add new tid */
1135 if (ohead->oh_flags & XLOG_START_TRANS)
1136 xlog_recover_new_tid(&rhash[hash], tid,
1137 INT_GET(rhead->h_lsn, ARCH_CONVERT));
1138 } else {
1139 ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
1140 flags = ohead->oh_flags & ~XLOG_END_TRANS;
1141 if (flags & XLOG_WAS_CONT_TRANS)
1142 flags &= ~XLOG_CONTINUE_TRANS;
1143 switch (flags) {
1144 case XLOG_COMMIT_TRANS:
1145 error = xlog_recover_commit_trans(log,
1146 &rhash[hash], trans, pass);
1147 break;
1148 case XLOG_UNMOUNT_TRANS:
1149 error = xlog_recover_unmount_trans(trans);
1150 break;
1151 case XLOG_WAS_CONT_TRANS:
1152 error = xlog_recover_add_to_cont_trans(trans,
1153 dp, INT_GET(ohead->oh_len,
1154 ARCH_CONVERT));
1155 break;
1156 case XLOG_START_TRANS:
1157 xlog_warn(
1158 "XFS: xlog_recover_process_data: bad transaction");
1159 ASSERT(0);
1160 error = XFS_ERROR(EIO);
1161 break;
1162 case 0:
1163 case XLOG_CONTINUE_TRANS:
1164 error = xlog_recover_add_to_trans(trans,
1165 dp, INT_GET(ohead->oh_len,
1166 ARCH_CONVERT));
1167 break;
1168 default:
1169 xlog_warn(
1170 "XFS: xlog_recover_process_data: bad flag");
1171 ASSERT(0);
1172 error = XFS_ERROR(EIO);
1173 break;
1174 }
1175 if (error)
1176 return error;
d321ceac 1177 }
a562a63b
NS
1178 dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
1179 num_logops--;
1180 }
1181 return 0;
1182}
d321ceac 1183
72c5917e
NS
1184STATIC int
1185xlog_valid_rec_header(
1186 xlog_t *log,
1187 xlog_rec_header_t *rhead,
1188 xfs_daddr_t blkno)
1189{
b0e364f6 1190 int hlen;
72c5917e
NS
1191
1192 if (unlikely(
1193 (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
1194 XLOG_HEADER_MAGIC_NUM))) {
1195 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
1196 XFS_ERRLEVEL_LOW, log->l_mp);
1197 return XFS_ERROR(EFSCORRUPTED);
1198 }
1199 if (unlikely(
46eca962 1200 (!rhead->h_version ||
72c5917e
NS
1201 (INT_GET(rhead->h_version, ARCH_CONVERT) &
1202 (~XLOG_VERSION_OKBITS)) != 0))) {
1203 xlog_warn("XFS: %s: unrecognised log version (%d).",
1204 __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT));
1205 return XFS_ERROR(EIO);
1206 }
1207
1208 /* LR body must have data or it wouldn't have been written */
b0e364f6
NS
1209 hlen = INT_GET(rhead->h_len, ARCH_CONVERT);
1210 if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
72c5917e
NS
1211 XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
1212 XFS_ERRLEVEL_LOW, log->l_mp);
1213 return XFS_ERROR(EFSCORRUPTED);
1214 }
1215 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
1216 XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
1217 XFS_ERRLEVEL_LOW, log->l_mp);
1218 return XFS_ERROR(EFSCORRUPTED);
1219 }
1220 return 0;
1221}
1222
d321ceac
NS
1223/*
1224 * Read the log from tail to head and process the log records found.
1225 * Handle the two cases where the tail and head are in the same cycle
1226 * and where the active portion of the log wraps around the end of
4ed50f8a 1227 * the physical log separately. The pass parameter is passed through
d321ceac
NS
1228 * to the routines called to process the data and is not looked at
1229 * here.
1230 */
1231int
a562a63b
NS
1232xlog_do_recovery_pass(
1233 xlog_t *log,
1234 xfs_daddr_t head_blk,
1235 xfs_daddr_t tail_blk,
1236 int pass)
d321ceac 1237{
a562a63b
NS
1238 xlog_rec_header_t *rhead;
1239 xfs_daddr_t blk_no;
1240 xfs_caddr_t bufaddr, offset;
1241 xfs_buf_t *hbp, *dbp;
1242 int error = 0, h_size;
1243 int bblks, split_bblks;
1244 int hblks, split_hblks, wrapped_hblks;
1245 xlog_recover_t *rhash[XLOG_RHASH_SIZE];
1246
72c5917e
NS
1247 ASSERT(head_blk != tail_blk);
1248
73bf5988 1249 /*
a562a63b
NS
1250 * Read the header of the tail block and get the iclog buffer size from
1251 * h_size. Use this to tell how many sectors make up the log header.
73bf5988 1252 */
a562a63b
NS
1253 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
1254 /*
1255 * When using variable length iclogs, read first sector of
1256 * iclog header and extract the header size from it. Get a
1257 * new hbp that is the correct size.
1258 */
1259 hbp = xlog_get_bp(log, 1);
1260 if (!hbp)
1261 return ENOMEM;
1262 if ((error = xlog_bread(log, tail_blk, 1, hbp)))
1263 goto bread_err1;
1264 offset = xlog_align(log, tail_blk, 1, hbp);
1265 rhead = (xlog_rec_header_t *)offset;
72c5917e
NS
1266 error = xlog_valid_rec_header(log, rhead, tail_blk);
1267 if (error)
a562a63b 1268 goto bread_err1;
a562a63b 1269 h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
a562a63b
NS
1270 if ((INT_GET(rhead->h_version, ARCH_CONVERT)
1271 & XLOG_VERSION_2) &&
1272 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1273 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1274 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1275 hblks++;
1276 xlog_put_bp(hbp);
1277 hbp = xlog_get_bp(log, hblks);
1278 } else {
1279 hblks = 1;
1280 }
73bf5988 1281 } else {
a562a63b
NS
1282 ASSERT(log->l_sectbb_log == 0);
1283 hblks = 1;
1284 hbp = xlog_get_bp(log, 1);
1285 h_size = XLOG_BIG_RECORD_BSIZE;
73bf5988 1286 }
a562a63b
NS
1287
1288 if (!hbp)
1289 return ENOMEM;
1290 dbp = xlog_get_bp(log, BTOBB(h_size));
1291 if (!dbp) {
1292 xlog_put_bp(hbp);
1293 return ENOMEM;
d321ceac 1294 }
a562a63b
NS
1295
1296 memset(rhash, 0, sizeof(rhash));
1297 if (tail_blk <= head_blk) {
1298 for (blk_no = tail_blk; blk_no < head_blk; ) {
1299 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1300 goto bread_err2;
1301 offset = xlog_align(log, blk_no, hblks, hbp);
1302 rhead = (xlog_rec_header_t *)offset;
72c5917e
NS
1303 error = xlog_valid_rec_header(log, rhead, blk_no);
1304 if (error)
a562a63b 1305 goto bread_err2;
a562a63b 1306
a562a63b
NS
1307 /* blocks in data section */
1308 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
72c5917e
NS
1309 error = xlog_bread(log, blk_no + hblks, bblks, dbp);
1310 if (error)
1311 goto bread_err2;
1312 offset = xlog_align(log, blk_no + hblks, bblks, dbp);
1313 xlog_unpack_data(rhead, offset, log);
1314 if ((error = xlog_recover_process_data(log,
a562a63b 1315 rhash, rhead, offset, pass)))
72c5917e
NS
1316 goto bread_err2;
1317 blk_no += bblks + hblks;
73bf5988 1318 }
a562a63b
NS
1319 } else {
1320 /*
1321 * Perform recovery around the end of the physical log.
1322 * When the head is not on the same cycle number as the tail,
1323 * we can't do a sequential recovery as above.
1324 */
1325 blk_no = tail_blk;
1326 while (blk_no < log->l_logBBsize) {
1327 /*
1328 * Check for header wrapping around physical end-of-log
1329 */
72c5917e
NS
1330 offset = NULL;
1331 split_hblks = 0;
a562a63b 1332 wrapped_hblks = 0;
72c5917e 1333 if (blk_no + hblks <= log->l_logBBsize) {
a562a63b 1334 /* Read header in one read */
72c5917e
NS
1335 error = xlog_bread(log, blk_no, hblks, hbp);
1336 if (error)
a562a63b
NS
1337 goto bread_err2;
1338 offset = xlog_align(log, blk_no, hblks, hbp);
1339 } else {
1340 /* This LR is split across physical log end */
a562a63b
NS
1341 if (blk_no != log->l_logBBsize) {
1342 /* some data before physical log end */
1343 ASSERT(blk_no <= INT_MAX);
1344 split_hblks = log->l_logBBsize - (int)blk_no;
1345 ASSERT(split_hblks > 0);
1346 if ((error = xlog_bread(log, blk_no,
1347 split_hblks, hbp)))
1348 goto bread_err2;
1349 offset = xlog_align(log, blk_no,
1350 split_hblks, hbp);
1351 }
1352 /*
1353 * Note: this black magic still works with
1354 * large sector sizes (non-512) only because:
1355 * - we increased the buffer size originally
1356 * by 1 sector giving us enough extra space
1357 * for the second read;
1358 * - the log start is guaranteed to be sector
1359 * aligned;
1360 * - we read the log end (LR header start)
1361 * _first_, then the log start (LR header end)
1362 * - order is important.
1363 */
1364 bufaddr = XFS_BUF_PTR(hbp);
1365 XFS_BUF_SET_PTR(hbp,
1366 bufaddr + BBTOB(split_hblks),
1367 BBTOB(hblks - split_hblks));
1368 wrapped_hblks = hblks - split_hblks;
72c5917e
NS
1369 error = xlog_bread(log, 0, wrapped_hblks, hbp);
1370 if (error)
a562a63b 1371 goto bread_err2;
b0e364f6 1372 XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
a562a63b
NS
1373 if (!offset)
1374 offset = xlog_align(log, 0,
1375 wrapped_hblks, hbp);
1376 }
1377 rhead = (xlog_rec_header_t *)offset;
72c5917e
NS
1378 error = xlog_valid_rec_header(log, rhead,
1379 split_hblks ? blk_no : 0);
1380 if (error)
a562a63b 1381 goto bread_err2;
72c5917e
NS
1382
1383 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
1384 blk_no += hblks;
a562a63b
NS
1385
1386 /* Read in data for log record */
72c5917e
NS
1387 if (blk_no + bblks <= log->l_logBBsize) {
1388 error = xlog_bread(log, blk_no, bblks, dbp);
1389 if (error)
a562a63b
NS
1390 goto bread_err2;
1391 offset = xlog_align(log, blk_no, bblks, dbp);
1392 } else {
1393 /* This log record is split across the
1394 * physical end of log */
1395 offset = NULL;
1396 split_bblks = 0;
1397 if (blk_no != log->l_logBBsize) {
1398 /* some data is before the physical
1399 * end of log */
1400 ASSERT(!wrapped_hblks);
1401 ASSERT(blk_no <= INT_MAX);
1402 split_bblks =
1403 log->l_logBBsize - (int)blk_no;
1404 ASSERT(split_bblks > 0);
1405 if ((error = xlog_bread(log, blk_no,
1406 split_bblks, dbp)))
1407 goto bread_err2;
1408 offset = xlog_align(log, blk_no,
1409 split_bblks, dbp);
1410 }
1411 /*
1412 * Note: this black magic still works with
1413 * large sector sizes (non-512) only because:
1414 * - we increased the buffer size originally
1415 * by 1 sector giving us enough extra space
1416 * for the second read;
1417 * - the log start is guaranteed to be sector
1418 * aligned;
1419 * - we read the log end (LR header start)
1420 * _first_, then the log start (LR header end)
1421 * - order is important.
1422 */
1423 bufaddr = XFS_BUF_PTR(dbp);
1424 XFS_BUF_SET_PTR(dbp,
1425 bufaddr + BBTOB(split_bblks),
1426 BBTOB(bblks - split_bblks));
1427 if ((error = xlog_bread(log, wrapped_hblks,
1428 bblks - split_bblks, dbp)))
1429 goto bread_err2;
b0e364f6 1430 XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
a562a63b
NS
1431 if (!offset)
1432 offset = xlog_align(log, wrapped_hblks,
1433 bblks - split_bblks, dbp);
1434 }
1435 xlog_unpack_data(rhead, offset, log);
1436 if ((error = xlog_recover_process_data(log, rhash,
72c5917e 1437 rhead, offset, pass)))
a562a63b
NS
1438 goto bread_err2;
1439 blk_no += bblks;
d321ceac 1440 }
d321ceac 1441
a562a63b
NS
1442 ASSERT(blk_no >= log->l_logBBsize);
1443 blk_no -= log->l_logBBsize;
1444
1445 /* read first part of physical log */
1446 while (blk_no < head_blk) {
1447 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
1448 goto bread_err2;
1449 offset = xlog_align(log, blk_no, hblks, hbp);
1450 rhead = (xlog_rec_header_t *)offset;
72c5917e
NS
1451 error = xlog_valid_rec_header(log, rhead, blk_no);
1452 if (error)
1453 goto bread_err2;
a562a63b 1454 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
a562a63b
NS
1455 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
1456 goto bread_err2;
1457 offset = xlog_align(log, blk_no+hblks, bblks, dbp);
1458 xlog_unpack_data(rhead, offset, log);
1459 if ((error = xlog_recover_process_data(log, rhash,
72c5917e 1460 rhead, offset, pass)))
a562a63b 1461 goto bread_err2;
72c5917e 1462 blk_no += bblks + hblks;
a562a63b 1463 }
5000d01d 1464 }
d321ceac 1465
a562a63b
NS
1466 bread_err2:
1467 xlog_put_bp(dbp);
1468 bread_err1:
1469 xlog_put_bp(hbp);
1470 return error;
d321ceac 1471}