]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxlog/xfs_log_recover.c
Allow swab.h to be used in -pedantic c++ build environments.
[thirdparty/xfsprogs-dev.git] / libxlog / xfs_log_recover.c
CommitLineData
d321ceac 1/*
b0e364f6 2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
5000d01d 3 *
d321ceac
NS
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
5000d01d 7 *
d321ceac
NS
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5000d01d 11 *
d321ceac
NS
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
4ed50f8a 14 * or the like. Any license provided herein, whether implied or
d321ceac
NS
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
5000d01d 18 *
d321ceac
NS
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
5000d01d 22 *
d321ceac
NS
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
5000d01d
SL
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
d321ceac
NS
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
#include <xfs/libxlog.h>

/*
 * Userspace stubs.  In libxlog these kernel-side operations are not
 * needed: checksum unpacking is a no-op, clearing stale blocks and the
 * read-only buffer-target check always report success (0).
 */
#define xlog_unpack_data_checksum(rhead, dp, log)	((void)0)
#define xlog_clear_stale_blocks(log, tail_lsn)		(0)
#define xfs_readonly_buftarg(buftarg)			(0)
d321ceac
NS
39/*
40 * This routine finds (to an approximation) the first block in the physical
4ed50f8a 41 * log which contains the given cycle. It uses a binary search algorithm.
d321ceac
NS
42 * Note that the algorithm can not be perfect because the disk will not
43 * necessarily be perfect.
44 */
45int
a562a63b
NS
46xlog_find_cycle_start(
47 xlog_t *log,
48 xfs_buf_t *bp,
49 xfs_daddr_t first_blk,
50 xfs_daddr_t *last_blk,
51 uint cycle)
d321ceac 52{
a562a63b 53 xfs_caddr_t offset;
ffe29fb5
NS
54 xfs_daddr_t mid_blk;
55 uint mid_cycle;
56 int error;
d321ceac
NS
57
58 mid_blk = BLK_AVG(first_blk, *last_blk);
59 while (mid_blk != first_blk && mid_blk != *last_blk) {
60 if ((error = xlog_bread(log, mid_blk, 1, bp)))
61 return error;
a562a63b
NS
62 offset = xlog_align(log, mid_blk, 1, bp);
63 mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
d321ceac
NS
64 if (mid_cycle == cycle) {
65 *last_blk = mid_blk;
66 /* last_half_cycle == mid_cycle */
67 } else {
68 first_blk = mid_blk;
69 /* first_half_cycle == mid_cycle */
70 }
71 mid_blk = BLK_AVG(first_blk, *last_blk);
72 }
73 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
74 (mid_blk == *last_blk && mid_blk-1 == first_blk));
75
76 return 0;
a562a63b 77}
d321ceac
NS
78
/*
 * Check that the range of blocks does not contain the cycle number
 * given.  The scan needs to occur from front to back and the ptr into the
 * region must be updated since a later routine will need to perform another
 * test.  If the region is completely good, we end up returning the same
 * last block number.
 *
 * Set blkno to -1 if we encounter no errors.  This is an invalid block number
 * since we don't ever expect logs to get this large.
 */
STATIC int
xlog_find_verify_cycle(
	xlog_t		*log,
	xfs_daddr_t	start_blk,	/* first block of the region to scan */
	int		nbblks,		/* number of basic blocks to scan */
	uint		stop_on_cycle_no, /* cycle number that stops the scan */
	xfs_daddr_t	*new_blk)	/* out: first block found with that
					 * cycle, or -1 if none was seen */
{
	xfs_daddr_t	i, j;
	uint		cycle;
	xfs_buf_t	*bp;
	xfs_daddr_t	bufblks;
	xfs_caddr_t	buf = NULL;
	int		error = 0;

	/* Start with a buffer large enough for the whole region
	 * (next power of two >= nbblks). */
	bufblks = 1 << ffs(nbblks);

	while (!(bp = xlog_get_bp(log, bufblks))) {
		/* can't get enough memory to do everything in one big buffer */
		bufblks >>= 1;
		/* give up once we can't even cover a sector */
		if (bufblks <= log->l_sectbb_log)
			return ENOMEM;
	}

	/* Walk the region front-to-back in bufblks-sized chunks. */
	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
		int	bcount;

		/* The final chunk may be short. */
		bcount = min(bufblks, (start_blk + nbblks - i));

		if ((error = xlog_bread(log, i, bcount, bp)))
			goto out;

		buf = xlog_align(log, i, bcount, bp);
		/* Examine the cycle stamp of every basic block in the chunk. */
		for (j = 0; j < bcount; j++) {
			cycle = GET_CYCLE(buf, ARCH_CONVERT);
			if (cycle == stop_on_cycle_no) {
				/* found it: report the absolute block number */
				*new_blk = i+j;
				goto out;
			}

			buf += BBSIZE;
		}
	}

	/* Clean scan: report -1 (an impossible block number). */
	*new_blk = -1;

out:
	xlog_put_bp(bp);
	return error;
}
d321ceac
NS
139
/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 *
 * Returns 0 on success, -1 if the scan reached block 0 without finding a
 * record header (caller may retry at the physical end of the log), or a
 * positive error code.
 */
STATIC int
xlog_find_verify_log_record(
	xlog_t			*log,
	xfs_daddr_t		start_blk,
	xfs_daddr_t		*last_blk,
	int			extra_bblks)
{
	xfs_daddr_t		i;
	xfs_buf_t		*bp;
	xfs_caddr_t		offset = NULL;
	xlog_rec_header_t	*head = NULL;
	int			error = 0;
	int			smallmem = 0;	/* 1 => one-block reads only */
	int			num_blks = *last_blk - start_blk;
	int			xhdrs;

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	/* Try to read the whole range at once; fall back to a single-block
	 * buffer (smallmem mode) when memory is tight. */
	if (!(bp = xlog_get_bp(log, num_blks))) {
		if (!(bp = xlog_get_bp(log, 1)))
			return ENOMEM;
		smallmem = 1;
	} else {
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		offset = xlog_align(log, start_blk, num_blks, bp);
		/* position at the LAST block of the buffer */
		offset += ((num_blks - 1) << BBSHIFT);
	}

	/* Scan backwards from *last_blk-1 looking for a record header. */
	for (i = (*last_blk) - 1; i >= 0; i--) {
		if (i < start_blk) {
			/* valid log record not found */
			xlog_warn(
		"XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		/* smallmem: read each block individually as we go */
		if (smallmem) {
			if ((error = xlog_bread(log, i, 1, bp)))
				goto out;
			offset = xlog_align(log, i, 1, bp);
		}

		head = (xlog_rec_header_t *)offset;

		if (XLOG_HEADER_MAGIC_NUM ==
		    INT_GET(head->h_magicno, ARCH_CONVERT))
			break;

		/* big-buffer mode: step back within the buffer */
		if (!smallmem)
			offset -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/*
	 * We have the final block of the good log (the first block
	 * of the log record _before_ the head.  So we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2 logs: a record may carry extra header blocks */
		uint	h_size = INT_GET(head->h_size, ARCH_CONVERT);

		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	/* If the record found doesn't span exactly to *last_blk, the head
	 * was mid-record: pull *last_blk back to the record header. */
	if (*last_blk - i + extra_bblks
			!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);
	return error;
}
d321ceac
NS
249
/*
 * Head is defined to be the point of the log where the next log write
 * write could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Return: zero if normal, non-zero if error.
 */
int
xlog_find_head(
	xlog_t		*log,
	xfs_daddr_t	*return_head_blk)
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	xfs_daddr_t	new_blk, first_blk, start_blk, last_blk, head_blk;
	int		num_scan_bblks;
	uint		first_half_cycle, last_half_cycle;
	uint		stop_on_cycle;
	int		error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;

		/* Is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;			/* get cycle # of 1st block */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, 0, 1, bp);
	first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);

	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, last_blk, 1, bp);
	last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an invalid block number, but this
	 * value makes the math correct.  If head_blk doesn't changed through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have
		 * cycle number last_half_cycle.  We need to scan backwards
		 * from the end verifying that there are no holes still
		 * containing last_half_cycle - 1.  If we find such a hole,
		 * then the start of that hole will be the new head.  The
		 * simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somwhere at the end of the
		 * log, as one of the latest writes at the beginning was
		 * incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and
		 * the head has to end up at the start of the x-1 hole at the
		 * end of the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.
		 * We don't worry about the x+1 blocks that we encounter,
		 * because we know that they cannot be the head since the log
		 * started with x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle
		 * number matching last_half_cycle.  We expect the log to be
		 * some variation on
		 *        x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will
		 * be where the new head belongs.  First we do a binary search
		 * for the first occurrence of last_half_cycle.  The binary
		 * search may not be totally accurate, so then we scan back
		 * from there looking for occurrences of last_half_cycle before
		 * us.  If that backwards scan wraps around the beginning of
		 * the log, then we look for occurrences of last_half_cycle - 1
		 * at the end of the log.  The cases we're looking for look
		 * like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * or
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						&head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log,
						start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.
		 * First we scan the physical end of the log.  In this part
		 * of the log, we are looking for blocks with cycle number
		 * last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as
		 * we've found a hole that didn't get written in going around
		 * the end of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of
		 * the log looking for occurrences of last_half_cycle.  If we
		 * find one, then our current estimate for the location of the
		 * first occurrence of last_half_cycle is wrong and we move
		 * back to the hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k
		 * logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the
		 * x + 1 blocks.  We need to skip past those since that is
		 * certainly not the head of the log.  By searching for
		 * last_half_cycle-1 we accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX &&
			(xfs_daddr_t) num_scan_bblks - head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks - (int)head_blk,
					(stop_on_cycle - 1), &new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical
		 * log is good.  This scan needs to verify that it doesn't find
		 * the last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log,
					start_blk, (int)head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

 bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = XLOG_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX &&
				(xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
							start_blk, &new_blk,
							(int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

 bp_err:
	xlog_put_bp(bp);

	if (error)
		xlog_warn("XFS: failed to find log head");
	return error;
}
d321ceac
NS
512
513/*
514 * Find the sync block number or the tail of the log.
515 *
516 * This will be the block number of the last record to have its
517 * associated buffers synced to disk. Every log record header has
518 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
4ed50f8a 519 * to get a sync block number. The only concern is to figure out which
d321ceac
NS
520 * log record header to believe.
521 *
522 * The following algorithm uses the log record header with the largest
4ed50f8a 523 * lsn. The entire log record does not need to be valid. We only care
d321ceac
NS
524 * that the header is valid.
525 *
526 * We could speed up search by using current head_blk buffer, but it is not
527 * available.
528 */
529int
a562a63b
NS
530xlog_find_tail(
531 xlog_t *log,
532 xfs_daddr_t *head_blk,
533 xfs_daddr_t *tail_blk,
534 int readonly)
d321ceac
NS
535{
536 xlog_rec_header_t *rhead;
537 xlog_op_header_t *op_head;
a562a63b 538 xfs_caddr_t offset = NULL;
d321ceac
NS
539 xfs_buf_t *bp;
540 int error, i, found;
541 xfs_daddr_t umount_data_blk;
542 xfs_daddr_t after_umount_blk;
543 xfs_lsn_t tail_lsn;
73bf5988 544 int hblks;
5000d01d 545
1b6a0044 546 found = 0;
d321ceac
NS
547
548 /*
5000d01d 549 * Find previous log record
d321ceac
NS
550 */
551 if ((error = xlog_find_head(log, head_blk)))
552 return error;
553
a562a63b 554 bp = xlog_get_bp(log, 1);
d321ceac 555 if (!bp)
ce029dc1 556 return ENOMEM;
d321ceac
NS
557 if (*head_blk == 0) { /* special case */
558 if ((error = xlog_bread(log, 0, 1, bp)))
559 goto bread_err;
a562a63b
NS
560 offset = xlog_align(log, 0, 1, bp);
561 if (GET_CYCLE(offset, ARCH_CONVERT) == 0) {
d321ceac
NS
562 *tail_blk = 0;
563 /* leave all other log inited values alone */
564 goto exit;
565 }
566 }
567
568 /*
569 * Search backwards looking for log record header block
570 */
571 ASSERT(*head_blk < INT_MAX);
1b6a0044 572 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
d321ceac
NS
573 if ((error = xlog_bread(log, i, 1, bp)))
574 goto bread_err;
a562a63b 575 offset = xlog_align(log, i, 1, bp);
1b6a0044 576 if (XLOG_HEADER_MAGIC_NUM ==
a562a63b 577 INT_GET(*(uint *)offset, ARCH_CONVERT)) {
d321ceac
NS
578 found = 1;
579 break;
580 }
581 }
582 /*
583 * If we haven't found the log record header block, start looking
584 * again from the end of the physical log. XXXmiken: There should be
585 * a check here to make sure we didn't search more than N blocks in
586 * the previous code.
587 */
588 if (!found) {
1b6a0044 589 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
d321ceac
NS
590 if ((error = xlog_bread(log, i, 1, bp)))
591 goto bread_err;
a562a63b 592 offset = xlog_align(log, i, 1, bp);
1b6a0044 593 if (XLOG_HEADER_MAGIC_NUM ==
a562a63b 594 INT_GET(*(uint*)offset, ARCH_CONVERT)) {
d321ceac
NS
595 found = 2;
596 break;
597 }
598 }
599 }
600 if (!found) {
601 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
602 ASSERT(0);
603 return XFS_ERROR(EIO);
604 }
605
606 /* find blk_no of tail of log */
a562a63b 607 rhead = (xlog_rec_header_t *)offset;
46eca962 608 *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT));
d321ceac
NS
609
610 /*
611 * Reset log values according to the state of the log when we
612 * crashed. In the case where head_blk == 0, we bump curr_cycle
613 * one because the next write starts a new cycle rather than
614 * continuing the cycle of the last good log record. At this
615 * point we have guaranteed that all partial log records have been
616 * accounted for. Therefore, we know that the last good log record
617 * written was complete and ended exactly on the end boundary
618 * of the physical log.
619 */
620 log->l_prev_block = i;
621 log->l_curr_block = (int)*head_blk;
622 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
623 if (found == 2)
624 log->l_curr_cycle++;
625 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
626 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
627 log->l_grant_reserve_cycle = log->l_curr_cycle;
628 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
629 log->l_grant_write_cycle = log->l_curr_cycle;
630 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
631
632 /*
633 * Look for unmount record. If we find it, then we know there
4ed50f8a 634 * was a clean unmount. Since 'i' could be the last block in
d321ceac
NS
635 * the physical log, we convert to a log block before comparing
636 * to the head_blk.
637 *
638 * Save the current tail lsn to use to pass to
639 * xlog_clear_stale_blocks() below. We won't want to clear the
640 * unmount record if there is one, so we pass the lsn of the
641 * unmount record rather than the block after it.
642 */
73bf5988
SL
643 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
644 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
645 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
1b6a0044
NS
646
647 if ((h_version & XLOG_VERSION_2) &&
73bf5988
SL
648 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
649 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
650 if (h_size % XLOG_HEADER_CYCLE_SIZE)
651 hblks++;
652 } else {
653 hblks = 1;
654 }
655 } else {
656 hblks = 1;
657 }
1b6a0044
NS
658 after_umount_blk = (i + hblks + (int)
659 BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
d321ceac 660 tail_lsn = log->l_tail_lsn;
1b6a0044
NS
661 if (*head_blk == after_umount_blk &&
662 INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
73bf5988 663 umount_data_blk = (i + hblks) % log->l_logBBsize;
d321ceac
NS
664 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
665 goto bread_err;
666 }
a562a63b
NS
667 offset = xlog_align(log, umount_data_blk, 1, bp);
668 op_head = (xlog_op_header_t *)offset;
d321ceac
NS
669 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
670 /*
671 * Set tail and last sync so that newly written
672 * log records will point recovery to after the
673 * current unmount record.
674 */
46eca962
NS
675 ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle,
676 after_umount_blk);
677 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
678 after_umount_blk);
d321ceac 679 *tail_blk = after_umount_blk;
46eca962
NS
680
681 /*
682 * Note that the unmount was clean. If the unmount
683 * was not clean, we need to know this to rebuild the
684 * superblock counters from the perag headers if we
685 * have a filesystem using non-persistent counters.
686 */
687 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
d321ceac
NS
688 }
689 }
690
d321ceac
NS
691 /*
692 * Make sure that there are no blocks in front of the head
693 * with the same cycle number as the head. This can happen
694 * because we allow multiple outstanding log writes concurrently,
695 * and the later writes might make it out before earlier ones.
696 *
697 * We use the lsn from before modifying it so that we'll never
698 * overwrite the unmount record after a clean unmount.
699 *
700 * Do this only if we are going to recover the filesystem
32181a02
NS
701 *
702 * NOTE: This used to say "if (!readonly)"
703 * However on Linux, we can & do recover a read-only filesystem.
704 * We only skip recovery if NORECOVERY is specified on mount,
705 * in which case we would not be here.
706 *
707 * But... if the -device- itself is readonly, just skip this.
708 * We can't recover this device anyway, so it won't matter.
d321ceac 709 */
a562a63b 710 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
d321ceac 711 error = xlog_clear_stale_blocks(log, tail_lsn);
32181a02 712 }
d321ceac
NS
713
714bread_err:
715exit:
716 xlog_put_bp(bp);
717
5000d01d
SL
718 if (error)
719 xlog_warn("XFS: failed to locate log tail");
d321ceac 720 return error;
a562a63b 721}
4ed50f8a 722
d321ceac
NS
723/*
724 * Is the log zeroed at all?
725 *
726 * The last binary search should be changed to perform an X block read
4ed50f8a 727 * once X becomes small enough. You can then search linearly through
d321ceac
NS
728 * the X blocks. This will cut down on the number of reads we need to do.
729 *
730 * If the log is partially zeroed, this routine will pass back the blkno
731 * of the first block with cycle number 0. It won't have a complete LR
732 * preceding it.
733 *
734 * Return:
735 * 0 => the log is completely written to
736 * -1 => use *blk_no as the first block of the log
737 * >0 => error has occurred
738 */
739int
a562a63b
NS
740xlog_find_zeroed(
741 xlog_t *log,
742 xfs_daddr_t *blk_no)
d321ceac
NS
743{
744 xfs_buf_t *bp;
a562a63b 745 xfs_caddr_t offset;
4ed50f8a 746 uint first_cycle, last_cycle;
d321ceac 747 xfs_daddr_t new_blk, last_blk, start_blk;
4ed50f8a
RC
748 xfs_daddr_t num_scan_bblks;
749 int error, log_bbnum = log->l_logBBsize;
d321ceac 750
d321ceac 751 /* check totally zeroed log */
a562a63b 752 bp = xlog_get_bp(log, 1);
d321ceac 753 if (!bp)
ce029dc1 754 return ENOMEM;
d321ceac
NS
755 if ((error = xlog_bread(log, 0, 1, bp)))
756 goto bp_err;
a562a63b
NS
757 offset = xlog_align(log, 0, 1, bp);
758 first_cycle = GET_CYCLE(offset, ARCH_CONVERT);
d321ceac
NS
759 if (first_cycle == 0) { /* completely zeroed log */
760 *blk_no = 0;
761 xlog_put_bp(bp);
762 return -1;
763 }
764
765 /* check partially zeroed log */
766 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
767 goto bp_err;
a562a63b
NS
768 offset = xlog_align(log, log_bbnum-1, 1, bp);
769 last_cycle = GET_CYCLE(offset, ARCH_CONVERT);
d321ceac
NS
770 if (last_cycle != 0) { /* log completely written to */
771 xlog_put_bp(bp);
772 return 0;
773 } else if (first_cycle != 1) {
774 /*
775 * If the cycle of the last block is zero, the cycle of
5000d01d
SL
776 * the first block must be 1. If it's not, maybe we're
777 * not looking at a log... Bail out.
d321ceac 778 */
5000d01d 779 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
d321ceac
NS
780 return XFS_ERROR(EINVAL);
781 }
5000d01d 782
d321ceac
NS
783 /* we have a partially zeroed log */
784 last_blk = log_bbnum-1;
785 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
786 goto bp_err;
787
788 /*
4ed50f8a 789 * Validate the answer. Because there is no way to guarantee that
d321ceac
NS
790 * the entire log is made up of log records which are the same size,
791 * we scan over the defined maximum blocks. At this point, the maximum
792 * is not chosen to mean anything special. XXXmiken
793 */
73bf5988 794 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
d321ceac 795 ASSERT(num_scan_bblks <= INT_MAX);
5000d01d 796
d321ceac
NS
797 if (last_blk < num_scan_bblks)
798 num_scan_bblks = last_blk;
799 start_blk = last_blk - num_scan_bblks;
5000d01d 800
d321ceac
NS
801 /*
802 * We search for any instances of cycle number 0 that occur before
803 * our current estimate of the head. What we're trying to detect is
4ed50f8a
RC
804 * 1 ... | 0 | 1 | 0...
805 * ^ binary search ends here
d321ceac 806 */
ce029dc1
ES
807 if ((error = xlog_find_verify_cycle(log, start_blk,
808 (int)num_scan_bblks, 0, &new_blk)))
606d804d 809 goto bp_err;
ce029dc1
ES
810 if (new_blk != -1)
811 last_blk = new_blk;
d321ceac
NS
812
813 /*
814 * Potentially backup over partial log record write. We don't need
815 * to search the end of the log because we know it is zero.
816 */
5000d01d 817 if ((error = xlog_find_verify_log_record(log, start_blk,
79c48ada
ES
818 &last_blk, 0)) == -1) {
819 error = XFS_ERROR(EIO);
820 goto bp_err;
821 } else if (error)
d321ceac
NS
822 goto bp_err;
823
824 *blk_no = last_blk;
825bp_err:
826 xlog_put_bp(bp);
827 if (error)
828 return error;
829 return -1;
a562a63b 830}
d321ceac 831
/*
 * Restore the original cycle-number word at the start of each basic block
 * of a log record's payload.  On disk, each data block's first word is
 * overwritten with the record's cycle number; the displaced words are
 * saved in the record header (h_cycle_data) and, for v2 logs with large
 * records, in the extended headers that follow.
 */
STATIC void
xlog_unpack_data(
	xlog_rec_header_t	*rhead,	/* record header holding saved words */
	xfs_caddr_t		dp,	/* start of the record's data blocks */
	xlog_t			*log)
{
	int			i, j, k;
	xlog_in_core_2_t	*xhdr;

	/* First, the words saved in the primary header (at most one
	 * header-cycle-size worth of blocks). */
	for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2 logs: remaining words live in the extended headers;
		 * j selects the extended header, k the slot within it. */
		xhdr = (xlog_in_core_2_t *)rhead;
		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

	/* no-op stub in libxlog (see macro at top of file) */
	xlog_unpack_data_checksum(rhead, dp, log);
}
d321ceac 859
d321ceac 860STATIC xlog_recover_t *
a562a63b
NS
861xlog_recover_find_tid(
862 xlog_recover_t *q,
863 xlog_tid_t tid)
d321ceac 864{
a562a63b 865 xlog_recover_t *p = q;
d321ceac
NS
866
867 while (p != NULL) {
868 if (p->r_log_tid == tid)
869 break;
870 p = p->r_next;
871 }
872 return p;
a562a63b 873}
4ed50f8a 874
d321ceac 875STATIC void
a562a63b
NS
876xlog_recover_put_hashq(
877 xlog_recover_t **q,
878 xlog_recover_t *trans)
d321ceac
NS
879{
880 trans->r_next = *q;
881 *q = trans;
a562a63b 882}
4ed50f8a 883
d321ceac 884STATIC void
a562a63b
NS
885xlog_recover_new_tid(
886 xlog_recover_t **q,
887 xlog_tid_t tid,
888 xfs_lsn_t lsn)
d321ceac 889{
a562a63b 890 xlog_recover_t *trans;
d321ceac 891
a562a63b 892 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
d321ceac
NS
893 trans->r_log_tid = tid;
894 trans->r_lsn = lsn;
895 xlog_recover_put_hashq(q, trans);
a562a63b 896}
d321ceac
NS
897
898STATIC int
a562a63b
NS
899xlog_recover_unlink_tid(
900 xlog_recover_t **q,
901 xlog_recover_t *trans)
d321ceac 902{
a562a63b
NS
903 xlog_recover_t *tp;
904 int found = 0;
d321ceac
NS
905
906 ASSERT(trans != 0);
907 if (trans == *q) {
908 *q = (*q)->r_next;
909 } else {
910 tp = *q;
911 while (tp != 0) {
912 if (tp->r_next == trans) {
913 found = 1;
914 break;
915 }
916 tp = tp->r_next;
917 }
918 if (!found) {
919 xlog_warn(
920 "XFS: xlog_recover_unlink_tid: trans not found");
921 ASSERT(0);
922 return XFS_ERROR(EIO);
923 }
924 tp->r_next = tp->r_next->r_next;
925 }
926 return 0;
a562a63b 927}
d321ceac
NS
928
/*
 * Free up any resources allocated by the transaction
 *
 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
 */
STATIC void
xlog_recover_free_trans(
	xlog_recover_t		*trans)
{
	xlog_recover_item_t	*first_item, *item, *free_item;
	int			i;

	/* r_itemq is a circular list: walk until we come back around to
	 * the first item.  Grab ri_next BEFORE freeing the item. */
	item = first_item = trans->r_itemq;
	do {
		free_item = item;
		item = item->ri_next;
		 /* Free the regions in the item. */
		for (i = 0; i < free_item->ri_cnt; i++) {
			kmem_free(free_item->ri_buf[i].i_addr,
				  free_item->ri_buf[i].i_len);
		}
		/* Free the item itself */
		kmem_free(free_item->ri_buf,
			  (free_item->ri_total * sizeof(xfs_log_iovec_t)));
		kmem_free(free_item, sizeof(xlog_recover_item_t));
	} while (first_item != item);
	/* Free the transaction recover structure */
	kmem_free(trans, sizeof(xlog_recover_t));
}
d321ceac
NS
958
959STATIC int
a562a63b
NS
960xlog_recover_commit_trans(
961 xlog_t *log,
962 xlog_recover_t **q,
963 xlog_recover_t *trans,
964 int pass)
d321ceac 965{
a562a63b 966 int error;
d321ceac
NS
967
968 if ((error = xlog_recover_unlink_tid(q, trans)))
969 return error;
970 if ((error = xlog_recover_do_trans(log, trans, pass)))
971 return error;
972 xlog_recover_free_trans(trans); /* no error */
973 return 0;
a562a63b 974}
d321ceac
NS
975
976STATIC void
a562a63b
NS
977xlog_recover_insert_item_backq(
978 xlog_recover_item_t **q,
979 xlog_recover_item_t *item)
d321ceac
NS
980{
981 if (*q == 0) {
982 item->ri_prev = item->ri_next = item;
983 *q = item;
984 } else {
985 item->ri_next = *q;
986 item->ri_prev = (*q)->ri_prev;
987 (*q)->ri_prev = item;
988 item->ri_prev->ri_next = item;
989 }
a562a63b 990}
d321ceac
NS
991
992STATIC void
a562a63b
NS
993xlog_recover_add_item(
994 xlog_recover_item_t **itemq)
d321ceac 995{
a562a63b 996 xlog_recover_item_t *item;
d321ceac 997
2b288ccf 998 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
d321ceac 999 xlog_recover_insert_item_backq(itemq, item);
a562a63b 1000}
d321ceac 1001
a562a63b
NS
1002STATIC int
1003xlog_recover_add_to_cont_trans(
1004 xlog_recover_t *trans,
1005 xfs_caddr_t dp,
1006 int len)
1007{
1008 xlog_recover_item_t *item;
1009 xfs_caddr_t ptr, old_ptr;
1010 int old_len;
1011
1012 item = trans->r_itemq;
1013 if (item == 0) {
1014 /* finish copying rest of trans header */
1015 xlog_recover_add_item(&trans->r_itemq);
1016 ptr = (xfs_caddr_t) &trans->r_theader +
1017 sizeof(xfs_trans_header_t) - len;
1018 memcpy(ptr, dp, len); /* d, s, l */
1019 return 0;
1020 }
1021 item = item->ri_prev;
1022
1023 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1024 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1025
6239071d 1026 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
a562a63b
NS
1027 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1028 item->ri_buf[item->ri_cnt-1].i_len += len;
1029 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1030 return 0;
1031}
1032
1033/*
1034 * The next region to add is the start of a new region. It could be
d321ceac
NS
1035 * a whole region or it could be the first part of a new region. Because
1036 * of this, the assumption here is that the type and size fields of all
1037 * format structures fit into the first 32 bits of the structure.
1038 *
1039 * This works because all regions must be 32 bit aligned. Therefore, we
1040 * either have both fields or we have neither field. In the case we have
1041 * neither field, the data part of the region is zero length. We only have
1042 * a log_op_header and can throw away the header since a new one will appear
1043 * later. If we have at least 4 bytes, then we can determine how many regions
1044 * will appear in the current log item.
1045 */
1046STATIC int
a562a63b
NS
1047xlog_recover_add_to_trans(
1048 xlog_recover_t *trans,
1049 xfs_caddr_t dp,
1050 int len)
d321ceac 1051{
a562a63b
NS
1052 xfs_inode_log_format_t *in_f; /* any will do */
1053 xlog_recover_item_t *item;
1054 xfs_caddr_t ptr;
d321ceac
NS
1055
1056 if (!len)
1057 return 0;
d321ceac
NS
1058 item = trans->r_itemq;
1059 if (item == 0) {
1060 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
1061 if (len == sizeof(xfs_trans_header_t))
1062 xlog_recover_add_item(&trans->r_itemq);
32181a02 1063 memcpy(&trans->r_theader, dp, len); /* d, s, l */
d321ceac
NS
1064 return 0;
1065 }
a562a63b
NS
1066
1067 ptr = kmem_alloc(len, KM_SLEEP);
1068 memcpy(ptr, dp, len);
1069 in_f = (xfs_inode_log_format_t *)ptr;
1070
d321ceac
NS
1071 if (item->ri_prev->ri_total != 0 &&
1072 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1073 xlog_recover_add_item(&trans->r_itemq);
1074 }
1075 item = trans->r_itemq;
1076 item = item->ri_prev;
1077
1078 if (item->ri_total == 0) { /* first region to be added */
1079 item->ri_total = in_f->ilf_size;
1080 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1081 item->ri_buf = kmem_zalloc((item->ri_total *
2b288ccf 1082 sizeof(xfs_log_iovec_t)), KM_SLEEP);
d321ceac
NS
1083 }
1084 ASSERT(item->ri_total > item->ri_cnt);
1085 /* Description region is ri_buf[0] */
1086 item->ri_buf[item->ri_cnt].i_addr = ptr;
1087 item->ri_buf[item->ri_cnt].i_len = len;
1088 item->ri_cnt++;
1089 return 0;
a562a63b 1090}
d321ceac
NS
1091
1092STATIC int
a562a63b
NS
1093xlog_recover_unmount_trans(
1094 xlog_recover_t *trans)
d321ceac
NS
1095{
1096 /* Do nothing now */
1097 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
a562a63b
NS
1098 return 0;
1099}
d321ceac 1100
a562a63b
NS
1101/*
1102 * There are two valid states of the r_state field. 0 indicates that the
1103 * transaction structure is in a normal state. We have either seen the
1104 * start of the transaction or the last operation we added was not a partial
1105 * operation. If the last operation we added to the transaction was a
1106 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
1107 *
1108 * NOTE: skip LRs with 0 data length.
1109 */
d321ceac 1110STATIC int
a562a63b
NS
1111xlog_recover_process_data(
1112 xlog_t *log,
1113 xlog_recover_t *rhash[],
1114 xlog_rec_header_t *rhead,
1115 xfs_caddr_t dp,
1116 int pass)
d321ceac 1117{
a562a63b
NS
1118 xfs_caddr_t lp;
1119 int num_logops;
1120 xlog_op_header_t *ohead;
1121 xlog_recover_t *trans;
1122 xlog_tid_t tid;
1123 int error;
1124 unsigned long hash;
1125 uint flags;
1126
1127 lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
1128 num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
1129
1130 /* check the log format matches our own - else we can't recover */
1131 if (xlog_header_check_recover(log->l_mp, rhead))
1132 return (XFS_ERROR(EIO));
1133
1134 while ((dp < lp) && num_logops) {
1135 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
1136 ohead = (xlog_op_header_t *)dp;
1137 dp += sizeof(xlog_op_header_t);
1138 if (ohead->oh_clientid != XFS_TRANSACTION &&
1139 ohead->oh_clientid != XFS_LOG) {
1140 xlog_warn(
1141 "XFS: xlog_recover_process_data: bad clientid");
1142 ASSERT(0);
1143 return (XFS_ERROR(EIO));
d321ceac 1144 }
a562a63b
NS
1145 tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
1146 hash = XLOG_RHASH(tid);
1147 trans = xlog_recover_find_tid(rhash[hash], tid);
1148 if (trans == NULL) { /* not found; add new tid */
1149 if (ohead->oh_flags & XLOG_START_TRANS)
1150 xlog_recover_new_tid(&rhash[hash], tid,
1151 INT_GET(rhead->h_lsn, ARCH_CONVERT));
1152 } else {
1153 ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
1154 flags = ohead->oh_flags & ~XLOG_END_TRANS;
1155 if (flags & XLOG_WAS_CONT_TRANS)
1156 flags &= ~XLOG_CONTINUE_TRANS;
1157 switch (flags) {
1158 case XLOG_COMMIT_TRANS:
1159 error = xlog_recover_commit_trans(log,
1160 &rhash[hash], trans, pass);
1161 break;
1162 case XLOG_UNMOUNT_TRANS:
1163 error = xlog_recover_unmount_trans(trans);
1164 break;
1165 case XLOG_WAS_CONT_TRANS:
1166 error = xlog_recover_add_to_cont_trans(trans,
1167 dp, INT_GET(ohead->oh_len,
1168 ARCH_CONVERT));
1169 break;
1170 case XLOG_START_TRANS:
1171 xlog_warn(
1172 "XFS: xlog_recover_process_data: bad transaction");
1173 ASSERT(0);
1174 error = XFS_ERROR(EIO);
1175 break;
1176 case 0:
1177 case XLOG_CONTINUE_TRANS:
1178 error = xlog_recover_add_to_trans(trans,
1179 dp, INT_GET(ohead->oh_len,
1180 ARCH_CONVERT));
1181 break;
1182 default:
1183 xlog_warn(
1184 "XFS: xlog_recover_process_data: bad flag");
1185 ASSERT(0);
1186 error = XFS_ERROR(EIO);
1187 break;
1188 }
1189 if (error)
1190 return error;
d321ceac 1191 }
a562a63b
NS
1192 dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
1193 num_logops--;
1194 }
1195 return 0;
1196}
d321ceac 1197
72c5917e
NS
1198STATIC int
1199xlog_valid_rec_header(
1200 xlog_t *log,
1201 xlog_rec_header_t *rhead,
1202 xfs_daddr_t blkno)
1203{
b0e364f6 1204 int hlen;
72c5917e
NS
1205
1206 if (unlikely(
1207 (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
1208 XLOG_HEADER_MAGIC_NUM))) {
1209 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
1210 XFS_ERRLEVEL_LOW, log->l_mp);
1211 return XFS_ERROR(EFSCORRUPTED);
1212 }
1213 if (unlikely(
46eca962 1214 (!rhead->h_version ||
72c5917e
NS
1215 (INT_GET(rhead->h_version, ARCH_CONVERT) &
1216 (~XLOG_VERSION_OKBITS)) != 0))) {
1217 xlog_warn("XFS: %s: unrecognised log version (%d).",
1218 __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT));
1219 return XFS_ERROR(EIO);
1220 }
1221
1222 /* LR body must have data or it wouldn't have been written */
b0e364f6
NS
1223 hlen = INT_GET(rhead->h_len, ARCH_CONVERT);
1224 if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
72c5917e
NS
1225 XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
1226 XFS_ERRLEVEL_LOW, log->l_mp);
1227 return XFS_ERROR(EFSCORRUPTED);
1228 }
1229 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
1230 XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
1231 XFS_ERRLEVEL_LOW, log->l_mp);
1232 return XFS_ERROR(EFSCORRUPTED);
1233 }
1234 return 0;
1235}
1236
d321ceac
NS
/*
 * Read the log from tail to head and process the log records found.
 * Handle the two cases where the tail and head are in the same cycle
 * and where the active portion of the log wraps around the end of
 * the physical log separately.  The pass parameter is passed through
 * to the routines called to process the data and is not looked at
 * here.
 */
int
xlog_do_recovery_pass(
	xlog_t			*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			pass)
{
	xlog_rec_header_t	*rhead;
	xfs_daddr_t		blk_no;
	xfs_caddr_t		bufaddr, offset;
	xfs_buf_t		*hbp, *dbp;	/* header buf, data buf */
	int			error = 0, h_size;
	int			bblks, split_bblks;
	int			hblks, split_hblks, wrapped_hblks;
	xlog_recover_t		*rhash[XLOG_RHASH_SIZE];

	ASSERT(head_blk != tail_blk);

	/*
	 * Read the header of the tail block and get the iclog buffer size from
	 * h_size.  Use this to tell how many sectors make up the log header.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/*
		 * When using variable length iclogs, read first sector of
		 * iclog header and extract the header size from it.  Get a
		 * new hbp that is the correct size.
		 */
		hbp = xlog_get_bp(log, 1);
		if (!hbp)
			return ENOMEM;
		if ((error = xlog_bread(log, tail_blk, 1, hbp)))
			goto bread_err1;
		offset = xlog_align(log, tail_blk, 1, hbp);
		rhead = (xlog_rec_header_t *)offset;
		error = xlog_valid_rec_header(log, rhead, tail_blk);
		if (error)
			goto bread_err1;
		h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
		/* v2 logs with large iclogs need a multi-sector header buf */
		if ((INT_GET(rhead->h_version, ARCH_CONVERT)
				& XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
			xlog_put_bp(hbp);
			hbp = xlog_get_bp(log, hblks);
		} else {
			hblks = 1;
		}
	} else {
		/* v1 log: single-sector headers, fixed max record size */
		ASSERT(log->l_sectbb_log == 0);
		hblks = 1;
		hbp = xlog_get_bp(log, 1);
		h_size = XLOG_BIG_RECORD_BSIZE;
	}

	if (!hbp)
		return ENOMEM;
	dbp = xlog_get_bp(log, BTOBB(h_size));
	if (!dbp) {
		xlog_put_bp(hbp);
		return ENOMEM;
	}

	memset(rhash, 0, sizeof(rhash));
	if (tail_blk <= head_blk) {
		/* Simple case: active region does not wrap; scan forward. */
		for (blk_no = tail_blk; blk_no < head_blk; ) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no, hblks, hbp);
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead, blk_no);
			if (error)
				goto bread_err2;

			/* blocks in data section */
			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			error = xlog_bread(log, blk_no + hblks, bblks, dbp);
			if (error)
				goto bread_err2;
			offset = xlog_align(log, blk_no + hblks, bblks, dbp);
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log,
						rhash, rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks + hblks;
		}
	} else {
		/*
		 * Perform recovery around the end of the physical log.
		 * When the head is not on the same cycle number as the tail,
		 * we can't do a sequential recovery as above.
		 */
		blk_no = tail_blk;
		while (blk_no < log->l_logBBsize) {
			/*
			 * Check for header wrapping around physical end-of-log
			 */
			offset = NULL;
			split_hblks = 0;
			wrapped_hblks = 0;
			if (blk_no + hblks <= log->l_logBBsize) {
				/* Read header in one read */
				error = xlog_bread(log, blk_no, hblks, hbp);
				if (error)
					goto bread_err2;
				offset = xlog_align(log, blk_no, hblks, hbp);
			} else {
				/* This LR is split across physical log end */
				if (blk_no != log->l_logBBsize) {
					/* some data before physical log end */
					ASSERT(blk_no <= INT_MAX);
					split_hblks = log->l_logBBsize - (int)blk_no;
					ASSERT(split_hblks > 0);
					if ((error = xlog_bread(log, blk_no,
							split_hblks, hbp)))
						goto bread_err2;
					offset = xlog_align(log, blk_no,
							split_hblks, hbp);
				}
				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
				 *   aligned;
				 * - we read the log end (LR header start)
				 *   _first_, then the log start (LR header end)
				 *   - order is important.
				 */
				/* Aim the second read past the first chunk. */
				bufaddr = XFS_BUF_PTR(hbp);
				XFS_BUF_SET_PTR(hbp,
						bufaddr + BBTOB(split_hblks),
						BBTOB(hblks - split_hblks));
				wrapped_hblks = hblks - split_hblks;
				error = xlog_bread(log, 0, wrapped_hblks, hbp);
				if (error)
					goto bread_err2;
				/* Restore the buffer's real base pointer. */
				XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
				if (!offset)
					offset = xlog_align(log, 0,
							wrapped_hblks, hbp);
			}
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead,
						split_hblks ? blk_no : 0);
			if (error)
				goto bread_err2;

			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			blk_no += hblks;

			/* Read in data for log record */
			if (blk_no + bblks <= log->l_logBBsize) {
				error = xlog_bread(log, blk_no, bblks, dbp);
				if (error)
					goto bread_err2;
				offset = xlog_align(log, blk_no, bblks, dbp);
			} else {
				/* This log record is split across the
				 * physical end of log */
				offset = NULL;
				split_bblks = 0;
				if (blk_no != log->l_logBBsize) {
					/* some data is before the physical
					 * end of log */
					ASSERT(!wrapped_hblks);
					ASSERT(blk_no <= INT_MAX);
					split_bblks =
						log->l_logBBsize - (int)blk_no;
					ASSERT(split_bblks > 0);
					if ((error = xlog_bread(log, blk_no,
							split_bblks, dbp)))
						goto bread_err2;
					offset = xlog_align(log, blk_no,
							split_bblks, dbp);
				}
				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
				 *   aligned;
				 * - we read the log end (LR header start)
				 *   _first_, then the log start (LR header end)
				 *   - order is important.
				 */
				bufaddr = XFS_BUF_PTR(dbp);
				XFS_BUF_SET_PTR(dbp,
						bufaddr + BBTOB(split_bblks),
						BBTOB(bblks - split_bblks));
				/* wrapped data resumes after any wrapped header */
				if ((error = xlog_bread(log, wrapped_hblks,
						bblks - split_bblks, dbp)))
					goto bread_err2;
				XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
				if (!offset)
					offset = xlog_align(log, wrapped_hblks,
						bblks - split_bblks, dbp);
			}
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log, rhash,
							rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks;
		}

		ASSERT(blk_no >= log->l_logBBsize);
		blk_no -= log->l_logBBsize;

		/* read first part of physical log */
		while (blk_no < head_blk) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no, hblks, hbp);
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead, blk_no);
			if (error)
				goto bread_err2;
			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no+hblks, bblks, dbp);
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log, rhash,
						rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks + hblks;
		}
	}

	/* NOTE(review): rhash entries for uncommitted transactions are not
	 * freed here -- presumably leaked on a partial log; confirm against
	 * callers before changing. */
 bread_err2:
	xlog_put_bp(dbp);
 bread_err1:
	xlog_put_bp(hbp);
	return error;
}