]>
Commit | Line | Data |
---|---|---|
d321ceac | 1 | /* |
b0e364f6 | 2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. |
5000d01d | 3 | * |
d321ceac NS |
4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
5000d01d | 7 | * |
d321ceac NS |
8 | * This program is distributed in the hope that it would be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
5000d01d | 11 | * |
d321ceac NS |
12 | * Further, this software is distributed without any warranty that it is |
13 | * free of the rightful claim of any third person regarding infringement | |
4ed50f8a | 14 | * or the like. Any license provided herein, whether implied or |
d321ceac NS |
15 | * otherwise, applies only to this software file. Patent licenses, if |
16 | * any, provided herein do not apply to combinations of this program with | |
17 | * other software, or any other product whatsoever. | |
5000d01d | 18 | * |
d321ceac NS |
19 | * You should have received a copy of the GNU General Public License along |
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | |
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | |
5000d01d | 22 | * |
d321ceac NS |
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, |
24 | * Mountain View, CA 94043, or: | |
5000d01d SL |
25 | * |
26 | * http://www.sgi.com | |
27 | * | |
28 | * For further information regarding this notice, see: | |
29 | * | |
d321ceac NS |
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ |
31 | */ | |
32 | ||
#include <xfs/libxlog.h>

/*
 * Userspace stubs.  These three operations only have real work to do in
 * the kernel implementation of log recovery; here they compile away:
 * checksum unpacking is a no-op, clearing stale blocks always reports
 * success (0), and the log device is never considered read-only (0).
 */
#define xlog_unpack_data_checksum(rhead, dp, log)	((void)0)
#define xlog_clear_stale_blocks(log, tail_lsn)		(0)
#define xfs_readonly_buftarg(buftarg)			(0)
d321ceac NS |
39 | /* |
40 | * This routine finds (to an approximation) the first block in the physical | |
4ed50f8a | 41 | * log which contains the given cycle. It uses a binary search algorithm. |
d321ceac NS |
42 | * Note that the algorithm can not be perfect because the disk will not |
43 | * necessarily be perfect. | |
44 | */ | |
45 | int | |
a562a63b NS |
46 | xlog_find_cycle_start( |
47 | xlog_t *log, | |
48 | xfs_buf_t *bp, | |
49 | xfs_daddr_t first_blk, | |
50 | xfs_daddr_t *last_blk, | |
51 | uint cycle) | |
d321ceac | 52 | { |
a562a63b | 53 | xfs_caddr_t offset; |
ffe29fb5 NS |
54 | xfs_daddr_t mid_blk; |
55 | uint mid_cycle; | |
56 | int error; | |
d321ceac NS |
57 | |
58 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
59 | while (mid_blk != first_blk && mid_blk != *last_blk) { | |
60 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | |
61 | return error; | |
a562a63b NS |
62 | offset = xlog_align(log, mid_blk, 1, bp); |
63 | mid_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
d321ceac NS |
64 | if (mid_cycle == cycle) { |
65 | *last_blk = mid_blk; | |
66 | /* last_half_cycle == mid_cycle */ | |
67 | } else { | |
68 | first_blk = mid_blk; | |
69 | /* first_half_cycle == mid_cycle */ | |
70 | } | |
71 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
72 | } | |
73 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | |
74 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | |
75 | ||
76 | return 0; | |
a562a63b | 77 | } |
d321ceac NS |
78 | |
79 | /* | |
80 | * Check that the range of blocks does not contain the cycle number | |
81 | * given. The scan needs to occur from front to back and the ptr into the | |
82 | * region must be updated since a later routine will need to perform another | |
83 | * test. If the region is completely good, we end up returning the same | |
84 | * last block number. | |
85 | * | |
ce029dc1 | 86 | * Set blkno to -1 if we encounter no errors. This is an invalid block number |
d321ceac NS |
87 | * since we don't ever expect logs to get this large. |
88 | */ | |
STATIC int
xlog_find_verify_cycle(
	xlog_t		*log,
	xfs_daddr_t	start_blk,	/* first block of the region to scan */
	int		nbblks,		/* number of basic blocks to scan */
	uint		stop_on_cycle_no, /* cycle number we must NOT find */
	xfs_daddr_t	*new_blk)	/* out: first block stamped with that
					 * cycle, or -1 if the region is clean */
{
	xfs_daddr_t	i, j;
	uint		cycle;
	xfs_buf_t	*bp;
	xfs_daddr_t	bufblks;	/* blocks read per I/O */
	xfs_caddr_t	buf = NULL;
	int		error = 0;

	/*
	 * Derive an I/O size from nbblks.  NOTE(review): 1 << ffs(x) uses the
	 * lowest set bit, so this is NOT "round up to a power of two" (e.g.
	 * nbblks == 7 yields 2) -- confirm against the intended sizing.
	 */
	bufblks = 1 << ffs(nbblks);

	while (!(bp = xlog_get_bp(log, bufblks))) {
		/* can't get enough memory to do everything in one big buffer */
		bufblks >>= 1;
		/*
		 * NOTE(review): l_sectbb_log appears to be a log2 value being
		 * compared against a block count here -- verify this bound.
		 */
		if (bufblks <= log->l_sectbb_log)
			return ENOMEM;
	}

	/* Walk the region front-to-back in bufblks-sized chunks. */
	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
		int	bcount;

		/* Clamp the last read to the end of the region. */
		bcount = min(bufblks, (start_blk + nbblks - i));

		if ((error = xlog_bread(log, i, bcount, bp)))
			goto out;

		buf = xlog_align(log, i, bcount, bp);
		/* Check the cycle number stamped in each basic block. */
		for (j = 0; j < bcount; j++) {
			cycle = GET_CYCLE(buf, ARCH_CONVERT);
			if (cycle == stop_on_cycle_no) {
				/* Found the forbidden cycle: report its block */
				*new_blk = i+j;
				goto out;
			}

			buf += BBSIZE;
		}
	}

	/* Region is clean: -1 is never a valid log block number. */
	*new_blk = -1;

out:
	xlog_put_bp(bp);
	return error;
}
d321ceac NS |
139 | |
140 | /* | |
141 | * Potentially backup over partial log record write. | |
142 | * | |
143 | * In the typical case, last_blk is the number of the block directly after | |
144 | * a good log record. Therefore, we subtract one to get the block number | |
145 | * of the last block in the given buffer. extra_bblks contains the number | |
146 | * of blocks we would have read on a previous read. This happens when the | |
147 | * last log record is split over the end of the physical log. | |
148 | * | |
149 | * extra_bblks is the number of blocks potentially verified on a previous | |
150 | * call to this routine. | |
151 | */ | |
STATIC int
xlog_find_verify_log_record(
	xlog_t			*log,
	xfs_daddr_t		start_blk,	/* lowest block to scan back to */
	xfs_daddr_t		*last_blk,	/* in: candidate head block;
						 * out: possibly pulled back to
						 * the record header we found */
	int			extra_bblks)	/* blocks verified by a prior call */
{
	xfs_daddr_t		i;
	xfs_buf_t		*bp;
	xfs_caddr_t		offset = NULL;
	xlog_rec_header_t	*head = NULL;
	int			error = 0;
	int			smallmem = 0;	/* 1 => one-block-at-a-time mode */
	int			num_blks = *last_blk - start_blk;
	int			xhdrs;		/* header blocks in the record */

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	/*
	 * Try to read the whole range in one buffer; if memory is tight,
	 * fall back to a single-block buffer and re-read per iteration.
	 */
	if (!(bp = xlog_get_bp(log, num_blks))) {
		if (!(bp = xlog_get_bp(log, 1)))
			return ENOMEM;
		smallmem = 1;
	} else {
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		offset = xlog_align(log, start_blk, num_blks, bp);
		/* Point at the last block of the buffered range. */
		offset += ((num_blks - 1) << BBSHIFT);
	}

	/* Scan backwards from just below *last_blk for a record header. */
	for (i = (*last_blk) - 1; i >= 0; i--) {
		if (i < start_blk) {
			/* valid log record not found */
			xlog_warn(
		"XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		if (smallmem) {
			if ((error = xlog_bread(log, i, 1, bp)))
				goto out;
			offset = xlog_align(log, i, 1, bp);
		}

		head = (xlog_rec_header_t *)offset;

		if (XLOG_HEADER_MAGIC_NUM ==
		    INT_GET(head->h_magicno, ARCH_CONVERT))
			break;

		/* In big-buffer mode just step back within the buffer. */
		if (!smallmem)
			offset -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/*
	 * We have the final block of the good log (the first block
	 * of the log record _before_ the head.  So we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		uint	h_size = INT_GET(head->h_size, ARCH_CONVERT);

		/* v2 logs: a record may need multiple header blocks. */
		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	/* Record doesn't end exactly at *last_blk => head was mid-record. */
	if (*last_blk - i + extra_bblks
			!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);
	return error;
}
d321ceac NS |
249 | |
250 | /* | |
251 | * Head is defined to be the point of the log where the next log write | |
252 | * write could go. This means that incomplete LR writes at the end are | |
253 | * eliminated when calculating the head. We aren't guaranteed that previous | |
5000d01d | 254 | * LR have complete transactions. We only know that a cycle number of |
d321ceac NS |
255 | * current cycle number -1 won't be present in the log if we start writing |
256 | * from our current block number. | |
257 | * | |
258 | * last_blk contains the block number of the first block with a given | |
259 | * cycle number. | |
260 | * | |
d321ceac NS |
261 | * Return: zero if normal, non-zero if error. |
262 | */ | |
int
xlog_find_head(
	xlog_t		*log,
	xfs_daddr_t	*return_head_blk)	/* out: block where the next
						 * log write could start */
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	xfs_daddr_t	new_blk, first_blk, start_blk, last_blk, head_blk;
	int		num_scan_bblks;
	uint		first_half_cycle, last_half_cycle;
	uint		stop_on_cycle;
	int		error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		/* Partially zeroed: head is the first zeroed block. */
		*return_head_blk = first_blk;

		/* Is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;			/* get cycle # of 1st block */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, 0, 1, bp);
	first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);

	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, last_blk, 1, bp);
	last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an invalid block number, but this
	 * value makes the math correct.  If head_blk doesn't change through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have
		 * cycle number last_half_cycle.  We need to scan backwards
		 * from the end verifying that there are no holes still
		 * containing last_half_cycle - 1.  If we find such a hole,
		 * then the start of that hole will be the new head.  The
		 * simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somewhere at the end of the
		 * log, as one of the latest writes at the beginning was
		 * incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and
		 * the head has to end up at the start of the x-1 hole at the
		 * end of the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.
		 * We don't worry about the x+1 blocks that we encounter,
		 * because we know that they cannot be the head since the log
		 * started with x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle
		 * number matching last_half_cycle.  We expect the log to be
		 * some variation on
		 *        x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will
		 * be where the new head belongs.  First we do a binary search
		 * for the first occurrence of last_half_cycle.  The binary
		 * search may not be totally accurate, so then we scan back
		 * from there looking for occurrences of last_half_cycle before
		 * us.  If that backwards scan wraps around the beginning of
		 * the log, then we look for occurrences of last_half_cycle - 1
		 * at the end of the log.  The cases we're looking for look
		 * like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * or
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						&head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log,
						start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.
		 * First we scan the physical end of the log.  In this part
		 * of the log, we are looking for blocks with cycle number
		 * last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as
		 * we've found a hole that didn't get written in going around
		 * the end of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of
		 * the log looking for occurrences of last_half_cycle.  If we
		 * find one, then our current estimate for the location of the
		 * first occurrence of last_half_cycle is wrong and we move
		 * back to the hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k
		 * logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the
		 * x + 1 blocks.  We need to skip past those since that is
		 * certainly not the head of the log.  By searching for
		 * last_half_cycle-1 we accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX &&
			(xfs_daddr_t) num_scan_bblks - head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks - (int)head_blk,
					(stop_on_cycle - 1), &new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical
		 * log is good.  This scan needs to verify that it doesn't find
		 * the last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log,
					start_blk, (int)head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = XLOG_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX &&
				(xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
							start_blk, &new_blk,
							(int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;	/* wrap the sentinel back to block 0 */
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

bp_err:
	xlog_put_bp(bp);

	if (error)
		xlog_warn("XFS: failed to find log head");
	return error;
}
d321ceac NS |
512 | |
513 | /* | |
514 | * Find the sync block number or the tail of the log. | |
515 | * | |
516 | * This will be the block number of the last record to have its | |
517 | * associated buffers synced to disk. Every log record header has | |
518 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | |
4ed50f8a | 519 | * to get a sync block number. The only concern is to figure out which |
d321ceac NS |
520 | * log record header to believe. |
521 | * | |
522 | * The following algorithm uses the log record header with the largest | |
4ed50f8a | 523 | * lsn. The entire log record does not need to be valid. We only care |
d321ceac NS |
524 | * that the header is valid. |
525 | * | |
526 | * We could speed up search by using current head_blk buffer, but it is not | |
527 | * available. | |
528 | */ | |
529 | int | |
a562a63b NS |
530 | xlog_find_tail( |
531 | xlog_t *log, | |
532 | xfs_daddr_t *head_blk, | |
533 | xfs_daddr_t *tail_blk, | |
534 | int readonly) | |
d321ceac NS |
535 | { |
536 | xlog_rec_header_t *rhead; | |
537 | xlog_op_header_t *op_head; | |
a562a63b | 538 | xfs_caddr_t offset = NULL; |
d321ceac NS |
539 | xfs_buf_t *bp; |
540 | int error, i, found; | |
541 | xfs_daddr_t umount_data_blk; | |
542 | xfs_daddr_t after_umount_blk; | |
543 | xfs_lsn_t tail_lsn; | |
73bf5988 | 544 | int hblks; |
5000d01d | 545 | |
1b6a0044 | 546 | found = 0; |
d321ceac NS |
547 | |
548 | /* | |
5000d01d | 549 | * Find previous log record |
d321ceac NS |
550 | */ |
551 | if ((error = xlog_find_head(log, head_blk))) | |
552 | return error; | |
553 | ||
a562a63b | 554 | bp = xlog_get_bp(log, 1); |
d321ceac | 555 | if (!bp) |
ce029dc1 | 556 | return ENOMEM; |
d321ceac NS |
557 | if (*head_blk == 0) { /* special case */ |
558 | if ((error = xlog_bread(log, 0, 1, bp))) | |
559 | goto bread_err; | |
a562a63b NS |
560 | offset = xlog_align(log, 0, 1, bp); |
561 | if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { | |
d321ceac NS |
562 | *tail_blk = 0; |
563 | /* leave all other log inited values alone */ | |
564 | goto exit; | |
565 | } | |
566 | } | |
567 | ||
568 | /* | |
569 | * Search backwards looking for log record header block | |
570 | */ | |
571 | ASSERT(*head_blk < INT_MAX); | |
1b6a0044 | 572 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
d321ceac NS |
573 | if ((error = xlog_bread(log, i, 1, bp))) |
574 | goto bread_err; | |
a562a63b | 575 | offset = xlog_align(log, i, 1, bp); |
1b6a0044 | 576 | if (XLOG_HEADER_MAGIC_NUM == |
a562a63b | 577 | INT_GET(*(uint *)offset, ARCH_CONVERT)) { |
d321ceac NS |
578 | found = 1; |
579 | break; | |
580 | } | |
581 | } | |
582 | /* | |
583 | * If we haven't found the log record header block, start looking | |
584 | * again from the end of the physical log. XXXmiken: There should be | |
585 | * a check here to make sure we didn't search more than N blocks in | |
586 | * the previous code. | |
587 | */ | |
588 | if (!found) { | |
1b6a0044 | 589 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
d321ceac NS |
590 | if ((error = xlog_bread(log, i, 1, bp))) |
591 | goto bread_err; | |
a562a63b | 592 | offset = xlog_align(log, i, 1, bp); |
1b6a0044 | 593 | if (XLOG_HEADER_MAGIC_NUM == |
a562a63b | 594 | INT_GET(*(uint*)offset, ARCH_CONVERT)) { |
d321ceac NS |
595 | found = 2; |
596 | break; | |
597 | } | |
598 | } | |
599 | } | |
600 | if (!found) { | |
601 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | |
602 | ASSERT(0); | |
603 | return XFS_ERROR(EIO); | |
604 | } | |
605 | ||
606 | /* find blk_no of tail of log */ | |
a562a63b | 607 | rhead = (xlog_rec_header_t *)offset; |
46eca962 | 608 | *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT)); |
d321ceac NS |
609 | |
610 | /* | |
611 | * Reset log values according to the state of the log when we | |
612 | * crashed. In the case where head_blk == 0, we bump curr_cycle | |
613 | * one because the next write starts a new cycle rather than | |
614 | * continuing the cycle of the last good log record. At this | |
615 | * point we have guaranteed that all partial log records have been | |
616 | * accounted for. Therefore, we know that the last good log record | |
617 | * written was complete and ended exactly on the end boundary | |
618 | * of the physical log. | |
619 | */ | |
620 | log->l_prev_block = i; | |
621 | log->l_curr_block = (int)*head_blk; | |
622 | log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); | |
623 | if (found == 2) | |
624 | log->l_curr_cycle++; | |
625 | log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); | |
626 | log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); | |
627 | log->l_grant_reserve_cycle = log->l_curr_cycle; | |
628 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | |
629 | log->l_grant_write_cycle = log->l_curr_cycle; | |
630 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | |
631 | ||
632 | /* | |
633 | * Look for unmount record. If we find it, then we know there | |
4ed50f8a | 634 | * was a clean unmount. Since 'i' could be the last block in |
d321ceac NS |
635 | * the physical log, we convert to a log block before comparing |
636 | * to the head_blk. | |
637 | * | |
638 | * Save the current tail lsn to use to pass to | |
639 | * xlog_clear_stale_blocks() below. We won't want to clear the | |
640 | * unmount record if there is one, so we pass the lsn of the | |
641 | * unmount record rather than the block after it. | |
642 | */ | |
73bf5988 SL |
643 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { |
644 | int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); | |
645 | int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); | |
1b6a0044 NS |
646 | |
647 | if ((h_version & XLOG_VERSION_2) && | |
73bf5988 SL |
648 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
649 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | |
650 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
651 | hblks++; | |
652 | } else { | |
653 | hblks = 1; | |
654 | } | |
655 | } else { | |
656 | hblks = 1; | |
657 | } | |
1b6a0044 NS |
658 | after_umount_blk = (i + hblks + (int) |
659 | BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; | |
d321ceac | 660 | tail_lsn = log->l_tail_lsn; |
1b6a0044 NS |
661 | if (*head_blk == after_umount_blk && |
662 | INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { | |
73bf5988 | 663 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
d321ceac NS |
664 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { |
665 | goto bread_err; | |
666 | } | |
a562a63b NS |
667 | offset = xlog_align(log, umount_data_blk, 1, bp); |
668 | op_head = (xlog_op_header_t *)offset; | |
d321ceac NS |
669 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
670 | /* | |
671 | * Set tail and last sync so that newly written | |
672 | * log records will point recovery to after the | |
673 | * current unmount record. | |
674 | */ | |
46eca962 NS |
675 | ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle, |
676 | after_umount_blk); | |
677 | ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, | |
678 | after_umount_blk); | |
d321ceac | 679 | *tail_blk = after_umount_blk; |
46eca962 NS |
680 | |
681 | /* | |
682 | * Note that the unmount was clean. If the unmount | |
683 | * was not clean, we need to know this to rebuild the | |
684 | * superblock counters from the perag headers if we | |
685 | * have a filesystem using non-persistent counters. | |
686 | */ | |
687 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; | |
d321ceac NS |
688 | } |
689 | } | |
690 | ||
d321ceac NS |
691 | /* |
692 | * Make sure that there are no blocks in front of the head | |
693 | * with the same cycle number as the head. This can happen | |
694 | * because we allow multiple outstanding log writes concurrently, | |
695 | * and the later writes might make it out before earlier ones. | |
696 | * | |
697 | * We use the lsn from before modifying it so that we'll never | |
698 | * overwrite the unmount record after a clean unmount. | |
699 | * | |
700 | * Do this only if we are going to recover the filesystem | |
32181a02 NS |
701 | * |
702 | * NOTE: This used to say "if (!readonly)" | |
703 | * However on Linux, we can & do recover a read-only filesystem. | |
704 | * We only skip recovery if NORECOVERY is specified on mount, | |
705 | * in which case we would not be here. | |
706 | * | |
707 | * But... if the -device- itself is readonly, just skip this. | |
708 | * We can't recover this device anyway, so it won't matter. | |
d321ceac | 709 | */ |
a562a63b | 710 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { |
d321ceac | 711 | error = xlog_clear_stale_blocks(log, tail_lsn); |
32181a02 | 712 | } |
d321ceac NS |
713 | |
714 | bread_err: | |
715 | exit: | |
716 | xlog_put_bp(bp); | |
717 | ||
5000d01d SL |
718 | if (error) |
719 | xlog_warn("XFS: failed to locate log tail"); | |
d321ceac | 720 | return error; |
a562a63b | 721 | } |
4ed50f8a | 722 | |
d321ceac NS |
723 | /* |
724 | * Is the log zeroed at all? | |
725 | * | |
726 | * The last binary search should be changed to perform an X block read | |
4ed50f8a | 727 | * once X becomes small enough. You can then search linearly through |
d321ceac NS |
728 | * the X blocks. This will cut down on the number of reads we need to do. |
729 | * | |
730 | * If the log is partially zeroed, this routine will pass back the blkno | |
731 | * of the first block with cycle number 0. It won't have a complete LR | |
732 | * preceding it. | |
733 | * | |
734 | * Return: | |
735 | * 0 => the log is completely written to | |
736 | * -1 => use *blk_no as the first block of the log | |
737 | * >0 => error has occurred | |
738 | */ | |
739 | int | |
a562a63b NS |
740 | xlog_find_zeroed( |
741 | xlog_t *log, | |
742 | xfs_daddr_t *blk_no) | |
d321ceac NS |
743 | { |
744 | xfs_buf_t *bp; | |
a562a63b | 745 | xfs_caddr_t offset; |
4ed50f8a | 746 | uint first_cycle, last_cycle; |
d321ceac | 747 | xfs_daddr_t new_blk, last_blk, start_blk; |
4ed50f8a RC |
748 | xfs_daddr_t num_scan_bblks; |
749 | int error, log_bbnum = log->l_logBBsize; | |
d321ceac | 750 | |
d321ceac | 751 | /* check totally zeroed log */ |
a562a63b | 752 | bp = xlog_get_bp(log, 1); |
d321ceac | 753 | if (!bp) |
ce029dc1 | 754 | return ENOMEM; |
d321ceac NS |
755 | if ((error = xlog_bread(log, 0, 1, bp))) |
756 | goto bp_err; | |
a562a63b NS |
757 | offset = xlog_align(log, 0, 1, bp); |
758 | first_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
d321ceac NS |
759 | if (first_cycle == 0) { /* completely zeroed log */ |
760 | *blk_no = 0; | |
761 | xlog_put_bp(bp); | |
762 | return -1; | |
763 | } | |
764 | ||
765 | /* check partially zeroed log */ | |
766 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) | |
767 | goto bp_err; | |
a562a63b NS |
768 | offset = xlog_align(log, log_bbnum-1, 1, bp); |
769 | last_cycle = GET_CYCLE(offset, ARCH_CONVERT); | |
d321ceac NS |
770 | if (last_cycle != 0) { /* log completely written to */ |
771 | xlog_put_bp(bp); | |
772 | return 0; | |
773 | } else if (first_cycle != 1) { | |
774 | /* | |
775 | * If the cycle of the last block is zero, the cycle of | |
5000d01d SL |
776 | * the first block must be 1. If it's not, maybe we're |
777 | * not looking at a log... Bail out. | |
d321ceac | 778 | */ |
5000d01d | 779 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); |
d321ceac NS |
780 | return XFS_ERROR(EINVAL); |
781 | } | |
5000d01d | 782 | |
d321ceac NS |
783 | /* we have a partially zeroed log */ |
784 | last_blk = log_bbnum-1; | |
785 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) | |
786 | goto bp_err; | |
787 | ||
788 | /* | |
4ed50f8a | 789 | * Validate the answer. Because there is no way to guarantee that |
d321ceac NS |
790 | * the entire log is made up of log records which are the same size, |
791 | * we scan over the defined maximum blocks. At this point, the maximum | |
792 | * is not chosen to mean anything special. XXXmiken | |
793 | */ | |
73bf5988 | 794 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
d321ceac | 795 | ASSERT(num_scan_bblks <= INT_MAX); |
5000d01d | 796 | |
d321ceac NS |
797 | if (last_blk < num_scan_bblks) |
798 | num_scan_bblks = last_blk; | |
799 | start_blk = last_blk - num_scan_bblks; | |
5000d01d | 800 | |
d321ceac NS |
801 | /* |
802 | * We search for any instances of cycle number 0 that occur before | |
803 | * our current estimate of the head. What we're trying to detect is | |
4ed50f8a RC |
804 | * 1 ... | 0 | 1 | 0... |
805 | * ^ binary search ends here | |
d321ceac | 806 | */ |
ce029dc1 ES |
807 | if ((error = xlog_find_verify_cycle(log, start_blk, |
808 | (int)num_scan_bblks, 0, &new_blk))) | |
606d804d | 809 | goto bp_err; |
ce029dc1 ES |
810 | if (new_blk != -1) |
811 | last_blk = new_blk; | |
d321ceac NS |
812 | |
813 | /* | |
814 | * Potentially backup over partial log record write. We don't need | |
815 | * to search the end of the log because we know it is zero. | |
816 | */ | |
5000d01d | 817 | if ((error = xlog_find_verify_log_record(log, start_blk, |
79c48ada ES |
818 | &last_blk, 0)) == -1) { |
819 | error = XFS_ERROR(EIO); | |
820 | goto bp_err; | |
821 | } else if (error) | |
d321ceac NS |
822 | goto bp_err; |
823 | ||
824 | *blk_no = last_blk; | |
825 | bp_err: | |
826 | xlog_put_bp(bp); | |
827 | if (error) | |
828 | return error; | |
829 | return -1; | |
a562a63b | 830 | } |
d321ceac | 831 | |
d321ceac | 832 | STATIC void |
a562a63b NS |
833 | xlog_unpack_data( |
834 | xlog_rec_header_t *rhead, | |
835 | xfs_caddr_t dp, | |
836 | xlog_t *log) | |
d321ceac | 837 | { |
a562a63b NS |
838 | int i, j, k; |
839 | xlog_in_core_2_t *xhdr; | |
840 | ||
841 | for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) && | |
73bf5988 | 842 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { |
5ce1d1f7 | 843 | *(uint *)dp = *(uint *)&rhead->h_cycle_data[i]; |
d321ceac NS |
844 | dp += BBSIZE; |
845 | } | |
73bf5988 SL |
846 | |
847 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { | |
a562a63b | 848 | xhdr = (xlog_in_core_2_t *)rhead; |
73bf5988 SL |
849 | for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) { |
850 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | |
851 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | |
852 | *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; | |
853 | dp += BBSIZE; | |
854 | } | |
855 | } | |
856 | ||
a562a63b NS |
857 | xlog_unpack_data_checksum(rhead, dp, log); |
858 | } | |
d321ceac | 859 | |
d321ceac | 860 | STATIC xlog_recover_t * |
a562a63b NS |
861 | xlog_recover_find_tid( |
862 | xlog_recover_t *q, | |
863 | xlog_tid_t tid) | |
d321ceac | 864 | { |
a562a63b | 865 | xlog_recover_t *p = q; |
d321ceac NS |
866 | |
867 | while (p != NULL) { | |
868 | if (p->r_log_tid == tid) | |
869 | break; | |
870 | p = p->r_next; | |
871 | } | |
872 | return p; | |
a562a63b | 873 | } |
4ed50f8a | 874 | |
d321ceac | 875 | STATIC void |
a562a63b NS |
876 | xlog_recover_put_hashq( |
877 | xlog_recover_t **q, | |
878 | xlog_recover_t *trans) | |
d321ceac NS |
879 | { |
880 | trans->r_next = *q; | |
881 | *q = trans; | |
a562a63b | 882 | } |
4ed50f8a | 883 | |
d321ceac | 884 | STATIC void |
a562a63b NS |
885 | xlog_recover_new_tid( |
886 | xlog_recover_t **q, | |
887 | xlog_tid_t tid, | |
888 | xfs_lsn_t lsn) | |
d321ceac | 889 | { |
a562a63b | 890 | xlog_recover_t *trans; |
d321ceac | 891 | |
a562a63b | 892 | trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); |
d321ceac NS |
893 | trans->r_log_tid = tid; |
894 | trans->r_lsn = lsn; | |
895 | xlog_recover_put_hashq(q, trans); | |
a562a63b | 896 | } |
d321ceac NS |
897 | |
898 | STATIC int | |
a562a63b NS |
899 | xlog_recover_unlink_tid( |
900 | xlog_recover_t **q, | |
901 | xlog_recover_t *trans) | |
d321ceac | 902 | { |
a562a63b NS |
903 | xlog_recover_t *tp; |
904 | int found = 0; | |
d321ceac NS |
905 | |
906 | ASSERT(trans != 0); | |
907 | if (trans == *q) { | |
908 | *q = (*q)->r_next; | |
909 | } else { | |
910 | tp = *q; | |
911 | while (tp != 0) { | |
912 | if (tp->r_next == trans) { | |
913 | found = 1; | |
914 | break; | |
915 | } | |
916 | tp = tp->r_next; | |
917 | } | |
918 | if (!found) { | |
919 | xlog_warn( | |
920 | "XFS: xlog_recover_unlink_tid: trans not found"); | |
921 | ASSERT(0); | |
922 | return XFS_ERROR(EIO); | |
923 | } | |
924 | tp->r_next = tp->r_next->r_next; | |
925 | } | |
926 | return 0; | |
a562a63b | 927 | } |
d321ceac NS |
928 | |
929 | /* | |
930 | * Free up any resources allocated by the transaction | |
931 | * | |
932 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | |
933 | */ | |
934 | STATIC void | |
a562a63b NS |
935 | xlog_recover_free_trans( |
936 | xlog_recover_t *trans) | |
d321ceac | 937 | { |
a562a63b NS |
938 | xlog_recover_item_t *first_item, *item, *free_item; |
939 | int i; | |
d321ceac NS |
940 | |
941 | item = first_item = trans->r_itemq; | |
942 | do { | |
943 | free_item = item; | |
944 | item = item->ri_next; | |
945 | /* Free the regions in the item. */ | |
946 | for (i = 0; i < free_item->ri_cnt; i++) { | |
947 | kmem_free(free_item->ri_buf[i].i_addr, | |
948 | free_item->ri_buf[i].i_len); | |
949 | } | |
950 | /* Free the item itself */ | |
951 | kmem_free(free_item->ri_buf, | |
952 | (free_item->ri_total * sizeof(xfs_log_iovec_t))); | |
953 | kmem_free(free_item, sizeof(xlog_recover_item_t)); | |
954 | } while (first_item != item); | |
955 | /* Free the transaction recover structure */ | |
956 | kmem_free(trans, sizeof(xlog_recover_t)); | |
a562a63b | 957 | } |
d321ceac NS |
958 | |
959 | STATIC int | |
a562a63b NS |
960 | xlog_recover_commit_trans( |
961 | xlog_t *log, | |
962 | xlog_recover_t **q, | |
963 | xlog_recover_t *trans, | |
964 | int pass) | |
d321ceac | 965 | { |
a562a63b | 966 | int error; |
d321ceac NS |
967 | |
968 | if ((error = xlog_recover_unlink_tid(q, trans))) | |
969 | return error; | |
970 | if ((error = xlog_recover_do_trans(log, trans, pass))) | |
971 | return error; | |
972 | xlog_recover_free_trans(trans); /* no error */ | |
973 | return 0; | |
a562a63b | 974 | } |
d321ceac NS |
975 | |
976 | STATIC void | |
a562a63b NS |
977 | xlog_recover_insert_item_backq( |
978 | xlog_recover_item_t **q, | |
979 | xlog_recover_item_t *item) | |
d321ceac NS |
980 | { |
981 | if (*q == 0) { | |
982 | item->ri_prev = item->ri_next = item; | |
983 | *q = item; | |
984 | } else { | |
985 | item->ri_next = *q; | |
986 | item->ri_prev = (*q)->ri_prev; | |
987 | (*q)->ri_prev = item; | |
988 | item->ri_prev->ri_next = item; | |
989 | } | |
a562a63b | 990 | } |
d321ceac NS |
991 | |
992 | STATIC void | |
a562a63b NS |
993 | xlog_recover_add_item( |
994 | xlog_recover_item_t **itemq) | |
d321ceac | 995 | { |
a562a63b | 996 | xlog_recover_item_t *item; |
d321ceac | 997 | |
2b288ccf | 998 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); |
d321ceac | 999 | xlog_recover_insert_item_backq(itemq, item); |
a562a63b | 1000 | } |
d321ceac | 1001 | |
a562a63b NS |
1002 | STATIC int |
1003 | xlog_recover_add_to_cont_trans( | |
1004 | xlog_recover_t *trans, | |
1005 | xfs_caddr_t dp, | |
1006 | int len) | |
1007 | { | |
1008 | xlog_recover_item_t *item; | |
1009 | xfs_caddr_t ptr, old_ptr; | |
1010 | int old_len; | |
1011 | ||
1012 | item = trans->r_itemq; | |
1013 | if (item == 0) { | |
1014 | /* finish copying rest of trans header */ | |
1015 | xlog_recover_add_item(&trans->r_itemq); | |
1016 | ptr = (xfs_caddr_t) &trans->r_theader + | |
1017 | sizeof(xfs_trans_header_t) - len; | |
1018 | memcpy(ptr, dp, len); /* d, s, l */ | |
1019 | return 0; | |
1020 | } | |
1021 | item = item->ri_prev; | |
1022 | ||
1023 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | |
1024 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | |
1025 | ||
6239071d | 1026 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); |
a562a63b NS |
1027 | memcpy(&ptr[old_len], dp, len); /* d, s, l */ |
1028 | item->ri_buf[item->ri_cnt-1].i_len += len; | |
1029 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | |
1030 | return 0; | |
1031 | } | |
1032 | ||
1033 | /* | |
1034 | * The next region to add is the start of a new region. It could be | |
d321ceac NS |
1035 | * a whole region or it could be the first part of a new region. Because |
1036 | * of this, the assumption here is that the type and size fields of all | |
1037 | * format structures fit into the first 32 bits of the structure. | |
1038 | * | |
1039 | * This works because all regions must be 32 bit aligned. Therefore, we | |
1040 | * either have both fields or we have neither field. In the case we have | |
1041 | * neither field, the data part of the region is zero length. We only have | |
1042 | * a log_op_header and can throw away the header since a new one will appear | |
1043 | * later. If we have at least 4 bytes, then we can determine how many regions | |
1044 | * will appear in the current log item. | |
1045 | */ | |
1046 | STATIC int | |
a562a63b NS |
1047 | xlog_recover_add_to_trans( |
1048 | xlog_recover_t *trans, | |
1049 | xfs_caddr_t dp, | |
1050 | int len) | |
d321ceac | 1051 | { |
a562a63b NS |
1052 | xfs_inode_log_format_t *in_f; /* any will do */ |
1053 | xlog_recover_item_t *item; | |
1054 | xfs_caddr_t ptr; | |
d321ceac NS |
1055 | |
1056 | if (!len) | |
1057 | return 0; | |
d321ceac NS |
1058 | item = trans->r_itemq; |
1059 | if (item == 0) { | |
1060 | ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); | |
1061 | if (len == sizeof(xfs_trans_header_t)) | |
1062 | xlog_recover_add_item(&trans->r_itemq); | |
32181a02 | 1063 | memcpy(&trans->r_theader, dp, len); /* d, s, l */ |
d321ceac NS |
1064 | return 0; |
1065 | } | |
a562a63b NS |
1066 | |
1067 | ptr = kmem_alloc(len, KM_SLEEP); | |
1068 | memcpy(ptr, dp, len); | |
1069 | in_f = (xfs_inode_log_format_t *)ptr; | |
1070 | ||
d321ceac NS |
1071 | if (item->ri_prev->ri_total != 0 && |
1072 | item->ri_prev->ri_total == item->ri_prev->ri_cnt) { | |
1073 | xlog_recover_add_item(&trans->r_itemq); | |
1074 | } | |
1075 | item = trans->r_itemq; | |
1076 | item = item->ri_prev; | |
1077 | ||
1078 | if (item->ri_total == 0) { /* first region to be added */ | |
1079 | item->ri_total = in_f->ilf_size; | |
1080 | ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); | |
1081 | item->ri_buf = kmem_zalloc((item->ri_total * | |
2b288ccf | 1082 | sizeof(xfs_log_iovec_t)), KM_SLEEP); |
d321ceac NS |
1083 | } |
1084 | ASSERT(item->ri_total > item->ri_cnt); | |
1085 | /* Description region is ri_buf[0] */ | |
1086 | item->ri_buf[item->ri_cnt].i_addr = ptr; | |
1087 | item->ri_buf[item->ri_cnt].i_len = len; | |
1088 | item->ri_cnt++; | |
1089 | return 0; | |
a562a63b | 1090 | } |
d321ceac NS |
1091 | |
1092 | STATIC int | |
a562a63b NS |
1093 | xlog_recover_unmount_trans( |
1094 | xlog_recover_t *trans) | |
d321ceac NS |
1095 | { |
1096 | /* Do nothing now */ | |
1097 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | |
a562a63b NS |
1098 | return 0; |
1099 | } | |
d321ceac | 1100 | |
a562a63b NS |
1101 | /* |
1102 | * There are two valid states of the r_state field. 0 indicates that the | |
1103 | * transaction structure is in a normal state. We have either seen the | |
1104 | * start of the transaction or the last operation we added was not a partial | |
1105 | * operation. If the last operation we added to the transaction was a | |
1106 | * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. | |
1107 | * | |
1108 | * NOTE: skip LRs with 0 data length. | |
1109 | */ | |
d321ceac | 1110 | STATIC int |
a562a63b NS |
1111 | xlog_recover_process_data( |
1112 | xlog_t *log, | |
1113 | xlog_recover_t *rhash[], | |
1114 | xlog_rec_header_t *rhead, | |
1115 | xfs_caddr_t dp, | |
1116 | int pass) | |
d321ceac | 1117 | { |
a562a63b NS |
1118 | xfs_caddr_t lp; |
1119 | int num_logops; | |
1120 | xlog_op_header_t *ohead; | |
1121 | xlog_recover_t *trans; | |
1122 | xlog_tid_t tid; | |
1123 | int error; | |
1124 | unsigned long hash; | |
1125 | uint flags; | |
1126 | ||
1127 | lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT); | |
1128 | num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT); | |
1129 | ||
1130 | /* check the log format matches our own - else we can't recover */ | |
1131 | if (xlog_header_check_recover(log->l_mp, rhead)) | |
1132 | return (XFS_ERROR(EIO)); | |
1133 | ||
1134 | while ((dp < lp) && num_logops) { | |
1135 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); | |
1136 | ohead = (xlog_op_header_t *)dp; | |
1137 | dp += sizeof(xlog_op_header_t); | |
1138 | if (ohead->oh_clientid != XFS_TRANSACTION && | |
1139 | ohead->oh_clientid != XFS_LOG) { | |
1140 | xlog_warn( | |
1141 | "XFS: xlog_recover_process_data: bad clientid"); | |
1142 | ASSERT(0); | |
1143 | return (XFS_ERROR(EIO)); | |
d321ceac | 1144 | } |
a562a63b NS |
1145 | tid = INT_GET(ohead->oh_tid, ARCH_CONVERT); |
1146 | hash = XLOG_RHASH(tid); | |
1147 | trans = xlog_recover_find_tid(rhash[hash], tid); | |
1148 | if (trans == NULL) { /* not found; add new tid */ | |
1149 | if (ohead->oh_flags & XLOG_START_TRANS) | |
1150 | xlog_recover_new_tid(&rhash[hash], tid, | |
1151 | INT_GET(rhead->h_lsn, ARCH_CONVERT)); | |
1152 | } else { | |
1153 | ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp); | |
1154 | flags = ohead->oh_flags & ~XLOG_END_TRANS; | |
1155 | if (flags & XLOG_WAS_CONT_TRANS) | |
1156 | flags &= ~XLOG_CONTINUE_TRANS; | |
1157 | switch (flags) { | |
1158 | case XLOG_COMMIT_TRANS: | |
1159 | error = xlog_recover_commit_trans(log, | |
1160 | &rhash[hash], trans, pass); | |
1161 | break; | |
1162 | case XLOG_UNMOUNT_TRANS: | |
1163 | error = xlog_recover_unmount_trans(trans); | |
1164 | break; | |
1165 | case XLOG_WAS_CONT_TRANS: | |
1166 | error = xlog_recover_add_to_cont_trans(trans, | |
1167 | dp, INT_GET(ohead->oh_len, | |
1168 | ARCH_CONVERT)); | |
1169 | break; | |
1170 | case XLOG_START_TRANS: | |
1171 | xlog_warn( | |
1172 | "XFS: xlog_recover_process_data: bad transaction"); | |
1173 | ASSERT(0); | |
1174 | error = XFS_ERROR(EIO); | |
1175 | break; | |
1176 | case 0: | |
1177 | case XLOG_CONTINUE_TRANS: | |
1178 | error = xlog_recover_add_to_trans(trans, | |
1179 | dp, INT_GET(ohead->oh_len, | |
1180 | ARCH_CONVERT)); | |
1181 | break; | |
1182 | default: | |
1183 | xlog_warn( | |
1184 | "XFS: xlog_recover_process_data: bad flag"); | |
1185 | ASSERT(0); | |
1186 | error = XFS_ERROR(EIO); | |
1187 | break; | |
1188 | } | |
1189 | if (error) | |
1190 | return error; | |
d321ceac | 1191 | } |
a562a63b NS |
1192 | dp += INT_GET(ohead->oh_len, ARCH_CONVERT); |
1193 | num_logops--; | |
1194 | } | |
1195 | return 0; | |
1196 | } | |
d321ceac | 1197 | |
72c5917e NS |
1198 | STATIC int |
1199 | xlog_valid_rec_header( | |
1200 | xlog_t *log, | |
1201 | xlog_rec_header_t *rhead, | |
1202 | xfs_daddr_t blkno) | |
1203 | { | |
b0e364f6 | 1204 | int hlen; |
72c5917e NS |
1205 | |
1206 | if (unlikely( | |
1207 | (INT_GET(rhead->h_magicno, ARCH_CONVERT) != | |
1208 | XLOG_HEADER_MAGIC_NUM))) { | |
1209 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", | |
1210 | XFS_ERRLEVEL_LOW, log->l_mp); | |
1211 | return XFS_ERROR(EFSCORRUPTED); | |
1212 | } | |
1213 | if (unlikely( | |
46eca962 | 1214 | (!rhead->h_version || |
72c5917e NS |
1215 | (INT_GET(rhead->h_version, ARCH_CONVERT) & |
1216 | (~XLOG_VERSION_OKBITS)) != 0))) { | |
1217 | xlog_warn("XFS: %s: unrecognised log version (%d).", | |
1218 | __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT)); | |
1219 | return XFS_ERROR(EIO); | |
1220 | } | |
1221 | ||
1222 | /* LR body must have data or it wouldn't have been written */ | |
b0e364f6 NS |
1223 | hlen = INT_GET(rhead->h_len, ARCH_CONVERT); |
1224 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { | |
72c5917e NS |
1225 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
1226 | XFS_ERRLEVEL_LOW, log->l_mp); | |
1227 | return XFS_ERROR(EFSCORRUPTED); | |
1228 | } | |
1229 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { | |
1230 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", | |
1231 | XFS_ERRLEVEL_LOW, log->l_mp); | |
1232 | return XFS_ERROR(EFSCORRUPTED); | |
1233 | } | |
1234 | return 0; | |
1235 | } | |
1236 | ||
d321ceac NS |
1237 | /* |
1238 | * Read the log from tail to head and process the log records found. | |
1239 | * Handle the two cases where the tail and head are in the same cycle | |
1240 | * and where the active portion of the log wraps around the end of | |
4ed50f8a | 1241 | * the physical log separately. The pass parameter is passed through |
d321ceac NS |
1242 | * to the routines called to process the data and is not looked at |
1243 | * here. | |
1244 | */ | |
1245 | int | |
a562a63b NS |
1246 | xlog_do_recovery_pass( |
1247 | xlog_t *log, | |
1248 | xfs_daddr_t head_blk, | |
1249 | xfs_daddr_t tail_blk, | |
1250 | int pass) | |
d321ceac | 1251 | { |
a562a63b NS |
1252 | xlog_rec_header_t *rhead; |
1253 | xfs_daddr_t blk_no; | |
1254 | xfs_caddr_t bufaddr, offset; | |
1255 | xfs_buf_t *hbp, *dbp; | |
1256 | int error = 0, h_size; | |
1257 | int bblks, split_bblks; | |
1258 | int hblks, split_hblks, wrapped_hblks; | |
1259 | xlog_recover_t *rhash[XLOG_RHASH_SIZE]; | |
1260 | ||
72c5917e NS |
1261 | ASSERT(head_blk != tail_blk); |
1262 | ||
73bf5988 | 1263 | /* |
a562a63b NS |
1264 | * Read the header of the tail block and get the iclog buffer size from |
1265 | * h_size. Use this to tell how many sectors make up the log header. | |
73bf5988 | 1266 | */ |
a562a63b NS |
1267 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { |
1268 | /* | |
1269 | * When using variable length iclogs, read first sector of | |
1270 | * iclog header and extract the header size from it. Get a | |
1271 | * new hbp that is the correct size. | |
1272 | */ | |
1273 | hbp = xlog_get_bp(log, 1); | |
1274 | if (!hbp) | |
1275 | return ENOMEM; | |
1276 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) | |
1277 | goto bread_err1; | |
1278 | offset = xlog_align(log, tail_blk, 1, hbp); | |
1279 | rhead = (xlog_rec_header_t *)offset; | |
72c5917e NS |
1280 | error = xlog_valid_rec_header(log, rhead, tail_blk); |
1281 | if (error) | |
a562a63b | 1282 | goto bread_err1; |
a562a63b | 1283 | h_size = INT_GET(rhead->h_size, ARCH_CONVERT); |
a562a63b NS |
1284 | if ((INT_GET(rhead->h_version, ARCH_CONVERT) |
1285 | & XLOG_VERSION_2) && | |
1286 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | |
1287 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | |
1288 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
1289 | hblks++; | |
1290 | xlog_put_bp(hbp); | |
1291 | hbp = xlog_get_bp(log, hblks); | |
1292 | } else { | |
1293 | hblks = 1; | |
1294 | } | |
73bf5988 | 1295 | } else { |
a562a63b NS |
1296 | ASSERT(log->l_sectbb_log == 0); |
1297 | hblks = 1; | |
1298 | hbp = xlog_get_bp(log, 1); | |
1299 | h_size = XLOG_BIG_RECORD_BSIZE; | |
73bf5988 | 1300 | } |
a562a63b NS |
1301 | |
1302 | if (!hbp) | |
1303 | return ENOMEM; | |
1304 | dbp = xlog_get_bp(log, BTOBB(h_size)); | |
1305 | if (!dbp) { | |
1306 | xlog_put_bp(hbp); | |
1307 | return ENOMEM; | |
d321ceac | 1308 | } |
a562a63b NS |
1309 | |
1310 | memset(rhash, 0, sizeof(rhash)); | |
1311 | if (tail_blk <= head_blk) { | |
1312 | for (blk_no = tail_blk; blk_no < head_blk; ) { | |
1313 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | |
1314 | goto bread_err2; | |
1315 | offset = xlog_align(log, blk_no, hblks, hbp); | |
1316 | rhead = (xlog_rec_header_t *)offset; | |
72c5917e NS |
1317 | error = xlog_valid_rec_header(log, rhead, blk_no); |
1318 | if (error) | |
a562a63b | 1319 | goto bread_err2; |
a562a63b | 1320 | |
a562a63b NS |
1321 | /* blocks in data section */ |
1322 | bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); | |
72c5917e NS |
1323 | error = xlog_bread(log, blk_no + hblks, bblks, dbp); |
1324 | if (error) | |
1325 | goto bread_err2; | |
1326 | offset = xlog_align(log, blk_no + hblks, bblks, dbp); | |
1327 | xlog_unpack_data(rhead, offset, log); | |
1328 | if ((error = xlog_recover_process_data(log, | |
a562a63b | 1329 | rhash, rhead, offset, pass))) |
72c5917e NS |
1330 | goto bread_err2; |
1331 | blk_no += bblks + hblks; | |
73bf5988 | 1332 | } |
a562a63b NS |
1333 | } else { |
1334 | /* | |
1335 | * Perform recovery around the end of the physical log. | |
1336 | * When the head is not on the same cycle number as the tail, | |
1337 | * we can't do a sequential recovery as above. | |
1338 | */ | |
1339 | blk_no = tail_blk; | |
1340 | while (blk_no < log->l_logBBsize) { | |
1341 | /* | |
1342 | * Check for header wrapping around physical end-of-log | |
1343 | */ | |
72c5917e NS |
1344 | offset = NULL; |
1345 | split_hblks = 0; | |
a562a63b | 1346 | wrapped_hblks = 0; |
72c5917e | 1347 | if (blk_no + hblks <= log->l_logBBsize) { |
a562a63b | 1348 | /* Read header in one read */ |
72c5917e NS |
1349 | error = xlog_bread(log, blk_no, hblks, hbp); |
1350 | if (error) | |
a562a63b NS |
1351 | goto bread_err2; |
1352 | offset = xlog_align(log, blk_no, hblks, hbp); | |
1353 | } else { | |
1354 | /* This LR is split across physical log end */ | |
a562a63b NS |
1355 | if (blk_no != log->l_logBBsize) { |
1356 | /* some data before physical log end */ | |
1357 | ASSERT(blk_no <= INT_MAX); | |
1358 | split_hblks = log->l_logBBsize - (int)blk_no; | |
1359 | ASSERT(split_hblks > 0); | |
1360 | if ((error = xlog_bread(log, blk_no, | |
1361 | split_hblks, hbp))) | |
1362 | goto bread_err2; | |
1363 | offset = xlog_align(log, blk_no, | |
1364 | split_hblks, hbp); | |
1365 | } | |
1366 | /* | |
1367 | * Note: this black magic still works with | |
1368 | * large sector sizes (non-512) only because: | |
1369 | * - we increased the buffer size originally | |
1370 | * by 1 sector giving us enough extra space | |
1371 | * for the second read; | |
1372 | * - the log start is guaranteed to be sector | |
1373 | * aligned; | |
1374 | * - we read the log end (LR header start) | |
1375 | * _first_, then the log start (LR header end) | |
1376 | * - order is important. | |
1377 | */ | |
1378 | bufaddr = XFS_BUF_PTR(hbp); | |
1379 | XFS_BUF_SET_PTR(hbp, | |
1380 | bufaddr + BBTOB(split_hblks), | |
1381 | BBTOB(hblks - split_hblks)); | |
1382 | wrapped_hblks = hblks - split_hblks; | |
72c5917e NS |
1383 | error = xlog_bread(log, 0, wrapped_hblks, hbp); |
1384 | if (error) | |
a562a63b | 1385 | goto bread_err2; |
b0e364f6 | 1386 | XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks)); |
a562a63b NS |
1387 | if (!offset) |
1388 | offset = xlog_align(log, 0, | |
1389 | wrapped_hblks, hbp); | |
1390 | } | |
1391 | rhead = (xlog_rec_header_t *)offset; | |
72c5917e NS |
1392 | error = xlog_valid_rec_header(log, rhead, |
1393 | split_hblks ? blk_no : 0); | |
1394 | if (error) | |
a562a63b | 1395 | goto bread_err2; |
72c5917e NS |
1396 | |
1397 | bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); | |
1398 | blk_no += hblks; | |
a562a63b NS |
1399 | |
1400 | /* Read in data for log record */ | |
72c5917e NS |
1401 | if (blk_no + bblks <= log->l_logBBsize) { |
1402 | error = xlog_bread(log, blk_no, bblks, dbp); | |
1403 | if (error) | |
a562a63b NS |
1404 | goto bread_err2; |
1405 | offset = xlog_align(log, blk_no, bblks, dbp); | |
1406 | } else { | |
1407 | /* This log record is split across the | |
1408 | * physical end of log */ | |
1409 | offset = NULL; | |
1410 | split_bblks = 0; | |
1411 | if (blk_no != log->l_logBBsize) { | |
1412 | /* some data is before the physical | |
1413 | * end of log */ | |
1414 | ASSERT(!wrapped_hblks); | |
1415 | ASSERT(blk_no <= INT_MAX); | |
1416 | split_bblks = | |
1417 | log->l_logBBsize - (int)blk_no; | |
1418 | ASSERT(split_bblks > 0); | |
1419 | if ((error = xlog_bread(log, blk_no, | |
1420 | split_bblks, dbp))) | |
1421 | goto bread_err2; | |
1422 | offset = xlog_align(log, blk_no, | |
1423 | split_bblks, dbp); | |
1424 | } | |
1425 | /* | |
1426 | * Note: this black magic still works with | |
1427 | * large sector sizes (non-512) only because: | |
1428 | * - we increased the buffer size originally | |
1429 | * by 1 sector giving us enough extra space | |
1430 | * for the second read; | |
1431 | * - the log start is guaranteed to be sector | |
1432 | * aligned; | |
1433 | * - we read the log end (LR header start) | |
1434 | * _first_, then the log start (LR header end) | |
1435 | * - order is important. | |
1436 | */ | |
1437 | bufaddr = XFS_BUF_PTR(dbp); | |
1438 | XFS_BUF_SET_PTR(dbp, | |
1439 | bufaddr + BBTOB(split_bblks), | |
1440 | BBTOB(bblks - split_bblks)); | |
1441 | if ((error = xlog_bread(log, wrapped_hblks, | |
1442 | bblks - split_bblks, dbp))) | |
1443 | goto bread_err2; | |
b0e364f6 | 1444 | XFS_BUF_SET_PTR(dbp, bufaddr, h_size); |
a562a63b NS |
1445 | if (!offset) |
1446 | offset = xlog_align(log, wrapped_hblks, | |
1447 | bblks - split_bblks, dbp); | |
1448 | } | |
1449 | xlog_unpack_data(rhead, offset, log); | |
1450 | if ((error = xlog_recover_process_data(log, rhash, | |
72c5917e | 1451 | rhead, offset, pass))) |
a562a63b NS |
1452 | goto bread_err2; |
1453 | blk_no += bblks; | |
d321ceac | 1454 | } |
d321ceac | 1455 | |
a562a63b NS |
1456 | ASSERT(blk_no >= log->l_logBBsize); |
1457 | blk_no -= log->l_logBBsize; | |
1458 | ||
1459 | /* read first part of physical log */ | |
1460 | while (blk_no < head_blk) { | |
1461 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | |
1462 | goto bread_err2; | |
1463 | offset = xlog_align(log, blk_no, hblks, hbp); | |
1464 | rhead = (xlog_rec_header_t *)offset; | |
72c5917e NS |
1465 | error = xlog_valid_rec_header(log, rhead, blk_no); |
1466 | if (error) | |
1467 | goto bread_err2; | |
a562a63b | 1468 | bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); |
a562a63b NS |
1469 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) |
1470 | goto bread_err2; | |
1471 | offset = xlog_align(log, blk_no+hblks, bblks, dbp); | |
1472 | xlog_unpack_data(rhead, offset, log); | |
1473 | if ((error = xlog_recover_process_data(log, rhash, | |
72c5917e | 1474 | rhead, offset, pass))) |
a562a63b | 1475 | goto bread_err2; |
72c5917e | 1476 | blk_no += bblks + hblks; |
a562a63b | 1477 | } |
5000d01d | 1478 | } |
d321ceac | 1479 | |
a562a63b NS |
1480 | bread_err2: |
1481 | xlog_put_bp(dbp); | |
1482 | bread_err1: | |
1483 | xlog_put_bp(hbp); | |
1484 | return error; | |
d321ceac | 1485 | } |