]>
Commit | Line | Data |
---|---|---|
d321ceac | 1 | /* |
0d3e0b37 | 2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. |
5000d01d | 3 | * |
d321ceac NS |
4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
5000d01d | 7 | * |
d321ceac NS |
8 | * This program is distributed in the hope that it would be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
5000d01d | 11 | * |
d321ceac NS |
12 | * Further, this software is distributed without any warranty that it is |
13 | * free of the rightful claim of any third person regarding infringement | |
5000d01d | 14 | * or the like. Any license provided herein, whether implied or |
d321ceac NS |
15 | * otherwise, applies only to this software file. Patent licenses, if |
16 | * any, provided herein do not apply to combinations of this program with | |
17 | * other software, or any other product whatsoever. | |
5000d01d | 18 | * |
d321ceac NS |
19 | * You should have received a copy of the GNU General Public License along |
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | |
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | |
5000d01d | 22 | * |
d321ceac NS |
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, |
24 | * Mountain View, CA 94043, or: | |
5000d01d SL |
25 | * |
26 | * http://www.sgi.com | |
27 | * | |
28 | * For further information regarding this notice, see: | |
29 | * | |
d321ceac NS |
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ |
31 | */ | |
32 | ||
33 | #include <libxlog.h> | |
34 | ||
35 | /* | |
36 | * This routine finds (to an approximation) the first block in the physical | |
5000d01d | 37 | * log which contains the given cycle. It uses a binary search algorithm. |
d321ceac NS |
38 | * Note that the algorithm can not be perfect because the disk will not |
39 | * necessarily be perfect. | |
40 | */ | |
41 | int | |
42 | xlog_find_cycle_start(xlog_t *log, | |
5000d01d | 43 | xfs_buf_t *bp, |
d321ceac NS |
44 | xfs_daddr_t first_blk, |
45 | xfs_daddr_t *last_blk, | |
46 | uint cycle) | |
47 | { | |
48 | xfs_daddr_t mid_blk; | |
49 | uint mid_cycle; | |
50 | int error; | |
51 | ||
52 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
53 | while (mid_blk != first_blk && mid_blk != *last_blk) { | |
54 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | |
55 | return error; | |
56 | mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); | |
57 | if (mid_cycle == cycle) { | |
58 | *last_blk = mid_blk; | |
59 | /* last_half_cycle == mid_cycle */ | |
60 | } else { | |
61 | first_blk = mid_blk; | |
62 | /* first_half_cycle == mid_cycle */ | |
63 | } | |
64 | mid_blk = BLK_AVG(first_blk, *last_blk); | |
65 | } | |
66 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | |
67 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | |
68 | ||
69 | return 0; | |
70 | } /* xlog_find_cycle_start */ | |
71 | ||
72 | ||
73 | /* | |
74 | * Check that the range of blocks does not contain the cycle number | |
75 | * given. The scan needs to occur from front to back and the ptr into the | |
76 | * region must be updated since a later routine will need to perform another | |
77 | * test. If the region is completely good, we end up returning the same | |
78 | * last block number. | |
79 | * | |
ce029dc1 | 80 | * Set blkno to -1 if we encounter no errors. This is an invalid block number |
d321ceac NS |
81 | * since we don't ever expect logs to get this large. |
82 | */ | |
83 | ||
ce029dc1 | 84 | STATIC int |
5000d01d SL |
85 | xlog_find_verify_cycle( xlog_t *log, |
86 | xfs_daddr_t start_blk, | |
87 | int nbblks, | |
88 | uint stop_on_cycle_no, | |
ce029dc1 | 89 | xfs_daddr_t *new_blk) |
d321ceac | 90 | { |
ce029dc1 | 91 | xfs_daddr_t i, j; |
d321ceac | 92 | uint cycle; |
5000d01d SL |
93 | xfs_buf_t *bp; |
94 | char *buf = NULL; | |
95 | int error = 0; | |
85a875e9 ES |
96 | xfs_daddr_t bufblks; |
97 | ||
98 | bufblks = 1 << ffs(nbblks); | |
d321ceac NS |
99 | |
100 | while (!(bp = xlog_get_bp(bufblks, log->l_mp))) { | |
5000d01d | 101 | /* can't get enough memory to do everything in one big buffer */ |
d321ceac | 102 | bufblks >>= 1; |
5000d01d SL |
103 | if (!bufblks) |
104 | return ENOMEM; | |
105 | } | |
106 | ||
d321ceac NS |
107 | |
108 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { | |
109 | int bcount = min(bufblks, (start_blk + nbblks - i)); | |
110 | ||
5000d01d SL |
111 | if ((error = xlog_bread(log, i, bcount, bp))) |
112 | goto out; | |
d321ceac NS |
113 | |
114 | buf = XFS_BUF_PTR(bp); | |
115 | for (j = 0; j < bcount; j++) { | |
116 | cycle = GET_CYCLE(buf, ARCH_CONVERT); | |
117 | if (cycle == stop_on_cycle_no) { | |
e56fcdce | 118 | *new_blk = i+j; |
d321ceac NS |
119 | goto out; |
120 | } | |
5000d01d SL |
121 | |
122 | buf += BBSIZE; | |
d321ceac NS |
123 | } |
124 | } | |
125 | ||
ce029dc1 | 126 | *new_blk = -1; |
d321ceac NS |
127 | |
128 | out: | |
129 | xlog_put_bp(bp); | |
130 | ||
131 | return error; | |
132 | } /* xlog_find_verify_cycle */ | |
133 | ||
134 | ||
/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 *
 * Returns 0 on success (possibly moving *last_blk back to the header
 * block), -1 if the scan ran off the front of the region without
 * finding a record header, ENOMEM, or an xlog_bread()/mount-check error.
 */

STATIC int
xlog_find_verify_log_record(xlog_t	*log,
			    xfs_daddr_t	start_blk,
			    xfs_daddr_t	*last_blk,
			    int		extra_bblks)
{
	xfs_daddr_t	i;
	xfs_buf_t	*bp;
	char		*buf = NULL;
	xlog_rec_header_t *head = NULL;
	int		error = 0;
	/* smallmem != 0 => fall back to one-block-at-a-time reads */
	int		smallmem = 0;
	int		num_blks = *last_blk - start_blk;
	int		xhdrs;

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
		/* couldn't buffer the whole range; retry with one block */
		if (!(bp = xlog_get_bp(1, log->l_mp)))
			return ENOMEM;
		smallmem = 1;
		buf = XFS_BUF_PTR(bp);
	} else {
		/* read the whole range once; scan it backwards in memory */
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		buf = XFS_BUF_PTR(bp) + (num_blks - 1) * BBSIZE;
	}

	/* walk backwards from the block before *last_blk looking for a
	 * log record header magic number */
	for (i=(*last_blk)-1; i>=0; i--) {
		if (i < start_blk) {
			/* legal log record not found */
			xlog_warn("XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		if (smallmem && (error = xlog_bread(log, i, 1, bp)))
			goto out;
		head = (xlog_rec_header_t*)buf;

		if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
			break;

		/* in big-buffer mode step the pointer back one basic block;
		 * in smallmem mode the next iteration re-reads into buf */
		if (!smallmem)
			buf -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/* we have the final block of the good log (the first block
	 * of the log record _before_ the head.  So we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/* v2 logs may carry more than one header block per record */
		int h_size = INT_GET(head->h_size, ARCH_CONVERT);
		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	if (*last_blk - i + extra_bblks
	    != BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);

	return error;
}	/* xlog_find_verify_log_record */
239 | ||
/*
 * Head is defined to be the point of the log where the next log write
 * write could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Also called from xfs_log_print.c
 *
 * Return: zero if normal, non-zero if error.
 */
int
xlog_find_head(xlog_t  *log,
	       xfs_daddr_t *return_head_blk)
{
	xfs_buf_t   *bp;
	xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
	int	    num_scan_bblks;
	uint	    first_half_cycle, last_half_cycle;
	uint	    stop_on_cycle;
	int	    error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;

		/* is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log\n");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;				/* get cycle # of 1st block */
	bp = xlog_get_bp(1,log->l_mp);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);

	last_blk = head_blk = log_bbnum-1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an illegal block number, but this
	 * value makes the math correct.  If head_blk doesn't changed through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have cycle
		 * number last_half_cycle.  We need to scan backwards from the
		 * end verifying that there are no holes still containing
		 * last_half_cycle - 1.  If we find such a hole, then the start
		 * of that hole will be the new head.  The simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somewhere at the end of the
		 * log, as one of the latest writes at the beginning was incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and the
		 * head has to end up at the start of the x-1 hole at the end of
		 * the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.  We
		 * don't worry about the x+1 blocks that we encounter, because
		 * we know that they cannot be the head since the log started with
		 * x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle number
		 * matching last_half_cycle.  We expect the log to be some
		 * variation on
		 *        x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will be
		 * where the new head belongs.  First we do a binary search for
		 * the first occurrence of last_half_cycle.  The binary search
		 * may not be totally accurate, so then we scan back from there
		 * looking for occurrences of last_half_cycle before us.  If
		 * that backwards scan wraps around the beginning of the log,
		 * then we look for occurrences of last_half_cycle - 1 at the
		 * end of the log.  The cases we're looking for look like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * or
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						   &head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
						    stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.  First
		 * we scan the physical end of the log.  In this part of the log,
		 * we are looking for blocks with cycle number last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as we've
		 * found a hole that didn't get written in going around the end
		 * of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of the
		 * log looking for occurrences of last_half_cycle.  If we find one,
		 * then our current estimate for the location of the first
		 * occurrence of last_half_cycle is wrong and we move back to the
		 * hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the x+1
		 * blocks.  We need to skip past those since that is certainly not
		 * the head of the log.  By searching for last_half_cycle-1 we
		 * accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
				num_scan_bblks-(int)head_blk, (stop_on_cycle - 1),
				&new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical log
		 * is good.  This scan needs to verify that it doesn't find the
		 * last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
						    stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks;	/* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log,
							 start_blk,
							 &head_blk,
							 0)) == -1) {
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log,
							 start_blk,
							 &head_blk,
							 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
								 start_blk,
								 &new_blk,
								 (int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

bp_err:
	xlog_put_bp(bp);

	if (error)
		xlog_warn("XFS: failed to find log head");

	return error;
}	/* xlog_find_head */
497 | ||
498 | /* | |
499 | * Find the sync block number or the tail of the log. | |
500 | * | |
501 | * This will be the block number of the last record to have its | |
502 | * associated buffers synced to disk. Every log record header has | |
503 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | |
5000d01d | 504 | * to get a sync block number. The only concern is to figure out which |
d321ceac NS |
505 | * log record header to believe. |
506 | * | |
507 | * The following algorithm uses the log record header with the largest | |
5000d01d | 508 | * lsn. The entire log record does not need to be valid. We only care |
d321ceac NS |
509 | * that the header is valid. |
510 | * | |
511 | * We could speed up search by using current head_blk buffer, but it is not | |
512 | * available. | |
513 | */ | |
514 | int | |
515 | xlog_find_tail(xlog_t *log, | |
516 | xfs_daddr_t *head_blk, | |
517 | xfs_daddr_t *tail_blk, | |
518 | int readonly) | |
519 | { | |
520 | xlog_rec_header_t *rhead; | |
521 | xlog_op_header_t *op_head; | |
522 | xfs_buf_t *bp; | |
523 | int error, i, found; | |
524 | xfs_daddr_t umount_data_blk; | |
525 | xfs_daddr_t after_umount_blk; | |
526 | xfs_lsn_t tail_lsn; | |
73bf5988 | 527 | int hblks; |
5000d01d | 528 | |
d321ceac NS |
529 | found = error = 0; |
530 | ||
531 | /* | |
5000d01d | 532 | * Find previous log record |
d321ceac NS |
533 | */ |
534 | if ((error = xlog_find_head(log, head_blk))) | |
535 | return error; | |
536 | ||
537 | bp = xlog_get_bp(1,log->l_mp); | |
538 | if (!bp) | |
ce029dc1 | 539 | return ENOMEM; |
d321ceac NS |
540 | if (*head_blk == 0) { /* special case */ |
541 | if ((error = xlog_bread(log, 0, 1, bp))) | |
542 | goto bread_err; | |
543 | if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) { | |
544 | *tail_blk = 0; | |
545 | /* leave all other log inited values alone */ | |
546 | goto exit; | |
547 | } | |
548 | } | |
549 | ||
550 | /* | |
551 | * Search backwards looking for log record header block | |
552 | */ | |
553 | ASSERT(*head_blk < INT_MAX); | |
554 | for (i=(int)(*head_blk)-1; i>=0; i--) { | |
555 | if ((error = xlog_bread(log, i, 1, bp))) | |
556 | goto bread_err; | |
557 | if (INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) { | |
558 | found = 1; | |
559 | break; | |
560 | } | |
561 | } | |
562 | /* | |
563 | * If we haven't found the log record header block, start looking | |
564 | * again from the end of the physical log. XXXmiken: There should be | |
565 | * a check here to make sure we didn't search more than N blocks in | |
566 | * the previous code. | |
567 | */ | |
568 | if (!found) { | |
569 | for (i=log->l_logBBsize-1; i>=(int)(*head_blk); i--) { | |
570 | if ((error = xlog_bread(log, i, 1, bp))) | |
571 | goto bread_err; | |
572 | if (INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) { | |
573 | found = 2; | |
574 | break; | |
575 | } | |
576 | } | |
577 | } | |
578 | if (!found) { | |
579 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | |
580 | ASSERT(0); | |
581 | return XFS_ERROR(EIO); | |
582 | } | |
583 | ||
584 | /* find blk_no of tail of log */ | |
585 | rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp); | |
586 | *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT); | |
587 | ||
588 | /* | |
589 | * Reset log values according to the state of the log when we | |
590 | * crashed. In the case where head_blk == 0, we bump curr_cycle | |
591 | * one because the next write starts a new cycle rather than | |
592 | * continuing the cycle of the last good log record. At this | |
593 | * point we have guaranteed that all partial log records have been | |
594 | * accounted for. Therefore, we know that the last good log record | |
595 | * written was complete and ended exactly on the end boundary | |
596 | * of the physical log. | |
597 | */ | |
598 | log->l_prev_block = i; | |
599 | log->l_curr_block = (int)*head_blk; | |
600 | log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); | |
601 | if (found == 2) | |
602 | log->l_curr_cycle++; | |
603 | log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); | |
604 | log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); | |
605 | log->l_grant_reserve_cycle = log->l_curr_cycle; | |
606 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | |
607 | log->l_grant_write_cycle = log->l_curr_cycle; | |
608 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | |
609 | ||
610 | /* | |
611 | * Look for unmount record. If we find it, then we know there | |
5000d01d | 612 | * was a clean unmount. Since 'i' could be the last block in |
d321ceac NS |
613 | * the physical log, we convert to a log block before comparing |
614 | * to the head_blk. | |
615 | * | |
616 | * Save the current tail lsn to use to pass to | |
617 | * xlog_clear_stale_blocks() below. We won't want to clear the | |
618 | * unmount record if there is one, so we pass the lsn of the | |
619 | * unmount record rather than the block after it. | |
620 | */ | |
73bf5988 SL |
621 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { |
622 | int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); | |
623 | int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); | |
5000d01d | 624 | if ((h_version && XLOG_VERSION_2) && |
73bf5988 SL |
625 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
626 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | |
627 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
628 | hblks++; | |
629 | } else { | |
630 | hblks = 1; | |
631 | } | |
632 | } else { | |
633 | hblks = 1; | |
634 | } | |
635 | after_umount_blk = (i + hblks + | |
636 | (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; | |
d321ceac NS |
637 | tail_lsn = log->l_tail_lsn; |
638 | if (*head_blk == after_umount_blk && INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { | |
73bf5988 | 639 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
d321ceac NS |
640 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { |
641 | goto bread_err; | |
642 | } | |
643 | op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp); | |
644 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | |
645 | /* | |
646 | * Set tail and last sync so that newly written | |
647 | * log records will point recovery to after the | |
648 | * current unmount record. | |
649 | */ | |
650 | ASSIGN_ANY_LSN(log->l_tail_lsn, log->l_curr_cycle, | |
651 | after_umount_blk, ARCH_NOCONVERT); | |
652 | ASSIGN_ANY_LSN(log->l_last_sync_lsn, log->l_curr_cycle, | |
653 | after_umount_blk, ARCH_NOCONVERT); | |
654 | *tail_blk = after_umount_blk; | |
655 | } | |
656 | } | |
657 | ||
658 | #ifdef __KERNEL__ | |
659 | /* | |
660 | * Make sure that there are no blocks in front of the head | |
661 | * with the same cycle number as the head. This can happen | |
662 | * because we allow multiple outstanding log writes concurrently, | |
663 | * and the later writes might make it out before earlier ones. | |
664 | * | |
665 | * We use the lsn from before modifying it so that we'll never | |
666 | * overwrite the unmount record after a clean unmount. | |
667 | * | |
668 | * Do this only if we are going to recover the filesystem | |
669 | */ | |
670 | if (!readonly) | |
671 | error = xlog_clear_stale_blocks(log, tail_lsn); | |
672 | #endif | |
673 | ||
674 | bread_err: | |
675 | exit: | |
676 | xlog_put_bp(bp); | |
677 | ||
5000d01d SL |
678 | if (error) |
679 | xlog_warn("XFS: failed to locate log tail"); | |
d321ceac NS |
680 | |
681 | return error; | |
682 | } /* xlog_find_tail */ | |
683 | ||
d321ceac NS |
684 | /* |
685 | * Is the log zeroed at all? | |
686 | * | |
687 | * The last binary search should be changed to perform an X block read | |
5000d01d | 688 | * once X becomes small enough. You can then search linearly through |
d321ceac NS |
689 | * the X blocks. This will cut down on the number of reads we need to do. |
690 | * | |
691 | * If the log is partially zeroed, this routine will pass back the blkno | |
692 | * of the first block with cycle number 0. It won't have a complete LR | |
693 | * preceding it. | |
694 | * | |
695 | * Return: | |
696 | * 0 => the log is completely written to | |
697 | * -1 => use *blk_no as the first block of the log | |
698 | * >0 => error has occurred | |
699 | */ | |
700 | int | |
701 | xlog_find_zeroed(struct log *log, | |
5000d01d | 702 | xfs_daddr_t *blk_no) |
d321ceac NS |
703 | { |
704 | xfs_buf_t *bp; | |
5000d01d | 705 | uint first_cycle, last_cycle; |
d321ceac | 706 | xfs_daddr_t new_blk, last_blk, start_blk; |
5000d01d SL |
707 | xfs_daddr_t num_scan_bblks; |
708 | int error, log_bbnum = log->l_logBBsize; | |
d321ceac NS |
709 | |
710 | error = 0; | |
711 | /* check totally zeroed log */ | |
712 | bp = xlog_get_bp(1,log->l_mp); | |
713 | if (!bp) | |
ce029dc1 | 714 | return ENOMEM; |
d321ceac NS |
715 | if ((error = xlog_bread(log, 0, 1, bp))) |
716 | goto bp_err; | |
717 | first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); | |
718 | if (first_cycle == 0) { /* completely zeroed log */ | |
719 | *blk_no = 0; | |
720 | xlog_put_bp(bp); | |
721 | return -1; | |
722 | } | |
723 | ||
724 | /* check partially zeroed log */ | |
725 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) | |
726 | goto bp_err; | |
727 | last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); | |
728 | if (last_cycle != 0) { /* log completely written to */ | |
729 | xlog_put_bp(bp); | |
730 | return 0; | |
731 | } else if (first_cycle != 1) { | |
732 | /* | |
733 | * If the cycle of the last block is zero, the cycle of | |
5000d01d SL |
734 | * the first block must be 1. If it's not, maybe we're |
735 | * not looking at a log... Bail out. | |
d321ceac | 736 | */ |
5000d01d | 737 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); |
d321ceac NS |
738 | return XFS_ERROR(EINVAL); |
739 | } | |
5000d01d | 740 | |
d321ceac NS |
741 | /* we have a partially zeroed log */ |
742 | last_blk = log_bbnum-1; | |
743 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) | |
744 | goto bp_err; | |
745 | ||
746 | /* | |
5000d01d | 747 | * Validate the answer. Because there is no way to guarantee that |
d321ceac NS |
748 | * the entire log is made up of log records which are the same size, |
749 | * we scan over the defined maximum blocks. At this point, the maximum | |
750 | * is not chosen to mean anything special. XXXmiken | |
751 | */ | |
73bf5988 | 752 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
d321ceac | 753 | ASSERT(num_scan_bblks <= INT_MAX); |
5000d01d | 754 | |
d321ceac NS |
755 | if (last_blk < num_scan_bblks) |
756 | num_scan_bblks = last_blk; | |
757 | start_blk = last_blk - num_scan_bblks; | |
5000d01d | 758 | |
d321ceac NS |
759 | /* |
760 | * We search for any instances of cycle number 0 that occur before | |
761 | * our current estimate of the head. What we're trying to detect is | |
5000d01d SL |
762 | * 1 ... | 0 | 1 | 0... |
763 | * ^ binary search ends here | |
d321ceac | 764 | */ |
ce029dc1 ES |
765 | if ((error = xlog_find_verify_cycle(log, start_blk, |
766 | (int)num_scan_bblks, 0, &new_blk))) | |
606d804d | 767 | goto bp_err; |
ce029dc1 ES |
768 | if (new_blk != -1) |
769 | last_blk = new_blk; | |
d321ceac NS |
770 | |
771 | /* | |
772 | * Potentially backup over partial log record write. We don't need | |
773 | * to search the end of the log because we know it is zero. | |
774 | */ | |
5000d01d | 775 | if ((error = xlog_find_verify_log_record(log, start_blk, |
79c48ada ES |
776 | &last_blk, 0)) == -1) { |
777 | error = XFS_ERROR(EIO); | |
778 | goto bp_err; | |
779 | } else if (error) | |
d321ceac NS |
780 | goto bp_err; |
781 | ||
782 | *blk_no = last_blk; | |
783 | bp_err: | |
784 | xlog_put_bp(bp); | |
785 | if (error) | |
786 | return error; | |
787 | return -1; | |
788 | } /* xlog_find_zeroed */ | |
789 | ||
790 | /* stuff for transactional view */ | |
791 | STATIC void | |
792 | xlog_unpack_data(xlog_rec_header_t *rhead, | |
793 | xfs_caddr_t dp, | |
794 | xlog_t *log) | |
795 | { | |
73bf5988 SL |
796 | int i, j, k; |
797 | union ich { | |
798 | xlog_rec_header_t hic_header; | |
799 | xlog_rec_ext_header_t hic_xheader; | |
800 | char hic_sector[XLOG_HEADER_SIZE]; | |
801 | } *xhdr; | |
802 | ||
d321ceac NS |
803 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) |
804 | uint *up = (uint *)dp; | |
805 | uint chksum = 0; | |
806 | #endif | |
807 | ||
73bf5988 SL |
808 | for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) && |
809 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { | |
5ce1d1f7 | 810 | *(uint *)dp = *(uint *)&rhead->h_cycle_data[i]; |
d321ceac NS |
811 | dp += BBSIZE; |
812 | } | |
73bf5988 SL |
813 | |
814 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { | |
815 | xhdr = (union ich*)rhead; | |
816 | for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) { | |
817 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | |
818 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | |
819 | *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; | |
820 | dp += BBSIZE; | |
821 | } | |
822 | } | |
823 | ||
d321ceac NS |
824 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) |
825 | /* divide length by 4 to get # words */ | |
826 | for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) { | |
827 | chksum ^= INT_GET(*up, ARCH_CONVERT); | |
828 | up++; | |
829 | } | |
830 | if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) { | |
831 | if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) || | |
832 | ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { | |
833 | cmn_err(CE_DEBUG, | |
5000d01d | 834 | "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)", |
d321ceac NS |
835 | INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum); |
836 | cmn_err(CE_DEBUG, | |
837 | "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); | |
73bf5988 | 838 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { |
5000d01d | 839 | cmn_err(CE_DEBUG, |
73bf5988 SL |
840 | "XFS: LogR this is a LogV2 filesystem\n"); |
841 | } | |
d321ceac NS |
842 | log->l_flags |= XLOG_CHKSUM_MISMATCH; |
843 | } | |
5000d01d | 844 | } |
d321ceac NS |
845 | #endif /* DEBUG && XFS_LOUD_RECOVERY */ |
846 | } /* xlog_unpack_data */ | |
847 | ||
d321ceac NS |
848 | STATIC xlog_recover_t * |
849 | xlog_recover_find_tid(xlog_recover_t *q, | |
850 | xlog_tid_t tid) | |
851 | { | |
852 | xlog_recover_t *p = q; | |
853 | ||
854 | while (p != NULL) { | |
855 | if (p->r_log_tid == tid) | |
856 | break; | |
857 | p = p->r_next; | |
858 | } | |
859 | return p; | |
860 | } /* xlog_recover_find_tid */ | |
861 | ||
862 | STATIC void | |
863 | xlog_recover_put_hashq(xlog_recover_t **q, | |
864 | xlog_recover_t *trans) | |
865 | { | |
866 | trans->r_next = *q; | |
867 | *q = trans; | |
868 | } /* xlog_recover_put_hashq */ | |
869 | ||
870 | STATIC void | |
871 | xlog_recover_new_tid(xlog_recover_t **q, | |
872 | xlog_tid_t tid, | |
873 | xfs_lsn_t lsn) | |
874 | { | |
875 | xlog_recover_t *trans; | |
876 | ||
877 | trans = kmem_zalloc(sizeof(xlog_recover_t), 0); | |
878 | trans->r_log_tid = tid; | |
879 | trans->r_lsn = lsn; | |
880 | xlog_recover_put_hashq(q, trans); | |
881 | } /* xlog_recover_new_tid */ | |
882 | ||
883 | ||
884 | STATIC int | |
885 | xlog_recover_unlink_tid(xlog_recover_t **q, | |
886 | xlog_recover_t *trans) | |
887 | { | |
888 | xlog_recover_t *tp; | |
889 | int found = 0; | |
890 | ||
891 | ASSERT(trans != 0); | |
892 | if (trans == *q) { | |
893 | *q = (*q)->r_next; | |
894 | } else { | |
895 | tp = *q; | |
896 | while (tp != 0) { | |
897 | if (tp->r_next == trans) { | |
898 | found = 1; | |
899 | break; | |
900 | } | |
901 | tp = tp->r_next; | |
902 | } | |
903 | if (!found) { | |
904 | xlog_warn( | |
905 | "XFS: xlog_recover_unlink_tid: trans not found"); | |
906 | ASSERT(0); | |
907 | return XFS_ERROR(EIO); | |
908 | } | |
909 | tp->r_next = tp->r_next->r_next; | |
910 | } | |
911 | return 0; | |
912 | } /* xlog_recover_unlink_tid */ | |
913 | ||
914 | /* | |
915 | * Free up any resources allocated by the transaction | |
916 | * | |
917 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | |
918 | */ | |
919 | STATIC void | |
5000d01d | 920 | xlog_recover_free_trans(xlog_recover_t *trans) |
d321ceac NS |
921 | { |
922 | xlog_recover_item_t *first_item, *item, *free_item; | |
923 | int i; | |
924 | ||
925 | item = first_item = trans->r_itemq; | |
926 | do { | |
927 | free_item = item; | |
928 | item = item->ri_next; | |
929 | /* Free the regions in the item. */ | |
930 | for (i = 0; i < free_item->ri_cnt; i++) { | |
931 | kmem_free(free_item->ri_buf[i].i_addr, | |
932 | free_item->ri_buf[i].i_len); | |
933 | } | |
934 | /* Free the item itself */ | |
935 | kmem_free(free_item->ri_buf, | |
936 | (free_item->ri_total * sizeof(xfs_log_iovec_t))); | |
937 | kmem_free(free_item, sizeof(xlog_recover_item_t)); | |
938 | } while (first_item != item); | |
939 | /* Free the transaction recover structure */ | |
940 | kmem_free(trans, sizeof(xlog_recover_t)); | |
941 | } /* xlog_recover_free_trans */ | |
942 | ||
943 | ||
944 | STATIC int | |
945 | xlog_recover_commit_trans(xlog_t *log, | |
946 | xlog_recover_t **q, | |
947 | xlog_recover_t *trans, | |
948 | int pass) | |
949 | { | |
950 | int error; | |
951 | ||
952 | if ((error = xlog_recover_unlink_tid(q, trans))) | |
953 | return error; | |
954 | if ((error = xlog_recover_do_trans(log, trans, pass))) | |
955 | return error; | |
956 | xlog_recover_free_trans(trans); /* no error */ | |
957 | return 0; | |
958 | } /* xlog_recover_commit_trans */ | |
959 | ||
960 | STATIC void | |
961 | xlog_recover_insert_item_backq(xlog_recover_item_t **q, | |
962 | xlog_recover_item_t *item) | |
963 | { | |
964 | if (*q == 0) { | |
965 | item->ri_prev = item->ri_next = item; | |
966 | *q = item; | |
967 | } else { | |
968 | item->ri_next = *q; | |
969 | item->ri_prev = (*q)->ri_prev; | |
970 | (*q)->ri_prev = item; | |
971 | item->ri_prev->ri_next = item; | |
972 | } | |
973 | } /* xlog_recover_insert_item_backq */ | |
974 | ||
975 | STATIC void | |
976 | xlog_recover_add_item(xlog_recover_item_t **itemq) | |
977 | { | |
978 | xlog_recover_item_t *item; | |
979 | ||
980 | item = kmem_zalloc(sizeof(xlog_recover_item_t), 0); | |
981 | xlog_recover_insert_item_backq(itemq, item); | |
982 | } /* xlog_recover_add_item */ | |
983 | ||
5000d01d | 984 | /* The next region to add is the start of a new region. It could be |
d321ceac NS |
985 | * a whole region or it could be the first part of a new region. Because |
986 | * of this, the assumption here is that the type and size fields of all | |
987 | * format structures fit into the first 32 bits of the structure. | |
988 | * | |
989 | * This works because all regions must be 32 bit aligned. Therefore, we | |
990 | * either have both fields or we have neither field. In the case we have | |
991 | * neither field, the data part of the region is zero length. We only have | |
992 | * a log_op_header and can throw away the header since a new one will appear | |
993 | * later. If we have at least 4 bytes, then we can determine how many regions | |
994 | * will appear in the current log item. | |
995 | */ | |
996 | STATIC int | |
997 | xlog_recover_add_to_trans(xlog_recover_t *trans, | |
998 | xfs_caddr_t dp, | |
999 | int len) | |
1000 | { | |
1001 | xfs_inode_log_format_t *in_f; /* any will do */ | |
1002 | xlog_recover_item_t *item; | |
1003 | xfs_caddr_t ptr; | |
1004 | ||
1005 | if (!len) | |
1006 | return 0; | |
1007 | ptr = kmem_zalloc(len, 0); | |
1008 | bcopy(dp, ptr, len); | |
5000d01d | 1009 | |
d321ceac NS |
1010 | in_f = (xfs_inode_log_format_t *)ptr; |
1011 | item = trans->r_itemq; | |
1012 | if (item == 0) { | |
1013 | ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); | |
1014 | if (len == sizeof(xfs_trans_header_t)) | |
1015 | xlog_recover_add_item(&trans->r_itemq); | |
1016 | bcopy(dp, &trans->r_theader, len); /* s, d, l */ | |
1017 | return 0; | |
1018 | } | |
1019 | if (item->ri_prev->ri_total != 0 && | |
1020 | item->ri_prev->ri_total == item->ri_prev->ri_cnt) { | |
1021 | xlog_recover_add_item(&trans->r_itemq); | |
1022 | } | |
1023 | item = trans->r_itemq; | |
1024 | item = item->ri_prev; | |
1025 | ||
1026 | if (item->ri_total == 0) { /* first region to be added */ | |
1027 | item->ri_total = in_f->ilf_size; | |
1028 | ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); | |
1029 | item->ri_buf = kmem_zalloc((item->ri_total * | |
1030 | sizeof(xfs_log_iovec_t)), 0); | |
1031 | } | |
1032 | ASSERT(item->ri_total > item->ri_cnt); | |
1033 | /* Description region is ri_buf[0] */ | |
1034 | item->ri_buf[item->ri_cnt].i_addr = ptr; | |
1035 | item->ri_buf[item->ri_cnt].i_len = len; | |
1036 | item->ri_cnt++; | |
1037 | return 0; | |
1038 | } /* xlog_recover_add_to_trans */ | |
1039 | ||
1040 | STATIC int | |
1041 | xlog_recover_add_to_cont_trans(xlog_recover_t *trans, | |
1042 | xfs_caddr_t dp, | |
1043 | int len) | |
1044 | { | |
1045 | xlog_recover_item_t *item; | |
1046 | xfs_caddr_t ptr, old_ptr; | |
1047 | int old_len; | |
5000d01d | 1048 | |
d321ceac NS |
1049 | item = trans->r_itemq; |
1050 | if (item == 0) { | |
1051 | /* finish copying rest of trans header */ | |
1052 | xlog_recover_add_item(&trans->r_itemq); | |
1053 | ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len; | |
1054 | bcopy(dp, ptr, len); /* s, d, l */ | |
1055 | return 0; | |
1056 | } | |
1057 | item = item->ri_prev; | |
1058 | ||
1059 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | |
1060 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | |
1061 | ||
5000d01d SL |
1062 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); |
1063 | bcopy(dp , &ptr[old_len], len); /* s, d, l */ | |
d321ceac NS |
1064 | item->ri_buf[item->ri_cnt-1].i_len += len; |
1065 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | |
1066 | return 0; | |
1067 | } /* xlog_recover_add_to_cont_trans */ | |
1068 | ||
1069 | STATIC int | |
1070 | xlog_recover_unmount_trans(xlog_recover_t *trans) | |
1071 | { | |
1072 | /* Do nothing now */ | |
1073 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | |
1074 | return( 0 ); | |
1075 | } /* xlog_recover_unmount_trans */ | |
1076 | ||
1077 | ||
1078 | STATIC int | |
1079 | xlog_recover_process_data(xlog_t *log, | |
1080 | xlog_recover_t *rhash[], | |
1081 | xlog_rec_header_t *rhead, | |
1082 | xfs_caddr_t dp, | |
1083 | int pass) | |
1084 | { | |
1085 | xfs_caddr_t lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT); | |
1086 | int num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT); | |
1087 | xlog_op_header_t *ohead; | |
1088 | xlog_recover_t *trans; | |
1089 | xlog_tid_t tid; | |
1090 | int error; | |
1091 | unsigned long hash; | |
1092 | uint flags; | |
5000d01d | 1093 | |
d321ceac NS |
1094 | /* check the log format matches our own - else we can't recover */ |
1095 | if (xlog_header_check_recover(log->l_mp, rhead)) | |
1096 | return (XFS_ERROR(EIO)); | |
5000d01d | 1097 | |
73bf5988 | 1098 | while ((dp < lp) && num_logops) { |
d321ceac NS |
1099 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); |
1100 | ohead = (xlog_op_header_t *)dp; | |
1101 | dp += sizeof(xlog_op_header_t); | |
1102 | if (ohead->oh_clientid != XFS_TRANSACTION && | |
1103 | ohead->oh_clientid != XFS_LOG) { | |
1104 | xlog_warn("XFS: xlog_recover_process_data: bad clientid"); | |
1105 | ASSERT(0); | |
1106 | return (XFS_ERROR(EIO)); | |
5000d01d | 1107 | } |
d321ceac NS |
1108 | tid = INT_GET(ohead->oh_tid, ARCH_CONVERT); |
1109 | hash = XLOG_RHASH(tid); | |
1110 | trans = xlog_recover_find_tid(rhash[hash], tid); | |
1111 | if (trans == NULL) { /* not found; add new tid */ | |
1112 | if (ohead->oh_flags & XLOG_START_TRANS) | |
1113 | xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT)); | |
1114 | } else { | |
1115 | ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp); | |
1116 | flags = ohead->oh_flags & ~XLOG_END_TRANS; | |
1117 | if (flags & XLOG_WAS_CONT_TRANS) | |
1118 | flags &= ~XLOG_CONTINUE_TRANS; | |
1119 | switch (flags) { | |
1120 | case XLOG_COMMIT_TRANS: { | |
1121 | error = xlog_recover_commit_trans(log, &rhash[hash], | |
1122 | trans, pass); | |
1123 | break; | |
1124 | } | |
1125 | case XLOG_UNMOUNT_TRANS: { | |
1126 | error = xlog_recover_unmount_trans(trans); | |
1127 | break; | |
1128 | } | |
1129 | case XLOG_WAS_CONT_TRANS: { | |
1130 | error = xlog_recover_add_to_cont_trans(trans, dp, | |
1131 | INT_GET(ohead->oh_len, ARCH_CONVERT)); | |
1132 | break; | |
1133 | } | |
1134 | case XLOG_START_TRANS : { | |
1135 | xlog_warn("XFS: xlog_recover_process_data: bad transaction"); | |
1136 | ASSERT(0); | |
1137 | error = XFS_ERROR(EIO); | |
1138 | break; | |
1139 | } | |
1140 | case 0: | |
1141 | case XLOG_CONTINUE_TRANS: { | |
1142 | error = xlog_recover_add_to_trans(trans, dp, | |
1143 | INT_GET(ohead->oh_len, ARCH_CONVERT)); | |
1144 | break; | |
1145 | } | |
1146 | default: { | |
1147 | xlog_warn("XFS: xlog_recover_process_data: bad flag"); | |
1148 | ASSERT(0); | |
1149 | error = XFS_ERROR(EIO); | |
1150 | break; | |
1151 | } | |
1152 | } /* switch */ | |
1153 | if (error) | |
1154 | return error; | |
1155 | } /* if */ | |
1156 | dp += INT_GET(ohead->oh_len, ARCH_CONVERT); | |
1157 | num_logops--; | |
1158 | } | |
1159 | return( 0 ); | |
1160 | } /* xlog_recover_process_data */ | |
1161 | ||
1162 | /* | |
1163 | * Read the log from tail to head and process the log records found. | |
1164 | * Handle the two cases where the tail and head are in the same cycle | |
1165 | * and where the active portion of the log wraps around the end of | |
5000d01d | 1166 | * the physical log separately. The pass parameter is passed through |
d321ceac NS |
1167 | * to the routines called to process the data and is not looked at |
1168 | * here. | |
1169 | */ | |
1170 | int | |
1171 | xlog_do_recovery_pass(xlog_t *log, | |
1172 | xfs_daddr_t head_blk, | |
1173 | xfs_daddr_t tail_blk, | |
1174 | int pass) | |
1175 | { | |
1176 | xlog_rec_header_t *rhead; | |
1177 | xfs_daddr_t blk_no; | |
1178 | xfs_caddr_t bufaddr; | |
1179 | xfs_buf_t *hbp, *dbp; | |
73bf5988 | 1180 | int error, h_size; |
5000d01d SL |
1181 | int bblks, split_bblks; |
1182 | int hblks, split_hblks, wrapped_hblks; | |
d321ceac NS |
1183 | xlog_recover_t *rhash[XLOG_RHASH_SIZE]; |
1184 | ||
1185 | error = 0; | |
73bf5988 SL |
1186 | |
1187 | ||
1188 | /* | |
1189 | * Read the header of the tail block and get the iclog buffer size from | |
5000d01d | 1190 | * h_size. Use this to tell how many sectors make up the log header. |
73bf5988 SL |
1191 | */ |
1192 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { | |
1193 | /* | |
1194 | * When using variable length iclogs, read first sector of iclog | |
5000d01d | 1195 | * header and extract the header size from it. Get a new hbp that |
73bf5988 SL |
1196 | * is the correct size. |
1197 | */ | |
1198 | hbp = xlog_get_bp(1, log->l_mp); | |
1199 | if (!hbp) | |
1200 | return ENOMEM; | |
1201 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) | |
1202 | goto bread_err1; | |
1203 | rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); | |
1204 | ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == | |
1205 | XLOG_HEADER_MAGIC_NUM); | |
1206 | if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) { | |
1207 | xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number."); | |
1208 | error = XFS_ERROR(EIO); | |
1209 | goto bread_err1; | |
1210 | } | |
1211 | h_size = INT_GET(rhead->h_size, ARCH_CONVERT); | |
1212 | ||
1213 | if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) && | |
1214 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | |
1215 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | |
1216 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | |
1217 | hblks++; | |
1218 | xlog_put_bp(hbp); | |
1219 | hbp = xlog_get_bp(hblks, log->l_mp); | |
1220 | } else { | |
1221 | hblks=1; | |
1222 | } | |
1223 | } else { | |
1224 | hblks=1; | |
1225 | hbp = xlog_get_bp(1, log->l_mp); | |
1226 | h_size = XLOG_BIG_RECORD_BSIZE; | |
1227 | } | |
1228 | ||
d321ceac | 1229 | if (!hbp) |
ce029dc1 | 1230 | return ENOMEM; |
73bf5988 | 1231 | dbp = xlog_get_bp(BTOBB(h_size),log->l_mp); |
d321ceac NS |
1232 | if (!dbp) { |
1233 | xlog_put_bp(hbp); | |
ce029dc1 | 1234 | return ENOMEM; |
d321ceac | 1235 | } |
73bf5988 | 1236 | |
d321ceac NS |
1237 | bzero(rhash, sizeof(rhash)); |
1238 | if (tail_blk <= head_blk) { | |
1239 | for (blk_no = tail_blk; blk_no < head_blk; ) { | |
73bf5988 SL |
1240 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) |
1241 | goto bread_err2; | |
d321ceac NS |
1242 | rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); |
1243 | ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); | |
1244 | ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX)); | |
5f651f11 NS |
1245 | bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */ |
1246 | ||
1247 | if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) || | |
1248 | (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) || | |
1249 | (bblks <= 0) || | |
1250 | (blk_no > log->l_logBBsize)) { | |
1251 | error = EFSCORRUPTED; | |
1252 | goto bread_err2; | |
1253 | } | |
1254 | ||
73bf5988 SL |
1255 | if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) { |
1256 | xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number."); | |
1257 | error = XFS_ERROR(EIO); | |
1258 | goto bread_err2; | |
1259 | } | |
d321ceac NS |
1260 | bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */ |
1261 | if (bblks > 0) { | |
73bf5988 SL |
1262 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) |
1263 | goto bread_err2; | |
d321ceac NS |
1264 | xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log); |
1265 | if ((error = xlog_recover_process_data(log, rhash, | |
1266 | rhead, XFS_BUF_PTR(dbp), | |
1267 | pass))) | |
73bf5988 | 1268 | goto bread_err2; |
d321ceac | 1269 | } |
73bf5988 | 1270 | blk_no += (bblks+hblks); |
d321ceac NS |
1271 | } |
1272 | } else { | |
1273 | /* | |
5000d01d | 1274 | * Perform recovery around the end of the physical log. When the head |
d321ceac NS |
1275 | * is not on the same cycle number as the tail, we can't do a sequential |
1276 | * recovery as above. | |
1277 | */ | |
1278 | blk_no = tail_blk; | |
1279 | while (blk_no < log->l_logBBsize) { | |
73bf5988 SL |
1280 | /* |
1281 | * Check for header wrapping around physical end-of-log | |
1282 | */ | |
1283 | wrapped_hblks = 0; | |
1284 | if (blk_no+hblks <= log->l_logBBsize) { | |
1285 | /* Read header in one read */ | |
1286 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | |
1287 | goto bread_err2; | |
1288 | } else { | |
1289 | /* This log record is split across physical end of log */ | |
1290 | split_hblks = 0; | |
1291 | if (blk_no != log->l_logBBsize) { | |
1292 | /* some data is before physical end of log */ | |
1293 | ASSERT(blk_no <= INT_MAX); | |
1294 | split_hblks = log->l_logBBsize - (int)blk_no; | |
1295 | ASSERT(split_hblks > 0); | |
1296 | if ((error = xlog_bread(log, blk_no, split_hblks, hbp))) | |
1297 | goto bread_err2; | |
1298 | } | |
1299 | bufaddr = XFS_BUF_PTR(hbp); | |
1300 | XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks), | |
1301 | BBTOB(hblks - split_hblks)); | |
1302 | wrapped_hblks = hblks - split_hblks; | |
1303 | if ((error = xlog_bread(log, 0, wrapped_hblks, hbp))) | |
1304 | goto bread_err2; | |
1305 | XFS_BUF_SET_PTR(hbp, bufaddr, hblks); | |
1306 | } | |
d321ceac NS |
1307 | rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); |
1308 | ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); | |
5000d01d | 1309 | ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX)); |
d321ceac NS |
1310 | bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); |
1311 | ||
1312 | /* LR body must have data or it wouldn't have been written */ | |
1313 | ASSERT(bblks > 0); | |
73bf5988 | 1314 | blk_no += hblks; /* successfully read header */ |
d321ceac NS |
1315 | |
1316 | if ((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) || | |
1317 | (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) || | |
73bf5988 | 1318 | (bblks <= 0)) { |
d321ceac | 1319 | error = EFSCORRUPTED; |
73bf5988 | 1320 | goto bread_err2; |
d321ceac | 1321 | } |
5000d01d | 1322 | |
d321ceac NS |
1323 | /* Read in data for log record */ |
1324 | if (blk_no+bblks <= log->l_logBBsize) { | |
1325 | if ((error = xlog_bread(log, blk_no, bblks, dbp))) | |
73bf5988 | 1326 | goto bread_err2; |
d321ceac NS |
1327 | } else { |
1328 | /* This log record is split across physical end of log */ | |
1329 | split_bblks = 0; | |
1330 | if (blk_no != log->l_logBBsize) { | |
1331 | ||
1332 | /* some data is before physical end of log */ | |
1333 | ASSERT(blk_no <= INT_MAX); | |
1334 | split_bblks = log->l_logBBsize - (int)blk_no; | |
1335 | ASSERT(split_bblks > 0); | |
1336 | if ((error = xlog_bread(log, blk_no, split_bblks, dbp))) | |
73bf5988 | 1337 | goto bread_err2; |
d321ceac NS |
1338 | } |
1339 | bufaddr = XFS_BUF_PTR(dbp); | |
1340 | XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks), | |
1341 | BBTOB(bblks - split_bblks)); | |
73bf5988 SL |
1342 | if ((error = xlog_bread(log, wrapped_hblks, |
1343 | bblks - split_bblks, dbp))) | |
1344 | goto bread_err2; | |
1345 | XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE); | |
d321ceac NS |
1346 | } |
1347 | xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log); | |
1348 | if ((error = xlog_recover_process_data(log, rhash, | |
1349 | rhead, XFS_BUF_PTR(dbp), | |
1350 | pass))) | |
73bf5988 | 1351 | goto bread_err2; |
d321ceac NS |
1352 | blk_no += bblks; |
1353 | } | |
1354 | ||
1355 | ASSERT(blk_no >= log->l_logBBsize); | |
1356 | blk_no -= log->l_logBBsize; | |
1357 | ||
1358 | /* read first part of physical log */ | |
1359 | while (blk_no < head_blk) { | |
73bf5988 SL |
1360 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) |
1361 | goto bread_err2; | |
d321ceac NS |
1362 | rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); |
1363 | ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); | |
1364 | ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX)); | |
1365 | bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); | |
1366 | ASSERT(bblks > 0); | |
73bf5988 SL |
1367 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) |
1368 | goto bread_err2; | |
d321ceac NS |
1369 | xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log); |
1370 | if ((error = xlog_recover_process_data(log, rhash, | |
1371 | rhead, XFS_BUF_PTR(dbp), | |
1372 | pass))) | |
73bf5988 SL |
1373 | goto bread_err2; |
1374 | blk_no += (bblks+hblks); | |
5000d01d | 1375 | } |
d321ceac NS |
1376 | } |
1377 | ||
73bf5988 | 1378 | bread_err2: |
d321ceac | 1379 | xlog_put_bp(dbp); |
73bf5988 | 1380 | bread_err1: |
d321ceac NS |
1381 | xlog_put_bp(hbp); |
1382 | ||
1383 | return error; | |
1384 | } |