git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/xfs_inode_buf.c
libxfs: modify verifiers to differentiate CRC from other errors
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_inode_buf.c
1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <xfs.h>
20
/*
 * Debug-only sanity check: walk the inodes in the buffer and raise an
 * alert for every one whose next-unlinked field is 0.
 *
 * mp: mount the buffer belongs to (supplies the inode geometry).
 * bp: inode buffer to inspect.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	xfs_dinode_t	*dip;
	int		nr_inodes;
	int		i;

	/* Inode count: cluster size shifted down by the inode-size log. */
	nr_inodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;

	for (i = 0; i < nr_inodes; i++) {
		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					i * mp->m_sb.sb_inodesize);
		if (dip->di_next_unlinked)
			continue;

		/* %llx is an unsigned conversion, so pass an unsigned value. */
		xfs_alert(mp,
	"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
			i, (unsigned long long)bp->b_bn);
	}
}
#endif
48
49 /*
50 * If we are doing readahead on an inode buffer, we might be in log recovery
51 * reading an inode allocation buffer that hasn't yet been replayed, and hence
52 * has not had the inode cores stamped into it. Hence for readahead, the buffer
53 * may be potentially invalid.
54 *
55 * If the readahead buffer is invalid, we don't want to mark it with an error,
56 * but we do want to clear the DONE status of the buffer so that a followup read
57 * will re-read it from disk. This will ensure that we don't get an unnecessary
58 * warnings during log recovery and we don't get unnecessary panics on debug
59 * kernels.
60 */
61 static void
62 xfs_inode_buf_verify(
63 struct xfs_buf *bp,
64 bool readahead)
65 {
66 struct xfs_mount *mp = bp->b_target->bt_mount;
67 int i;
68 int ni;
69
70 /*
71 * Validate the magic number and version of every inode in the buffer
72 */
73 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
74 for (i = 0; i < ni; i++) {
75 int di_ok;
76 xfs_dinode_t *dip;
77
78 dip = (struct xfs_dinode *)xfs_buf_offset(bp,
79 (i << mp->m_sb.sb_inodelog));
80 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
81 XFS_DINODE_GOOD_VERSION(dip->di_version);
82 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
83 XFS_ERRTAG_ITOBP_INOTOBP,
84 XFS_RANDOM_ITOBP_INOTOBP))) {
85 if (readahead) {
86 bp->b_flags &= ~XBF_DONE;
87 return;
88 }
89
90 xfs_buf_ioerror(bp, EFSCORRUPTED);
91 xfs_verifier_error(bp);
92 #ifdef DEBUG
93 xfs_alert(mp,
94 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
95 (unsigned long long)bp->b_bn, i,
96 be16_to_cpu(dip->di_magic));
97 #endif
98 }
99 }
100 xfs_inobp_check(mp, bp);
101 }
102
103
/*
 * Read verifier for inode buffers: run the common inode buffer check in
 * non-readahead mode, so invalid inodes are flagged on the buffer.
 */
static void
xfs_inode_buf_read_verify(
	struct xfs_buf	*bp)
{
	const bool	is_readahead = false;

	xfs_inode_buf_verify(bp, is_readahead);
}
110
/*
 * Readahead verifier for inode buffers: run the common inode buffer check
 * in readahead mode, where an invalid buffer only loses its DONE flag
 * instead of being marked with an error.
 */
static void
xfs_inode_buf_readahead_verify(
	struct xfs_buf	*bp)
{
	const bool	is_readahead = true;

	xfs_inode_buf_verify(bp, is_readahead);
}
117
/*
 * Write verifier for inode buffers: the same non-readahead check that is
 * applied on read.
 */
static void
xfs_inode_buf_write_verify(
	struct xfs_buf	*bp)
{
	const bool	is_readahead = false;

	xfs_inode_buf_verify(bp, is_readahead);
}
124
125 const struct xfs_buf_ops xfs_inode_buf_ops = {
126 .verify_read = xfs_inode_buf_read_verify,
127 .verify_write = xfs_inode_buf_write_verify,
128 };
129
130 const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
131 .verify_read = xfs_inode_buf_readahead_verify,
132 .verify_write = xfs_inode_buf_write_verify,
133 };
134
135
136 /*
137 * This routine is called to map an inode to the buffer containing the on-disk
138 * version of the inode. It returns a pointer to the buffer containing the
139 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
140 * pointer to the on-disk inode within that buffer.
141 *
142 * If a non-zero error is returned, then the contents of bpp and dipp are
143 * undefined.
144 */
145 int
146 xfs_imap_to_bp(
147 struct xfs_mount *mp,
148 struct xfs_trans *tp,
149 struct xfs_imap *imap,
150 struct xfs_dinode **dipp,
151 struct xfs_buf **bpp,
152 uint buf_flags,
153 uint iget_flags)
154 {
155 struct xfs_buf *bp;
156 int error;
157
158 buf_flags |= XBF_UNMAPPED;
159 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
160 (int)imap->im_len, buf_flags, &bp,
161 &xfs_inode_buf_ops);
162 if (error) {
163 if (error == EAGAIN) {
164 ASSERT(buf_flags & XBF_TRYLOCK);
165 return error;
166 }
167
168 if (error == EFSCORRUPTED &&
169 (iget_flags & XFS_IGET_UNTRUSTED))
170 return XFS_ERROR(EINVAL);
171
172 xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
173 __func__, error);
174 return error;
175 }
176
177 *bpp = bp;
178 *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
179 return 0;
180 }
181
182 void
183 xfs_dinode_from_disk(
184 xfs_icdinode_t *to,
185 xfs_dinode_t *from)
186 {
187 to->di_magic = be16_to_cpu(from->di_magic);
188 to->di_mode = be16_to_cpu(from->di_mode);
189 to->di_version = from ->di_version;
190 to->di_format = from->di_format;
191 to->di_onlink = be16_to_cpu(from->di_onlink);
192 to->di_uid = be32_to_cpu(from->di_uid);
193 to->di_gid = be32_to_cpu(from->di_gid);
194 to->di_nlink = be32_to_cpu(from->di_nlink);
195 to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
196 to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
197 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
198 to->di_flushiter = be16_to_cpu(from->di_flushiter);
199 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
200 to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
201 to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
202 to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
203 to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
204 to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
205 to->di_size = be64_to_cpu(from->di_size);
206 to->di_nblocks = be64_to_cpu(from->di_nblocks);
207 to->di_extsize = be32_to_cpu(from->di_extsize);
208 to->di_nextents = be32_to_cpu(from->di_nextents);
209 to->di_anextents = be16_to_cpu(from->di_anextents);
210 to->di_forkoff = from->di_forkoff;
211 to->di_aformat = from->di_aformat;
212 to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
213 to->di_dmstate = be16_to_cpu(from->di_dmstate);
214 to->di_flags = be16_to_cpu(from->di_flags);
215 to->di_gen = be32_to_cpu(from->di_gen);
216
217 if (to->di_version == 3) {
218 to->di_changecount = be64_to_cpu(from->di_changecount);
219 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
220 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
221 to->di_flags2 = be64_to_cpu(from->di_flags2);
222 to->di_ino = be64_to_cpu(from->di_ino);
223 to->di_lsn = be64_to_cpu(from->di_lsn);
224 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
225 uuid_copy(&to->di_uuid, &from->di_uuid);
226 }
227 }
228
229 void
230 xfs_dinode_to_disk(
231 xfs_dinode_t *to,
232 xfs_icdinode_t *from)
233 {
234 to->di_magic = cpu_to_be16(from->di_magic);
235 to->di_mode = cpu_to_be16(from->di_mode);
236 to->di_version = from ->di_version;
237 to->di_format = from->di_format;
238 to->di_onlink = cpu_to_be16(from->di_onlink);
239 to->di_uid = cpu_to_be32(from->di_uid);
240 to->di_gid = cpu_to_be32(from->di_gid);
241 to->di_nlink = cpu_to_be32(from->di_nlink);
242 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
243 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
244 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
245 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
246 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
247 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
248 to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
249 to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
250 to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
251 to->di_size = cpu_to_be64(from->di_size);
252 to->di_nblocks = cpu_to_be64(from->di_nblocks);
253 to->di_extsize = cpu_to_be32(from->di_extsize);
254 to->di_nextents = cpu_to_be32(from->di_nextents);
255 to->di_anextents = cpu_to_be16(from->di_anextents);
256 to->di_forkoff = from->di_forkoff;
257 to->di_aformat = from->di_aformat;
258 to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
259 to->di_dmstate = cpu_to_be16(from->di_dmstate);
260 to->di_flags = cpu_to_be16(from->di_flags);
261 to->di_gen = cpu_to_be32(from->di_gen);
262
263 if (from->di_version == 3) {
264 to->di_changecount = cpu_to_be64(from->di_changecount);
265 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
266 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
267 to->di_flags2 = cpu_to_be64(from->di_flags2);
268 to->di_ino = cpu_to_be64(from->di_ino);
269 to->di_lsn = cpu_to_be64(from->di_lsn);
270 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
271 uuid_copy(&to->di_uuid, &from->di_uuid);
272 to->di_flushiter = 0;
273 } else {
274 to->di_flushiter = cpu_to_be16(from->di_flushiter);
275 }
276 }
277
278 bool
279 xfs_dinode_verify(
280 struct xfs_mount *mp,
281 xfs_ino_t ino,
282 struct xfs_dinode *dip)
283 {
284 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
285 return false;
286
287 /* only version 3 or greater inodes are extensively verified here */
288 if (dip->di_version < 3)
289 return true;
290
291 if (!xfs_sb_version_hascrc(&mp->m_sb))
292 return false;
293 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
294 XFS_DINODE_CRC_OFF))
295 return false;
296 if (be64_to_cpu(dip->di_ino) != ino)
297 return false;
298 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
299 return false;
300 return true;
301 }
302
303 void
304 xfs_dinode_calc_crc(
305 struct xfs_mount *mp,
306 struct xfs_dinode *dip)
307 {
308 __uint32_t crc;
309
310 if (dip->di_version < 3)
311 return;
312
313 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
314 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
315 XFS_DINODE_CRC_OFF);
316 dip->di_crc = xfs_end_cksum(crc);
317 }
318
319 /*
320 * Read the disk inode attributes into the in-core inode structure.
321 */
322 int
323 xfs_iread(
324 xfs_mount_t *mp,
325 xfs_trans_t *tp,
326 xfs_inode_t *ip,
327 uint iget_flags)
328 {
329 xfs_buf_t *bp;
330 xfs_dinode_t *dip;
331 int error;
332
333 /*
334 * Fill in the location information in the in-core inode.
335 */
336 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
337 if (error)
338 return error;
339
340 /*
341 * Get pointers to the on-disk inode and the buffer containing it.
342 */
343 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
344 if (error)
345 return error;
346
347 /* even unallocated inodes are verified */
348 if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
349 xfs_alert(mp, "%s: validation failed for inode %lld failed",
350 __func__, ip->i_ino);
351
352 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
353 error = XFS_ERROR(EFSCORRUPTED);
354 goto out_brelse;
355 }
356
357 /*
358 * If the on-disk inode is already linked to a directory
359 * entry, copy all of the inode into the in-core inode.
360 * xfs_iformat_fork() handles copying in the inode format
361 * specific information.
362 * Otherwise, just get the truly permanent information.
363 */
364 if (dip->di_mode) {
365 xfs_dinode_from_disk(&ip->i_d, dip);
366 error = xfs_iformat_fork(ip, dip);
367 if (error) {
368 #ifdef DEBUG
369 xfs_alert(mp, "%s: xfs_iformat() returned error %d",
370 __func__, error);
371 #endif /* DEBUG */
372 goto out_brelse;
373 }
374 } else {
375 /*
376 * Partial initialisation of the in-core inode. Just the bits
377 * that xfs_ialloc won't overwrite or relies on being correct.
378 */
379 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
380 ip->i_d.di_version = dip->di_version;
381 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
382 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
383
384 if (dip->di_version == 3) {
385 ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
386 uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
387 }
388
389 /*
390 * Make sure to pull in the mode here as well in
391 * case the inode is released without being used.
392 * This ensures that xfs_inactive() will see that
393 * the inode is already free and not try to mess
394 * with the uninitialized part of it.
395 */
396 ip->i_d.di_mode = 0;
397 }
398
399 /*
400 * The inode format changed when we moved the link count and
401 * made it 32 bits long. If this is an old format inode,
402 * convert it in memory to look like a new one. If it gets
403 * flushed to disk we will convert back before flushing or
404 * logging it. We zero out the new projid field and the old link
405 * count field. We'll handle clearing the pad field (the remains
406 * of the old uuid field) when we actually convert the inode to
407 * the new format. We don't change the version number so that we
408 * can distinguish this from a real new format inode.
409 */
410 if (ip->i_d.di_version == 1) {
411 ip->i_d.di_nlink = ip->i_d.di_onlink;
412 ip->i_d.di_onlink = 0;
413 xfs_set_projid(&ip->i_d, 0);
414 }
415
416 ip->i_delayed_blks = 0;
417
418 /*
419 * Mark the buffer containing the inode as something to keep
420 * around for a while. This helps to keep recently accessed
421 * meta-data in-core longer.
422 */
423 xfs_buf_set_ref(bp, XFS_INO_REF);
424
425 /*
426 * Use xfs_trans_brelse() to release the buffer containing the on-disk
427 * inode, because it was acquired with xfs_trans_read_buf() in
428 * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
429 * brelse(). If we're within a transaction, then xfs_trans_brelse()
430 * will only release the buffer if it is not dirty within the
431 * transaction. It will be OK to release the buffer in this case,
432 * because inodes on disk are never destroyed and we will be locking the
433 * new in-core inode before putting it in the cache where other
434 * processes can find it. Thus we don't have to worry about the inode
435 * being changed just because we released the buffer.
436 */
437 out_brelse:
438 xfs_trans_brelse(tp, bp);
439 return error;
440 }