]>
Commit | Line | Data |
---|---|---|
2bd0ea18 | 1 | /* |
5e656dbb | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
da23017d | 3 | * All Rights Reserved. |
5000d01d | 4 | * |
da23017d NS |
5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | |
2bd0ea18 | 7 | * published by the Free Software Foundation. |
5000d01d | 8 | * |
da23017d NS |
9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
5000d01d | 13 | * |
da23017d NS |
14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
2bd0ea18 NS |
17 | */ |
18 | ||
19 | #include <xfs.h> | |
20 | ||
5e656dbb BN |
/*
 * NOTE(review): presumably the allocation zone that in-core inodes are
 * taken from; nothing in this part of the file uses it -- confirm against
 * the callers.
 */
kmem_zone_t *xfs_inode_zone;

/*
 * Used in xfs_itruncate_extents().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
#define XFS_ITRUNC_MAX_EXTENTS 2
28 | ||
5e656dbb BN |
/*
 * Debug-only sanity check: walk the inodes of one inode cluster in the
 * buffer and raise an alert (followed by an assertion) for every inode
 * whose di_next_unlinked field is zero.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	xfs_dinode_t	*dip;
	int		ninodes;
	int		idx = 0;

	/* inode count: cluster size divided by the inode size (as a shift) */
	ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;

	while (idx < ninodes) {
		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					idx * mp->m_sb.sb_inodesize);
		idx++;

		if (dip->di_next_unlinked)
			continue;

		xfs_alert(mp,
	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
			bp);
		ASSERT(dip->di_next_unlinked);
	}
}
#endif
57 | ||
a2ceac1f DC |
58 | static void |
59 | xfs_inode_buf_verify( | |
60 | struct xfs_buf *bp) | |
5e656dbb | 61 | { |
a2ceac1f | 62 | struct xfs_mount *mp = bp->b_target->bt_mount; |
5e656dbb BN |
63 | int i; |
64 | int ni; | |
5e656dbb BN |
65 | |
66 | /* | |
67 | * Validate the magic number and version of every inode in the buffer | |
5e656dbb | 68 | */ |
a2ceac1f | 69 | ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; |
5e656dbb BN |
70 | for (i = 0; i < ni; i++) { |
71 | int di_ok; | |
72 | xfs_dinode_t *dip; | |
73 | ||
a2ceac1f | 74 | dip = (struct xfs_dinode *)xfs_buf_offset(bp, |
5e656dbb | 75 | (i << mp->m_sb.sb_inodelog)); |
a2ceac1f | 76 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && |
56b2de80 | 77 | XFS_DINODE_GOOD_VERSION(dip->di_version); |
5e656dbb BN |
78 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, |
79 | XFS_ERRTAG_ITOBP_INOTOBP, | |
80 | XFS_RANDOM_ITOBP_INOTOBP))) { | |
a2ceac1f DC |
81 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
82 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, | |
83 | mp, dip); | |
5e656dbb | 84 | #ifdef DEBUG |
a2ceac1f DC |
85 | xfs_emerg(mp, |
86 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | |
87 | (unsigned long long)bp->b_bn, i, | |
56b2de80 | 88 | be16_to_cpu(dip->di_magic)); |
a2ceac1f | 89 | ASSERT(0); |
5e656dbb | 90 | #endif |
5e656dbb BN |
91 | } |
92 | } | |
5e656dbb | 93 | xfs_inobp_check(mp, bp); |
a2ceac1f | 94 | } |
5e656dbb | 95 | |
5e656dbb | 96 | |
a2ceac1f DC |
/*
 * Read-side verifier installed in xfs_inode_buf_ops; it simply defers to
 * the common inode buffer verifier.
 */
static void
xfs_inode_buf_read_verify(
	struct xfs_buf		*bp)
{
	xfs_inode_buf_verify(bp);
}
2bd0ea18 | 103 | |
a2ceac1f DC |
/*
 * Write-side verifier installed in xfs_inode_buf_ops; it runs the same
 * common inode buffer verifier as the read side.
 */
static void
xfs_inode_buf_write_verify(
	struct xfs_buf		*bp)
{
	xfs_inode_buf_verify(bp);
}
110 | ||
a2ceac1f DC |
111 | const struct xfs_buf_ops xfs_inode_buf_ops = { |
112 | .verify_read = xfs_inode_buf_read_verify, | |
113 | .verify_write = xfs_inode_buf_write_verify, | |
114 | }; | |
115 | ||
56b2de80 | 116 | |
2bd0ea18 | 117 | /* |
a2ceac1f DC |
118 | * This routine is called to map an inode to the buffer containing the on-disk |
119 | * version of the inode. It returns a pointer to the buffer containing the | |
120 | * on-disk inode in the bpp parameter, and in the dipp parameter it returns a | |
121 | * pointer to the on-disk inode within that buffer. | |
2bd0ea18 | 122 | * |
a2ceac1f DC |
123 | * If a non-zero error is returned, then the contents of bpp and dipp are |
124 | * undefined. | |
2bd0ea18 NS |
125 | */ |
126 | int | |
a2ceac1f DC |
127 | xfs_imap_to_bp( |
128 | struct xfs_mount *mp, | |
129 | struct xfs_trans *tp, | |
130 | struct xfs_imap *imap, | |
131 | struct xfs_dinode **dipp, | |
132 | struct xfs_buf **bpp, | |
133 | uint buf_flags, | |
134 | uint iget_flags) | |
2bd0ea18 | 135 | { |
a2ceac1f DC |
136 | struct xfs_buf *bp; |
137 | int error; | |
2bd0ea18 | 138 | |
a2ceac1f DC |
139 | buf_flags |= XBF_UNMAPPED; |
140 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | |
141 | (int)imap->im_len, buf_flags, &bp, | |
142 | &xfs_inode_buf_ops); | |
143 | if (error) { | |
144 | if (error == EAGAIN) { | |
145 | ASSERT(buf_flags & XBF_TRYLOCK); | |
146 | return error; | |
147 | } | |
2bd0ea18 | 148 | |
a2ceac1f DC |
149 | if (error == EFSCORRUPTED && |
150 | (iget_flags & XFS_IGET_UNTRUSTED)) | |
151 | return XFS_ERROR(EINVAL); | |
2bd0ea18 | 152 | |
a2ceac1f DC |
153 | xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", |
154 | __func__, error); | |
155 | return error; | |
2bd0ea18 | 156 | } |
2bd0ea18 | 157 | |
2bd0ea18 | 158 | *bpp = bp; |
a2ceac1f | 159 | *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); |
2bd0ea18 NS |
160 | return 0; |
161 | } | |
162 | ||
5000d01d | 163 | void |
5e656dbb BN |
164 | xfs_dinode_from_disk( |
165 | xfs_icdinode_t *to, | |
56b2de80 | 166 | xfs_dinode_t *from) |
2bd0ea18 | 167 | { |
5e656dbb BN |
168 | to->di_magic = be16_to_cpu(from->di_magic); |
169 | to->di_mode = be16_to_cpu(from->di_mode); | |
170 | to->di_version = from ->di_version; | |
171 | to->di_format = from->di_format; | |
172 | to->di_onlink = be16_to_cpu(from->di_onlink); | |
173 | to->di_uid = be32_to_cpu(from->di_uid); | |
174 | to->di_gid = be32_to_cpu(from->di_gid); | |
175 | to->di_nlink = be32_to_cpu(from->di_nlink); | |
22bc10ed AM |
176 | to->di_projid_lo = be16_to_cpu(from->di_projid_lo); |
177 | to->di_projid_hi = be16_to_cpu(from->di_projid_hi); | |
5e656dbb BN |
178 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
179 | to->di_flushiter = be16_to_cpu(from->di_flushiter); | |
180 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); | |
181 | to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); | |
182 | to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); | |
183 | to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); | |
184 | to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); | |
185 | to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); | |
186 | to->di_size = be64_to_cpu(from->di_size); | |
187 | to->di_nblocks = be64_to_cpu(from->di_nblocks); | |
188 | to->di_extsize = be32_to_cpu(from->di_extsize); | |
189 | to->di_nextents = be32_to_cpu(from->di_nextents); | |
190 | to->di_anextents = be16_to_cpu(from->di_anextents); | |
191 | to->di_forkoff = from->di_forkoff; | |
192 | to->di_aformat = from->di_aformat; | |
193 | to->di_dmevmask = be32_to_cpu(from->di_dmevmask); | |
194 | to->di_dmstate = be16_to_cpu(from->di_dmstate); | |
195 | to->di_flags = be16_to_cpu(from->di_flags); | |
196 | to->di_gen = be32_to_cpu(from->di_gen); | |
41ce5f36 DC |
197 | |
198 | if (to->di_version == 3) { | |
199 | to->di_changecount = be64_to_cpu(from->di_changecount); | |
200 | to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); | |
201 | to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); | |
202 | to->di_flags2 = be64_to_cpu(from->di_flags2); | |
203 | to->di_ino = be64_to_cpu(from->di_ino); | |
204 | to->di_lsn = be64_to_cpu(from->di_lsn); | |
205 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | |
206 | platform_uuid_copy(&to->di_uuid, &from->di_uuid); | |
207 | } | |
5e656dbb BN |
208 | } |
209 | ||
210 | void | |
211 | xfs_dinode_to_disk( | |
56b2de80 | 212 | xfs_dinode_t *to, |
5e656dbb BN |
213 | xfs_icdinode_t *from) |
214 | { | |
215 | to->di_magic = cpu_to_be16(from->di_magic); | |
216 | to->di_mode = cpu_to_be16(from->di_mode); | |
217 | to->di_version = from ->di_version; | |
218 | to->di_format = from->di_format; | |
219 | to->di_onlink = cpu_to_be16(from->di_onlink); | |
220 | to->di_uid = cpu_to_be32(from->di_uid); | |
221 | to->di_gid = cpu_to_be32(from->di_gid); | |
222 | to->di_nlink = cpu_to_be32(from->di_nlink); | |
22bc10ed AM |
223 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); |
224 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | |
5e656dbb BN |
225 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
226 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | |
227 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | |
228 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); | |
229 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); | |
230 | to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); | |
231 | to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); | |
232 | to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); | |
233 | to->di_size = cpu_to_be64(from->di_size); | |
234 | to->di_nblocks = cpu_to_be64(from->di_nblocks); | |
235 | to->di_extsize = cpu_to_be32(from->di_extsize); | |
236 | to->di_nextents = cpu_to_be32(from->di_nextents); | |
237 | to->di_anextents = cpu_to_be16(from->di_anextents); | |
238 | to->di_forkoff = from->di_forkoff; | |
239 | to->di_aformat = from->di_aformat; | |
240 | to->di_dmevmask = cpu_to_be32(from->di_dmevmask); | |
241 | to->di_dmstate = cpu_to_be16(from->di_dmstate); | |
242 | to->di_flags = cpu_to_be16(from->di_flags); | |
243 | to->di_gen = cpu_to_be32(from->di_gen); | |
41ce5f36 DC |
244 | |
245 | if (from->di_version == 3) { | |
246 | to->di_changecount = cpu_to_be64(from->di_changecount); | |
247 | to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); | |
248 | to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); | |
249 | to->di_flags2 = cpu_to_be64(from->di_flags2); | |
250 | to->di_ino = cpu_to_be64(from->di_ino); | |
251 | to->di_lsn = cpu_to_be64(from->di_lsn); | |
252 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | |
253 | platform_uuid_copy(&to->di_uuid, &from->di_uuid); | |
254 | } | |
255 | } | |
256 | ||
257 | static bool | |
258 | xfs_dinode_verify( | |
259 | struct xfs_mount *mp, | |
260 | struct xfs_inode *ip, | |
261 | struct xfs_dinode *dip) | |
262 | { | |
263 | if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) | |
264 | return false; | |
265 | ||
266 | /* only version 3 or greater inodes are extensively verified here */ | |
267 | if (dip->di_version < 3) | |
268 | return true; | |
269 | ||
270 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | |
271 | return false; | |
272 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, | |
273 | offsetof(struct xfs_dinode, di_crc))) | |
274 | return false; | |
275 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) | |
276 | return false; | |
277 | if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) | |
278 | return false; | |
279 | return true; | |
280 | } | |
41ce5f36 DC |
281 | void |
282 | xfs_dinode_calc_crc( | |
283 | struct xfs_mount *mp, | |
284 | struct xfs_dinode *dip) | |
285 | { | |
286 | __uint32_t crc; | |
287 | ||
288 | if (dip->di_version < 3) | |
289 | return; | |
290 | ||
291 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); | |
292 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, | |
293 | offsetof(struct xfs_dinode, di_crc)); | |
294 | dip->di_crc = xfs_end_cksum(crc); | |
2bd0ea18 NS |
295 | } |
296 | ||
a2ceac1f DC |
297 | /* |
298 | * Read the disk inode attributes into the in-core inode structure. | |
299 | */ | |
300 | int | |
301 | xfs_iread( | |
302 | xfs_mount_t *mp, | |
303 | xfs_trans_t *tp, | |
304 | xfs_inode_t *ip, | |
305 | uint iget_flags) | |
306 | { | |
307 | xfs_buf_t *bp; | |
308 | xfs_dinode_t *dip; | |
309 | int error; | |
310 | ||
311 | /* | |
312 | * Fill in the location information in the in-core inode. | |
313 | */ | |
314 | error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); | |
315 | if (error) | |
316 | return error; | |
317 | ||
318 | /* | |
319 | * Get pointers to the on-disk inode and the buffer containing it. | |
320 | */ | |
321 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); | |
322 | if (error) | |
323 | return error; | |
324 | ||
41ce5f36 DC |
325 | /* even unallocated inodes are verified */ |
326 | if (!xfs_dinode_verify(mp, ip, dip)) { | |
327 | xfs_alert(mp, "%s: validation failed for inode %lld failed", | |
328 | __func__, ip->i_ino); | |
329 | ||
330 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); | |
331 | error = XFS_ERROR(EFSCORRUPTED); | |
a2ceac1f DC |
332 | goto out_brelse; |
333 | } | |
334 | ||
335 | /* | |
336 | * If the on-disk inode is already linked to a directory | |
337 | * entry, copy all of the inode into the in-core inode. | |
5d90ab5a | 338 | * xfs_iformat_fork() handles copying in the inode format |
a2ceac1f DC |
339 | * specific information. |
340 | * Otherwise, just get the truly permanent information. | |
341 | */ | |
342 | if (dip->di_mode) { | |
343 | xfs_dinode_from_disk(&ip->i_d, dip); | |
5d90ab5a | 344 | error = xfs_iformat_fork(ip, dip); |
a2ceac1f DC |
345 | if (error) { |
346 | #ifdef DEBUG | |
347 | xfs_alert(mp, "%s: xfs_iformat() returned error %d", | |
348 | __func__, error); | |
349 | #endif /* DEBUG */ | |
350 | goto out_brelse; | |
351 | } | |
352 | } else { | |
41ce5f36 DC |
353 | /* |
354 | * Partial initialisation of the in-core inode. Just the bits | |
355 | * that xfs_ialloc won't overwrite or relies on being correct. | |
356 | */ | |
a2ceac1f DC |
357 | ip->i_d.di_magic = be16_to_cpu(dip->di_magic); |
358 | ip->i_d.di_version = dip->di_version; | |
359 | ip->i_d.di_gen = be32_to_cpu(dip->di_gen); | |
360 | ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); | |
41ce5f36 DC |
361 | |
362 | if (dip->di_version == 3) { | |
363 | ip->i_d.di_ino = be64_to_cpu(dip->di_ino); | |
364 | uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); | |
365 | } | |
366 | ||
a2ceac1f DC |
367 | /* |
368 | * Make sure to pull in the mode here as well in | |
369 | * case the inode is released without being used. | |
370 | * This ensures that xfs_inactive() will see that | |
371 | * the inode is already free and not try to mess | |
372 | * with the uninitialized part of it. | |
373 | */ | |
374 | ip->i_d.di_mode = 0; | |
375 | } | |
376 | ||
377 | /* | |
378 | * The inode format changed when we moved the link count and | |
379 | * made it 32 bits long. If this is an old format inode, | |
380 | * convert it in memory to look like a new one. If it gets | |
381 | * flushed to disk we will convert back before flushing or | |
382 | * logging it. We zero out the new projid field and the old link | |
383 | * count field. We'll handle clearing the pad field (the remains | |
384 | * of the old uuid field) when we actually convert the inode to | |
385 | * the new format. We don't change the version number so that we | |
386 | * can distinguish this from a real new format inode. | |
387 | */ | |
388 | if (ip->i_d.di_version == 1) { | |
389 | ip->i_d.di_nlink = ip->i_d.di_onlink; | |
390 | ip->i_d.di_onlink = 0; | |
391 | xfs_set_projid(&ip->i_d, 0); | |
392 | } | |
393 | ||
394 | ip->i_delayed_blks = 0; | |
395 | ||
396 | /* | |
397 | * Mark the buffer containing the inode as something to keep | |
398 | * around for a while. This helps to keep recently accessed | |
399 | * meta-data in-core longer. | |
400 | */ | |
401 | xfs_buf_set_ref(bp, XFS_INO_REF); | |
402 | ||
403 | /* | |
404 | * Use xfs_trans_brelse() to release the buffer containing the | |
405 | * on-disk inode, because it was acquired with xfs_trans_read_buf() | |
406 | * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal | |
407 | * brelse(). If we're within a transaction, then xfs_trans_brelse() | |
408 | * will only release the buffer if it is not dirty within the | |
409 | * transaction. It will be OK to release the buffer in this case, | |
410 | * because inodes on disk are never destroyed and we will be | |
411 | * locking the new in-core inode before putting it in the hash | |
412 | * table where other processes can find it. Thus we don't have | |
413 | * to worry about the inode being changed just because we released | |
414 | * the buffer. | |
415 | */ | |
416 | out_brelse: | |
417 | xfs_trans_brelse(tp, bp); | |
418 | return error; | |
419 | } |