2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 * Calculate the worst case log unit reservation for a given superblock
26 * configuration. Copied and munged from the kernel code, and assumes a
27 * worst case header usage (maximum log buffer sizes)
30 xfs_log_calc_unit_res(
35 int iclog_header_size
;
39 if (xfs_sb_version_haslogv2(&mp
->m_sb
)) {
40 iclog_size
= XLOG_MAX_RECORD_BSIZE
;
41 iclog_header_size
= BBTOB(iclog_size
/ XLOG_HEADER_CYCLE_SIZE
);
43 iclog_size
= XLOG_BIG_RECORD_BSIZE
;
44 iclog_header_size
= BBSIZE
;
48 * Permanent reservations have up to 'cnt'-1 active log operations
49 * in the log. A unit in this case is the amount of space for one
50 * of these log operations. Normal reservations have a cnt of 1
51 * and their unit amount is the total amount of space required.
53 * The following lines of code account for non-transaction data
54 * which occupy space in the on-disk log.
56 * Normal form of a transaction is:
57 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
58 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
60 * We need to account for all the leadup data and trailer data
61 * around the transaction data.
62 * And then we need to account for the worst case in terms of using
62 * more space.
64 * The worst case will happen if:
65 * - the placement of the transaction happens to be such that the
66 * roundoff is at its maximum
67 * - the transaction data is synced before the commit record is synced
68 * i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
69 * Therefore the commit record is in its own Log Record.
70 * This can happen as the commit record is called with its
71 * own region to xlog_write().
72 * This then means that in the worst case, roundoff can happen for
73 * the commit-rec as well.
74 * The commit-rec is smaller than padding in this scenario and so it is
75 * not added separately.
78 /* for trans header */
79 unit_bytes
+= sizeof(xlog_op_header_t
);
80 unit_bytes
+= sizeof(xfs_trans_header_t
);
83 unit_bytes
+= sizeof(xlog_op_header_t
);
86 * for LR headers - the space for data in an iclog is the size minus
87 * the space used for the headers. If we use the iclog size, then we
88 * undercalculate the number of headers required.
90 * Furthermore - the addition of op headers for split-recs might
91 * increase the space required enough to require more log and op
92 * headers, so take that into account too.
94 * IMPORTANT: This reservation makes the assumption that if this
95 * transaction is the first in an iclog and hence has the LR headers
96 * accounted to it, then the remaining space in the iclog is
97 * exclusively for this transaction. i.e. if the transaction is larger
98 * than the iclog, it will be the only thing in that iclog.
99 * Fundamentally, this means we must pass the entire log vector to
100 * xlog_write to guarantee this.
102 iclog_space
= iclog_size
- iclog_header_size
;
103 num_headers
= howmany(unit_bytes
, iclog_space
);
105 /* for split-recs - ophdrs added when data split over LRs */
106 unit_bytes
+= sizeof(xlog_op_header_t
) * num_headers
;
108 /* add extra header reservations if we overrun */
109 while (!num_headers
||
110 howmany(unit_bytes
, iclog_space
) > num_headers
) {
111 unit_bytes
+= sizeof(xlog_op_header_t
);
114 unit_bytes
+= iclog_header_size
* num_headers
;
116 /* for commit-rec LR header - note: padding will subsume the ophdr */
117 unit_bytes
+= iclog_header_size
;
119 /* for roundoff padding for transaction data and one for commit record */
120 if (xfs_sb_version_haslogv2(&mp
->m_sb
) && mp
->m_sb
.sb_logsunit
> 1) {
121 /* log su roundoff */
122 unit_bytes
+= 2 * mp
->m_sb
.sb_logsunit
;
125 unit_bytes
+= 2 * BBSIZE
;
132 * Change the requested timestamp in the given inode.
134 * This was once shared with the kernel, but has diverged to the point
135 * where it's no longer worth the hassle of maintaining common code.
138 libxfs_trans_ichgtime(
139 struct xfs_trans
*tp
,
140 struct xfs_inode
*ip
,
146 gettimeofday(&stv
, (struct timezone
*)0);
147 tv
.tv_sec
= stv
.tv_sec
;
148 tv
.tv_nsec
= stv
.tv_usec
* 1000;
149 if (flags
& XFS_ICHGTIME_MOD
) {
150 ip
->i_d
.di_mtime
.t_sec
= (__int32_t
)tv
.tv_sec
;
151 ip
->i_d
.di_mtime
.t_nsec
= (__int32_t
)tv
.tv_nsec
;
153 if (flags
& XFS_ICHGTIME_CHG
) {
154 ip
->i_d
.di_ctime
.t_sec
= (__int32_t
)tv
.tv_sec
;
155 ip
->i_d
.di_ctime
.t_nsec
= (__int32_t
)tv
.tv_nsec
;
157 if (flags
& XFS_ICHGTIME_CREATE
) {
158 ip
->i_d
.di_crtime
.t_sec
= (__int32_t
)tv
.tv_sec
;
159 ip
->i_d
.di_crtime
.t_nsec
= (__int32_t
)tv
.tv_nsec
;
164 * Allocate an inode on disk and return a copy of its in-core version.
165 * Set mode, nlink, and rdev appropriately within the inode.
166 * The uid and gid for the inode are set according to the contents of
167 * the given cred structure.
169 * This was once shared with the kernel, but has diverged to the point
170 * where it's no longer worth the hassle of maintaining common code.
182 xfs_buf_t
**ialloc_context
,
191 * Call the space management code to pick
192 * the on-disk inode to be allocated.
194 error
= xfs_dialloc(tp
, pip
? pip
->i_ino
: 0, mode
, okalloc
,
195 ialloc_context
, &ino
);
198 if (*ialloc_context
|| ino
== NULLFSINO
) {
202 ASSERT(*ialloc_context
== NULL
);
204 error
= xfs_trans_iget(tp
->t_mountp
, tp
, ino
, 0, 0, &ip
);
209 ip
->i_d
.di_mode
= (__uint16_t
)mode
;
210 ip
->i_d
.di_onlink
= 0;
211 ip
->i_d
.di_nlink
= nlink
;
212 ASSERT(ip
->i_d
.di_nlink
== nlink
);
213 ip
->i_d
.di_uid
= cr
->cr_uid
;
214 ip
->i_d
.di_gid
= cr
->cr_gid
;
215 xfs_set_projid(&ip
->i_d
, pip
? 0 : fsx
->fsx_projid
);
216 memset(&(ip
->i_d
.di_pad
[0]), 0, sizeof(ip
->i_d
.di_pad
));
217 xfs_trans_ichgtime(tp
, ip
, XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
);
220 * We only support filesystems that understand v2 format inodes. So if
221 * this is currently an old format inode, then change the inode version
222 * number now. This way we only do the conversion here rather than here
223 * and in the flush/logging code.
225 if (ip
->i_d
.di_version
== 1) {
226 ip
->i_d
.di_version
= 2;
228 * old link count, projid_lo/hi field, pad field
233 if (pip
&& (pip
->i_d
.di_mode
& S_ISGID
)) {
234 ip
->i_d
.di_gid
= pip
->i_d
.di_gid
;
235 if ((pip
->i_d
.di_mode
& S_ISGID
) && (mode
& S_IFMT
) == S_IFDIR
)
236 ip
->i_d
.di_mode
|= S_ISGID
;
240 ip
->i_d
.di_nextents
= 0;
241 ASSERT(ip
->i_d
.di_nblocks
== 0);
243 * di_gen will have been taken care of in xfs_iread.
245 ip
->i_d
.di_extsize
= pip
? 0 : fsx
->fsx_extsize
;
246 ip
->i_d
.di_dmevmask
= 0;
247 ip
->i_d
.di_dmstate
= 0;
248 ip
->i_d
.di_flags
= pip
? 0 : fsx
->fsx_xflags
;
250 if (ip
->i_d
.di_version
== 3) {
251 ASSERT(ip
->i_d
.di_ino
== ino
);
252 ASSERT(uuid_equal(&ip
->i_d
.di_uuid
, &mp
->m_sb
.sb_uuid
));
254 ip
->i_d
.di_changecount
= 1;
256 ip
->i_d
.di_flags2
= 0;
257 memset(&(ip
->i_d
.di_pad2
[0]), 0, sizeof(ip
->i_d
.di_pad2
));
258 ip
->i_d
.di_crtime
= ip
->i_d
.di_mtime
;
261 flags
= XFS_ILOG_CORE
;
262 switch (mode
& S_IFMT
) {
265 /* doesn't make sense to set an rdev for these */
270 ip
->i_d
.di_format
= XFS_DINODE_FMT_DEV
;
271 ip
->i_df
.if_u2
.if_rdev
= rdev
;
272 flags
|= XFS_ILOG_DEV
;
276 if (pip
&& (pip
->i_d
.di_flags
& XFS_DIFLAG_ANY
)) {
279 if ((mode
& S_IFMT
) == S_IFDIR
) {
280 if (pip
->i_d
.di_flags
& XFS_DIFLAG_RTINHERIT
)
281 di_flags
|= XFS_DIFLAG_RTINHERIT
;
282 if (pip
->i_d
.di_flags
& XFS_DIFLAG_EXTSZINHERIT
) {
283 di_flags
|= XFS_DIFLAG_EXTSZINHERIT
;
284 ip
->i_d
.di_extsize
= pip
->i_d
.di_extsize
;
287 if (pip
->i_d
.di_flags
& XFS_DIFLAG_RTINHERIT
) {
288 di_flags
|= XFS_DIFLAG_REALTIME
;
290 if (pip
->i_d
.di_flags
& XFS_DIFLAG_EXTSZINHERIT
) {
291 di_flags
|= XFS_DIFLAG_EXTSIZE
;
292 ip
->i_d
.di_extsize
= pip
->i_d
.di_extsize
;
295 if (pip
->i_d
.di_flags
& XFS_DIFLAG_PROJINHERIT
)
296 di_flags
|= XFS_DIFLAG_PROJINHERIT
;
297 ip
->i_d
.di_flags
|= di_flags
;
301 ip
->i_d
.di_format
= XFS_DINODE_FMT_EXTENTS
;
302 ip
->i_df
.if_flags
= XFS_IFEXTENTS
;
303 ip
->i_df
.if_bytes
= ip
->i_df
.if_real_bytes
= 0;
304 ip
->i_df
.if_u1
.if_extents
= NULL
;
309 /* Attribute fork settings for new inode. */
310 ip
->i_d
.di_aformat
= XFS_DINODE_FMT_EXTENTS
;
311 ip
->i_d
.di_anextents
= 0;
314 * set up the inode ops structure that the libxfs code relies on
316 if (S_ISDIR(ip
->i_d
.di_mode
))
317 ip
->d_ops
= ip
->i_mount
->m_dir_inode_ops
;
319 ip
->d_ops
= ip
->i_mount
->m_nondir_inode_ops
;
322 * Log the new values stuffed into the inode.
324 xfs_trans_log_inode(tp
, ip
, flags
);
334 xfs_bmbt_rec_host_t
*ep
;
336 xfs_extnum_t nextents
;
338 printf("Inode %lx\n", (unsigned long)ip
);
339 printf(" i_ino %llx\n", (unsigned long long)ip
->i_ino
);
341 if (ip
->i_df
.if_flags
& XFS_IFEXTENTS
)
344 printf(" i_df.if_bytes %d\n", ip
->i_df
.if_bytes
);
345 printf(" i_df.if_u1.if_extents/if_data %lx\n",
346 (unsigned long)ip
->i_df
.if_u1
.if_extents
);
347 if (ip
->i_df
.if_flags
& XFS_IFEXTENTS
) {
348 nextents
= ip
->i_df
.if_bytes
/ (uint
)sizeof(*ep
);
349 for (ep
= ip
->i_df
.if_u1
.if_extents
, i
= 0; i
< nextents
;
353 xfs_bmbt_get_all(ep
, &rec
);
354 printf("\t%d: startoff %llu, startblock 0x%llx,"
355 " blockcount %llu, state %d\n",
356 i
, (unsigned long long)rec
.br_startoff
,
357 (unsigned long long)rec
.br_startblock
,
358 (unsigned long long)rec
.br_blockcount
,
362 printf(" i_df.if_broot %lx\n", (unsigned long)ip
->i_df
.if_broot
);
363 printf(" i_df.if_broot_bytes %x\n", ip
->i_df
.if_broot_bytes
);
366 printf("\nOn disk portion\n");
367 printf(" di_magic %x\n", dip
->di_magic
);
368 printf(" di_mode %o\n", dip
->di_mode
);
369 printf(" di_version %x\n", (uint
)dip
->di_version
);
370 switch (ip
->i_d
.di_format
) {
371 case XFS_DINODE_FMT_LOCAL
:
372 printf(" Inline inode\n");
374 case XFS_DINODE_FMT_EXTENTS
:
375 printf(" Extents inode\n");
377 case XFS_DINODE_FMT_BTREE
:
378 printf(" B-tree inode\n");
381 printf(" Other inode\n");
384 printf(" di_nlink %x\n", dip
->di_nlink
);
385 printf(" di_uid %d\n", dip
->di_uid
);
386 printf(" di_gid %d\n", dip
->di_gid
);
387 printf(" di_nextents %d\n", dip
->di_nextents
);
388 printf(" di_size %llu\n", (unsigned long long)dip
->di_size
);
389 printf(" di_gen %x\n", dip
->di_gen
);
390 printf(" di_extsize %d\n", dip
->di_extsize
);
391 printf(" di_flags %x\n", dip
->di_flags
);
392 printf(" di_nblocks %llu\n", (unsigned long long)dip
->di_nblocks
);
396 * Writes a modified inode's changes out to the inode's on disk home.
397 * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
400 libxfs_iflush_int(xfs_inode_t
*ip
, xfs_buf_t
*bp
)
402 xfs_inode_log_item_t
*iip
;
406 ASSERT(XFS_BUF_FSPRIVATE(bp
, void *) != NULL
);
407 ASSERT(ip
->i_d
.di_format
!= XFS_DINODE_FMT_BTREE
||
408 ip
->i_d
.di_nextents
> ip
->i_df
.if_ext_max
);
409 ASSERT(ip
->i_d
.di_version
> 1);
414 /* set *dip = inode's place in the buffer */
415 dip
= (xfs_dinode_t
*)xfs_buf_offset(bp
, ip
->i_imap
.im_boffset
);
417 ASSERT(ip
->i_d
.di_magic
== XFS_DINODE_MAGIC
);
418 if ((ip
->i_d
.di_mode
& S_IFMT
) == S_IFREG
) {
419 ASSERT( (ip
->i_d
.di_format
== XFS_DINODE_FMT_EXTENTS
) ||
420 (ip
->i_d
.di_format
== XFS_DINODE_FMT_BTREE
) );
422 else if ((ip
->i_d
.di_mode
& S_IFMT
) == S_IFDIR
) {
423 ASSERT( (ip
->i_d
.di_format
== XFS_DINODE_FMT_EXTENTS
) ||
424 (ip
->i_d
.di_format
== XFS_DINODE_FMT_BTREE
) ||
425 (ip
->i_d
.di_format
== XFS_DINODE_FMT_LOCAL
) );
427 ASSERT(ip
->i_d
.di_nextents
+ip
->i_d
.di_anextents
<= ip
->i_d
.di_nblocks
);
428 ASSERT(ip
->i_d
.di_forkoff
<= mp
->m_sb
.sb_inodesize
);
430 /* bump the change count on v3 inodes */
431 if (ip
->i_d
.di_version
== 3)
432 ip
->i_d
.di_changecount
++;
435 * Copy the dirty parts of the inode into the on-disk
436 * inode. We always copy out the core of the inode,
437 * because if the inode is dirty at all the core must be.
440 xfs_dinode_to_disk(dip
, &ip
->i_d
);
442 xfs_iflush_fork(ip
, dip
, iip
, XFS_DATA_FORK
);
444 xfs_iflush_fork(ip
, dip
, iip
, XFS_ATTR_FORK
);
446 /* update the lsn in the on disk inode if required */
447 if (ip
->i_d
.di_version
== 3)
448 dip
->di_lsn
= cpu_to_be64(iip
->ili_item
.li_lsn
);
450 /* generate the checksum. */
451 xfs_dinode_calc_crc(mp
, dip
);
457 * Utility routine commonly used to apply a delta to a field in the
458 * in-core superblock.
459 * Switch on the field indicated and apply the delta to that field.
460 * Fields are not allowed to dip below zero, so if the delta would
461 * do this do not apply it and return EINVAL.
463 * Originally derived from xfs_mod_incore_sb_unlocked().
466 libxfs_mod_incore_sb(
468 xfs_sb_field_t field
,
472 long long lcounter
; /* long counter for 64 bit fields */
475 case XFS_SBS_FDBLOCKS
:
476 lcounter
= (long long)mp
->m_sb
.sb_fdblocks
;
479 return XFS_ERROR(ENOSPC
);
480 mp
->m_sb
.sb_fdblocks
= lcounter
;
484 return XFS_ERROR(EINVAL
);
491 xfs_bmap_free_t
*flist
,
494 xfs_bmap_free_item_t
*free
; /* free extent list item */
495 xfs_bmap_free_item_t
*next
; /* next item on free list */
498 if (flist
->xbf_count
== 0) {
503 for (free
= flist
->xbf_first
; free
!= NULL
; free
= next
) {
504 next
= free
->xbfi_next
;
505 if ((error
= xfs_free_extent(*tp
, free
->xbfi_startblock
,
506 free
->xbfi_blockcount
)))
508 xfs_bmap_del_free(flist
, NULL
, free
);
515 * This routine allocates disk space for the given file.
516 * Originally derived from xfs_alloc_file_space().
519 libxfs_alloc_file_space(
528 xfs_filblks_t datablocks
;
529 xfs_filblks_t allocated_fsb
;
530 xfs_filblks_t allocatesize_fsb
;
531 xfs_fsblock_t firstfsb
;
532 xfs_bmap_free_t free_list
;
533 xfs_bmbt_irec_t
*imapp
;
534 xfs_bmbt_irec_t imaps
[1];
537 xfs_fileoff_t startoffset_fsb
;
550 xfs_bmapi_flags
= alloc_type
? XFS_BMAPI_PREALLOC
: 0;
552 startoffset_fsb
= XFS_B_TO_FSBT(mp
, offset
);
553 allocatesize_fsb
= XFS_B_TO_FSB(mp
, count
);
555 /* allocate file space until done or until there is an error */
556 while (allocatesize_fsb
&& !error
) {
557 datablocks
= allocatesize_fsb
;
559 tp
= xfs_trans_alloc(mp
, XFS_TRANS_DIOSTRAT
);
560 resblks
= (uint
)XFS_DIOSTRAT_SPACE_RES(mp
, datablocks
);
561 error
= xfs_trans_reserve(tp
, &M_RES(mp
)->tr_write
,
564 * Check for running out of space
568 * Free the transaction structure.
570 ASSERT(error
== ENOSPC
);
571 xfs_trans_cancel(tp
, 0);
574 xfs_trans_ijoin(tp
, ip
, 0);
576 xfs_bmap_init(&free_list
, &firstfsb
);
577 error
= xfs_bmapi_write(tp
, ip
, startoffset_fsb
, allocatesize_fsb
,
578 xfs_bmapi_flags
, &firstfsb
, 0, imapp
,
579 &reccount
, &free_list
);
584 /* complete the transaction */
585 error
= xfs_bmap_finish(&tp
, &free_list
, &committed
);
589 error
= xfs_trans_commit(tp
, 0);
593 allocated_fsb
= imapp
->br_blockcount
;
597 startoffset_fsb
+= allocated_fsb
;
598 allocatesize_fsb
-= allocated_fsb
;
602 error0
: /* Cancel bmap, cancel trans */
603 xfs_bmap_cancel(&free_list
);
604 xfs_trans_cancel(tp
, 0);
609 libxfs_log2_roundup(unsigned int i
)
613 for (rval
= 0; rval
< NBBY
* sizeof(i
); rval
++) {
614 if ((1 << rval
) >= i
)
621 * Wrapper around call to libxfs_ialloc. Takes care of committing and
622 * allocating a new transaction as needed.
624 * Originally there were two copies of this code - one in mkfs, the
625 * other in repair - now there is just the one.
638 xfs_buf_t
*ialloc_context
;
643 ialloc_context
= (xfs_buf_t
*)0;
644 error
= libxfs_ialloc(*tp
, pip
, mode
, nlink
, rdev
, cr
, fsx
,
645 1, &ialloc_context
, &ip
);
650 if (!ialloc_context
&& !ip
) {
652 return XFS_ERROR(ENOSPC
);
655 if (ialloc_context
) {
656 struct xfs_trans_res tres
;
658 xfs_trans_bhold(*tp
, ialloc_context
);
659 tres
.tr_logres
= (*tp
)->t_log_res
;
660 tres
.tr_logcount
= (*tp
)->t_log_count
;
662 ntp
= xfs_trans_dup(*tp
);
663 xfs_trans_commit(*tp
, 0);
665 tres
.tr_logflags
= XFS_TRANS_PERM_LOG_RES
;
666 error
= xfs_trans_reserve(*tp
, &tres
, 0, 0);
668 fprintf(stderr
, _("%s: cannot reserve space: %s\n"),
669 progname
, strerror(error
));
672 xfs_trans_bjoin(*tp
, ialloc_context
);
673 error
= libxfs_ialloc(*tp
, pip
, mode
, nlink
, rdev
, cr
,
674 fsx
, 1, &ialloc_context
, &ip
);
686 * Userspace versions of common diagnostic routines (varargs fun).
689 libxfs_fs_repair_cmn_err(int level
, xfs_mount_t
*mp
, char *fmt
, ...)
694 vfprintf(stderr
, fmt
, ap
);
695 fprintf(stderr
, " This is a bug.\n");
696 fprintf(stderr
, "%s version %s\n", progname
, VERSION
);
697 fprintf(stderr
, "Please capture the filesystem metadata with "
698 "xfs_metadump and\nreport it to xfs@oss.sgi.com.\n");
703 libxfs_fs_cmn_err(int level
, xfs_mount_t
*mp
, char *fmt
, ...)
708 vfprintf(stderr
, fmt
, ap
);
714 cmn_err(int level
, char *fmt
, ...)
719 vfprintf(stderr
, fmt
, ap
);
725 * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
726 * values, and omit the stack trace unless the error level is tuned high.
732 xfs_alert(NULL
, "Metadata %s detected at block 0x%llx/0x%x",
733 bp
->b_error
== EFSBADCRC
? "CRC error" : "corruption",
734 bp
->b_bn
, BBTOB(bp
->b_length
));