2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 * Calculate the worst case log unit reservation for a given superblock
26 * configuration. Copied and munged from the kernel code, and assumes a
27 * worst case header usage (maximum log buffer sizes)
30 xfs_log_calc_unit_res(
35 int iclog_header_size
;
39 if (xfs_sb_version_haslogv2(&mp
->m_sb
)) {
40 iclog_size
= XLOG_MAX_RECORD_BSIZE
;
41 iclog_header_size
= BBTOB(iclog_size
/ XLOG_HEADER_CYCLE_SIZE
);
43 iclog_size
= XLOG_BIG_RECORD_BSIZE
;
44 iclog_header_size
= BBSIZE
;
48 * Permanent reservations have up to 'cnt'-1 active log operations
49 * in the log. A unit in this case is the amount of space for one
50 * of these log operations. Normal reservations have a cnt of 1
51 * and their unit amount is the total amount of space required.
53 * The following lines of code account for non-transaction data
54 * which occupy space in the on-disk log.
56 * Normal form of a transaction is:
57 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
58 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
60 * We need to account for all the leadup data and trailer data
61 * around the transaction data.
62 * And then we need to account for the worst case in terms of using more space.
64 * The worst case will happen if:
65 * - the placement of the transaction happens to be such that the
66 * roundoff is at its maximum
67 * - the transaction data is synced before the commit record is synced
68 * i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
69 * Therefore the commit record is in its own Log Record.
70 * This can happen as the commit record is called with its
71 * own region to xlog_write().
72 * This then means that in the worst case, roundoff can happen for
73 * the commit-rec as well.
74 * The commit-rec is smaller than padding in this scenario and so it is
75 * not added separately.
78 /* for trans header */
79 unit_bytes
+= sizeof(xlog_op_header_t
);
80 unit_bytes
+= sizeof(xfs_trans_header_t
);
83 unit_bytes
+= sizeof(xlog_op_header_t
);
86 * for LR headers - the space for data in an iclog is the size minus
87 * the space used for the headers. If we use the iclog size, then we
88 * undercalculate the number of headers required.
90 * Furthermore - the addition of op headers for split-recs might
91 * increase the space required enough to require more log and op
92 * headers, so take that into account too.
94 * IMPORTANT: This reservation makes the assumption that if this
95 * transaction is the first in an iclog and hence has the LR headers
96 * accounted to it, then the remaining space in the iclog is
97 * exclusively for this transaction. i.e. if the transaction is larger
98 * than the iclog, it will be the only thing in that iclog.
99 * Fundamentally, this means we must pass the entire log vector to
100 * xlog_write to guarantee this.
102 iclog_space
= iclog_size
- iclog_header_size
;
103 num_headers
= howmany(unit_bytes
, iclog_space
);
105 /* for split-recs - ophdrs added when data split over LRs */
106 unit_bytes
+= sizeof(xlog_op_header_t
) * num_headers
;
108 /* add extra header reservations if we overrun */
109 while (!num_headers
||
110 howmany(unit_bytes
, iclog_space
) > num_headers
) {
111 unit_bytes
+= sizeof(xlog_op_header_t
);
114 unit_bytes
+= iclog_header_size
* num_headers
;
116 /* for commit-rec LR header - note: padding will subsume the ophdr */
117 unit_bytes
+= iclog_header_size
;
119 /* for roundoff padding for transaction data and one for commit record */
120 if (xfs_sb_version_haslogv2(&mp
->m_sb
) && mp
->m_sb
.sb_logsunit
> 1) {
121 /* log su roundoff */
122 unit_bytes
+= 2 * mp
->m_sb
.sb_logsunit
;
125 unit_bytes
+= 2 * BBSIZE
;
132 * Change the requested timestamp in the given inode.
134 * This was once shared with the kernel, but has diverged to the point
135 * where it's no longer worth the hassle of maintaining common code.
138 libxfs_trans_ichgtime(
139 struct xfs_trans
*tp
,
140 struct xfs_inode
*ip
,
146 gettimeofday(&stv
, (struct timezone
*)0);
147 tv
.tv_sec
= stv
.tv_sec
;
148 tv
.tv_nsec
= stv
.tv_usec
* 1000;
149 if (flags
& XFS_ICHGTIME_MOD
) {
150 ip
->i_d
.di_mtime
.t_sec
= (__int32_t
)tv
.tv_sec
;
151 ip
->i_d
.di_mtime
.t_nsec
= (__int32_t
)tv
.tv_nsec
;
153 if (flags
& XFS_ICHGTIME_CHG
) {
154 ip
->i_d
.di_ctime
.t_sec
= (__int32_t
)tv
.tv_sec
;
155 ip
->i_d
.di_ctime
.t_nsec
= (__int32_t
)tv
.tv_nsec
;
157 if (flags
& XFS_ICHGTIME_CREATE
) {
158 ip
->i_d
.di_crtime
.t_sec
= (__int32_t
)tv
.tv_sec
;
159 ip
->i_d
.di_crtime
.t_nsec
= (__int32_t
)tv
.tv_nsec
;
164 * Allocate an inode on disk and return a copy of its in-core version.
165 * Set mode, nlink, and rdev appropriately within the inode.
166 * The uid and gid for the inode are set according to the contents of
167 * the given cred structure.
169 * This was once shared with the kernel, but has diverged to the point
170 * where it's no longer worth the hassle of maintaining common code.
182 xfs_buf_t
**ialloc_context
,
191 * Call the space management code to pick
192 * the on-disk inode to be allocated.
194 error
= xfs_dialloc(tp
, pip
? pip
->i_ino
: 0, mode
, okalloc
,
195 ialloc_context
, &ino
);
198 if (*ialloc_context
|| ino
== NULLFSINO
) {
202 ASSERT(*ialloc_context
== NULL
);
204 error
= xfs_trans_iget(tp
->t_mountp
, tp
, ino
, 0, 0, &ip
);
209 ip
->i_d
.di_mode
= (__uint16_t
)mode
;
210 ip
->i_d
.di_onlink
= 0;
211 ip
->i_d
.di_nlink
= nlink
;
212 ASSERT(ip
->i_d
.di_nlink
== nlink
);
213 ip
->i_d
.di_uid
= cr
->cr_uid
;
214 ip
->i_d
.di_gid
= cr
->cr_gid
;
215 xfs_set_projid(&ip
->i_d
, pip
? 0 : fsx
->fsx_projid
);
216 memset(&(ip
->i_d
.di_pad
[0]), 0, sizeof(ip
->i_d
.di_pad
));
217 xfs_trans_ichgtime(tp
, ip
, XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
);
220 * If the superblock version is up to where we support new format
221 * inodes and this is currently an old format inode, then change
222 * the inode version number now. This way we only do the conversion
223 * here rather than here and in the flush/logging code.
225 if (xfs_sb_version_hasnlink(&tp
->t_mountp
->m_sb
) &&
226 ip
->i_d
.di_version
== 1) {
227 ip
->i_d
.di_version
= 2;
229 * old link count, projid_lo/hi field, pad field
234 if (pip
&& (pip
->i_d
.di_mode
& S_ISGID
)) {
235 ip
->i_d
.di_gid
= pip
->i_d
.di_gid
;
236 if ((pip
->i_d
.di_mode
& S_ISGID
) && (mode
& S_IFMT
) == S_IFDIR
)
237 ip
->i_d
.di_mode
|= S_ISGID
;
241 ip
->i_d
.di_nextents
= 0;
242 ASSERT(ip
->i_d
.di_nblocks
== 0);
244 * di_gen will have been taken care of in xfs_iread.
246 ip
->i_d
.di_extsize
= pip
? 0 : fsx
->fsx_extsize
;
247 ip
->i_d
.di_dmevmask
= 0;
248 ip
->i_d
.di_dmstate
= 0;
249 ip
->i_d
.di_flags
= pip
? 0 : fsx
->fsx_xflags
;
251 if (ip
->i_d
.di_version
== 3) {
252 ASSERT(ip
->i_d
.di_ino
== ino
);
253 ASSERT(uuid_equal(&ip
->i_d
.di_uuid
, &mp
->m_sb
.sb_uuid
));
255 ip
->i_d
.di_changecount
= 1;
257 ip
->i_d
.di_flags2
= 0;
258 memset(&(ip
->i_d
.di_pad2
[0]), 0, sizeof(ip
->i_d
.di_pad2
));
259 ip
->i_d
.di_crtime
= ip
->i_d
.di_mtime
;
262 flags
= XFS_ILOG_CORE
;
263 switch (mode
& S_IFMT
) {
266 /* doesn't make sense to set an rdev for these */
271 ip
->i_d
.di_format
= XFS_DINODE_FMT_DEV
;
272 ip
->i_df
.if_u2
.if_rdev
= rdev
;
273 flags
|= XFS_ILOG_DEV
;
277 if (pip
&& (pip
->i_d
.di_flags
& XFS_DIFLAG_ANY
)) {
280 if ((mode
& S_IFMT
) == S_IFDIR
) {
281 if (pip
->i_d
.di_flags
& XFS_DIFLAG_RTINHERIT
)
282 di_flags
|= XFS_DIFLAG_RTINHERIT
;
283 if (pip
->i_d
.di_flags
& XFS_DIFLAG_EXTSZINHERIT
) {
284 di_flags
|= XFS_DIFLAG_EXTSZINHERIT
;
285 ip
->i_d
.di_extsize
= pip
->i_d
.di_extsize
;
288 if (pip
->i_d
.di_flags
& XFS_DIFLAG_RTINHERIT
) {
289 di_flags
|= XFS_DIFLAG_REALTIME
;
291 if (pip
->i_d
.di_flags
& XFS_DIFLAG_EXTSZINHERIT
) {
292 di_flags
|= XFS_DIFLAG_EXTSIZE
;
293 ip
->i_d
.di_extsize
= pip
->i_d
.di_extsize
;
296 if (pip
->i_d
.di_flags
& XFS_DIFLAG_PROJINHERIT
)
297 di_flags
|= XFS_DIFLAG_PROJINHERIT
;
298 ip
->i_d
.di_flags
|= di_flags
;
302 ip
->i_d
.di_format
= XFS_DINODE_FMT_EXTENTS
;
303 ip
->i_df
.if_flags
= XFS_IFEXTENTS
;
304 ip
->i_df
.if_bytes
= ip
->i_df
.if_real_bytes
= 0;
305 ip
->i_df
.if_u1
.if_extents
= NULL
;
310 /* Attribute fork settings for new inode. */
311 ip
->i_d
.di_aformat
= XFS_DINODE_FMT_EXTENTS
;
312 ip
->i_d
.di_anextents
= 0;
315 * Log the new values stuffed into the inode.
317 xfs_trans_log_inode(tp
, ip
, flags
);
327 xfs_bmbt_rec_host_t
*ep
;
329 xfs_extnum_t nextents
;
331 printf("Inode %lx\n", (unsigned long)ip
);
332 printf(" i_ino %llx\n", (unsigned long long)ip
->i_ino
);
334 if (ip
->i_df
.if_flags
& XFS_IFEXTENTS
)
337 printf(" i_df.if_bytes %d\n", ip
->i_df
.if_bytes
);
338 printf(" i_df.if_u1.if_extents/if_data %lx\n",
339 (unsigned long)ip
->i_df
.if_u1
.if_extents
);
340 if (ip
->i_df
.if_flags
& XFS_IFEXTENTS
) {
341 nextents
= ip
->i_df
.if_bytes
/ (uint
)sizeof(*ep
);
342 for (ep
= ip
->i_df
.if_u1
.if_extents
, i
= 0; i
< nextents
;
346 xfs_bmbt_get_all(ep
, &rec
);
347 printf("\t%d: startoff %llu, startblock 0x%llx,"
348 " blockcount %llu, state %d\n",
349 i
, (unsigned long long)rec
.br_startoff
,
350 (unsigned long long)rec
.br_startblock
,
351 (unsigned long long)rec
.br_blockcount
,
355 printf(" i_df.if_broot %lx\n", (unsigned long)ip
->i_df
.if_broot
);
356 printf(" i_df.if_broot_bytes %x\n", ip
->i_df
.if_broot_bytes
);
359 printf("\nOn disk portion\n");
360 printf(" di_magic %x\n", dip
->di_magic
);
361 printf(" di_mode %o\n", dip
->di_mode
);
362 printf(" di_version %x\n", (uint
)dip
->di_version
);
363 switch (ip
->i_d
.di_format
) {
364 case XFS_DINODE_FMT_LOCAL
:
365 printf(" Inline inode\n");
367 case XFS_DINODE_FMT_EXTENTS
:
368 printf(" Extents inode\n");
370 case XFS_DINODE_FMT_BTREE
:
371 printf(" B-tree inode\n");
374 printf(" Other inode\n");
377 printf(" di_nlink %x\n", dip
->di_nlink
);
378 printf(" di_uid %d\n", dip
->di_uid
);
379 printf(" di_gid %d\n", dip
->di_gid
);
380 printf(" di_nextents %d\n", dip
->di_nextents
);
381 printf(" di_size %llu\n", (unsigned long long)dip
->di_size
);
382 printf(" di_gen %x\n", dip
->di_gen
);
383 printf(" di_extsize %d\n", dip
->di_extsize
);
384 printf(" di_flags %x\n", dip
->di_flags
);
385 printf(" di_nblocks %llu\n", (unsigned long long)dip
->di_nblocks
);
389 * Writes a modified inode's changes out to the inode's on disk home.
390 * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
393 libxfs_iflush_int(xfs_inode_t
*ip
, xfs_buf_t
*bp
)
395 xfs_inode_log_item_t
*iip
;
399 ASSERT(XFS_BUF_FSPRIVATE(bp
, void *) != NULL
);
400 ASSERT(ip
->i_d
.di_format
!= XFS_DINODE_FMT_BTREE
||
401 ip
->i_d
.di_nextents
> ip
->i_df
.if_ext_max
);
406 /* set *dip = inode's place in the buffer */
407 dip
= (xfs_dinode_t
*)xfs_buf_offset(bp
, ip
->i_imap
.im_boffset
);
409 ASSERT(ip
->i_d
.di_magic
== XFS_DINODE_MAGIC
);
410 if ((ip
->i_d
.di_mode
& S_IFMT
) == S_IFREG
) {
411 ASSERT( (ip
->i_d
.di_format
== XFS_DINODE_FMT_EXTENTS
) ||
412 (ip
->i_d
.di_format
== XFS_DINODE_FMT_BTREE
) );
414 else if ((ip
->i_d
.di_mode
& S_IFMT
) == S_IFDIR
) {
415 ASSERT( (ip
->i_d
.di_format
== XFS_DINODE_FMT_EXTENTS
) ||
416 (ip
->i_d
.di_format
== XFS_DINODE_FMT_BTREE
) ||
417 (ip
->i_d
.di_format
== XFS_DINODE_FMT_LOCAL
) );
419 ASSERT(ip
->i_d
.di_nextents
+ip
->i_d
.di_anextents
<= ip
->i_d
.di_nblocks
);
420 ASSERT(ip
->i_d
.di_forkoff
<= mp
->m_sb
.sb_inodesize
);
422 /* bump the change count on v3 inodes */
423 if (ip
->i_d
.di_version
== 3)
424 ip
->i_d
.di_changecount
++;
427 * Copy the dirty parts of the inode into the on-disk
428 * inode. We always copy out the core of the inode,
429 * because if the inode is dirty at all the core must be.
432 xfs_dinode_to_disk(dip
, &ip
->i_d
);
435 * If this is really an old format inode and the superblock version
436 * has not been updated to support only new format inodes, then
437 * convert back to the old inode format. If the superblock version
438 * has been updated, then make the conversion permanent.
440 ASSERT(ip
->i_d
.di_version
== 1 ||
441 xfs_sb_version_hasnlink(&mp
->m_sb
));
442 if (ip
->i_d
.di_version
== 1) {
443 if (!xfs_sb_version_hasnlink(&mp
->m_sb
)) {
447 ASSERT(ip
->i_d
.di_nlink
<= XFS_MAXLINK_1
);
448 dip
->di_onlink
= cpu_to_be16(ip
->i_d
.di_nlink
);
451 * The superblock version has already been bumped,
452 * so just make the conversion to the new inode format permanent.
455 ip
->i_d
.di_version
= 2;
457 ip
->i_d
.di_onlink
= 0;
459 memset(&(ip
->i_d
.di_pad
[0]), 0, sizeof(ip
->i_d
.di_pad
));
460 memset(&(dip
->di_pad
[0]), 0, sizeof(dip
->di_pad
));
461 ASSERT(xfs_get_projid(&ip
->i_d
) == 0);
465 xfs_iflush_fork(ip
, dip
, iip
, XFS_DATA_FORK
, bp
);
467 xfs_iflush_fork(ip
, dip
, iip
, XFS_ATTR_FORK
, bp
);
469 /* update the lsn in the on disk inode if required */
470 if (ip
->i_d
.di_version
== 3)
471 dip
->di_lsn
= cpu_to_be64(iip
->ili_item
.li_lsn
);
473 /* generate the checksum. */
474 xfs_dinode_calc_crc(mp
, dip
);
480 * Utility routine commonly used to apply a delta to a field in the
481 * in-core superblock.
482 * Switch on the field indicated and apply the delta to that field.
483 * Fields are not allowed to dip below zero, so if the delta would
484 * do this do not apply it and return EINVAL.
486 * Originally derived from xfs_mod_incore_sb_unlocked().
489 libxfs_mod_incore_sb(
491 xfs_sb_field_t field
,
495 long long lcounter
; /* long counter for 64 bit fields */
498 case XFS_SBS_FDBLOCKS
:
499 lcounter
= (long long)mp
->m_sb
.sb_fdblocks
;
502 return XFS_ERROR(ENOSPC
);
503 mp
->m_sb
.sb_fdblocks
= lcounter
;
507 return XFS_ERROR(EINVAL
);
514 xfs_bmap_free_t
*flist
,
517 xfs_bmap_free_item_t
*free
; /* free extent list item */
518 xfs_bmap_free_item_t
*next
; /* next item on free list */
521 if (flist
->xbf_count
== 0) {
526 for (free
= flist
->xbf_first
; free
!= NULL
; free
= next
) {
527 next
= free
->xbfi_next
;
528 if ((error
= xfs_free_extent(*tp
, free
->xbfi_startblock
,
529 free
->xbfi_blockcount
)))
531 xfs_bmap_del_free(flist
, NULL
, free
);
538 * This routine allocates disk space for the given file.
539 * Originally derived from xfs_alloc_file_space().
542 libxfs_alloc_file_space(
551 xfs_filblks_t datablocks
;
552 xfs_filblks_t allocated_fsb
;
553 xfs_filblks_t allocatesize_fsb
;
554 xfs_fsblock_t firstfsb
;
555 xfs_bmap_free_t free_list
;
556 xfs_bmbt_irec_t
*imapp
;
557 xfs_bmbt_irec_t imaps
[1];
560 xfs_fileoff_t startoffset_fsb
;
573 xfs_bmapi_flags
= alloc_type
? XFS_BMAPI_PREALLOC
: 0;
575 startoffset_fsb
= XFS_B_TO_FSBT(mp
, offset
);
576 allocatesize_fsb
= XFS_B_TO_FSB(mp
, count
);
578 /* allocate file space until done or until there is an error */
579 while (allocatesize_fsb
&& !error
) {
580 datablocks
= allocatesize_fsb
;
582 tp
= xfs_trans_alloc(mp
, XFS_TRANS_DIOSTRAT
);
583 resblks
= (uint
)XFS_DIOSTRAT_SPACE_RES(mp
, datablocks
);
584 error
= xfs_trans_reserve(tp
, &M_RES(mp
)->tr_write
,
587 * Check for running out of space
591 * Free the transaction structure.
593 ASSERT(error
== ENOSPC
);
594 xfs_trans_cancel(tp
, 0);
597 xfs_trans_ijoin(tp
, ip
, 0);
599 xfs_bmap_init(&free_list
, &firstfsb
);
600 error
= xfs_bmapi_write(tp
, ip
, startoffset_fsb
, allocatesize_fsb
,
601 xfs_bmapi_flags
, &firstfsb
, 0, imapp
,
602 &reccount
, &free_list
);
607 /* complete the transaction */
608 error
= xfs_bmap_finish(&tp
, &free_list
, &committed
);
612 error
= xfs_trans_commit(tp
, 0);
616 allocated_fsb
= imapp
->br_blockcount
;
620 startoffset_fsb
+= allocated_fsb
;
621 allocatesize_fsb
-= allocated_fsb
;
625 error0
: /* Cancel bmap, cancel trans */
626 xfs_bmap_cancel(&free_list
);
627 xfs_trans_cancel(tp
, 0);
/*
 * Return the smallest exponent rval such that (1 << rval) >= i.
 *
 * An input of 0 or 1 yields 0.  If no power of two representable in an
 * unsigned int is >= i, the loop runs to completion and the bit width of
 * unsigned int (NBBY * sizeof(i)) is returned.
 */
unsigned int
libxfs_log2_roundup(unsigned int i)
{
	unsigned int	rval;

	for (rval = 0; rval < NBBY * sizeof(i); rval++) {
		/*
		 * Shift an unsigned one: shifting a signed 1 into the sign
		 * bit (rval == 31 for a 32-bit int) is undefined behaviour.
		 */
		if ((1U << rval) >= i)
			break;
	}
	return rval;
}
644 * Wrapper around call to libxfs_ialloc. Takes care of committing and
645 * allocating a new transaction as needed.
647 * Originally there were two copies of this code - one in mkfs, the
648 * other in repair - now there is just the one.
661 xfs_buf_t
*ialloc_context
;
666 ialloc_context
= (xfs_buf_t
*)0;
667 error
= libxfs_ialloc(*tp
, pip
, mode
, nlink
, rdev
, cr
, fsx
,
668 1, &ialloc_context
, &ip
);
673 if (!ialloc_context
&& !ip
) {
675 return XFS_ERROR(ENOSPC
);
678 if (ialloc_context
) {
679 struct xfs_trans_res tres
;
681 xfs_trans_bhold(*tp
, ialloc_context
);
682 tres
.tr_logres
= (*tp
)->t_log_res
;
683 tres
.tr_logcount
= (*tp
)->t_log_count
;
685 ntp
= xfs_trans_dup(*tp
);
686 xfs_trans_commit(*tp
, 0);
688 tres
.tr_logflags
= XFS_TRANS_PERM_LOG_RES
;
689 error
= xfs_trans_reserve(*tp
, &tres
, 0, 0);
691 fprintf(stderr
, _("%s: cannot reserve space: %s\n"),
692 progname
, strerror(error
));
695 xfs_trans_bjoin(*tp
, ialloc_context
);
696 error
= libxfs_ialloc(*tp
, pip
, mode
, nlink
, rdev
, cr
,
697 fsx
, 1, &ialloc_context
, &ip
);
711 * Userspace versions of common diagnostic routines (varargs fun).
714 libxfs_fs_repair_cmn_err(int level
, xfs_mount_t
*mp
, char *fmt
, ...)
719 vfprintf(stderr
, fmt
, ap
);
720 fprintf(stderr
, " This is a bug.\n");
721 fprintf(stderr
, "%s version %s\n", progname
, VERSION
);
722 fprintf(stderr
, "Please capture the filesystem metadata with "
723 "xfs_metadump and\nreport it to xfs@oss.sgi.com.\n");
728 libxfs_fs_cmn_err(int level
, xfs_mount_t
*mp
, char *fmt
, ...)
733 vfprintf(stderr
, fmt
, ap
);
739 cmn_err(int level
, char *fmt
, ...)
744 vfprintf(stderr
, fmt
, ap
);
750 * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
751 * values, and omit the stack trace unless the error level is tuned high.
757 xfs_alert(NULL
, "Metadata %s detected at block 0x%llx/0x%x",
758 bp
->b_error
== EFSBADCRC
? "CRC error" : "corruption",
759 bp
->b_bn
, BBTOB(bp
->b_length
));