From: Dave Chinner Date: Thu, 9 May 2013 12:24:15 +0000 (-0500) Subject: xfsprogs: update libxfs to 3.9-rc1 + xfsdev X-Git-Tag: v3.2.0-alpha1~139 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=49f693fa26783bb95136cdb3479902324be3f587;p=thirdparty%2Fxfsprogs-dev.git xfsprogs: update libxfs to 3.9-rc1 + xfsdev Update the code from 3.8-rc2 to 3.9-rc1 and the current xfs-dev tree (to pick up xfs_bmap.c rework). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Rich Johnston --- diff --git a/db/check.c b/db/check.c index ff24e339d..353530bb1 100644 --- a/db/check.c +++ b/db/check.c @@ -2666,7 +2666,8 @@ process_inode( error++; return; } - if ((unsigned int)XFS_DFORK_ASIZE(dip, mp) >= XFS_LITINO(mp)) { + if ((unsigned int)XFS_DFORK_ASIZE(dip, mp) >= + XFS_LITINO(mp, idic.di_version)) { if (v) dbprintf(_("bad fork offset %d for inode %lld\n"), idic.di_forkoff, id->ino); diff --git a/db/metadump.c b/db/metadump.c index 5739f86bb..0635e7bc8 100644 --- a/db/metadump.c +++ b/db/metadump.c @@ -1637,7 +1637,8 @@ process_inode( nametable_clear(); /* copy extended attributes if they exist and forkoff is valid */ - if (success && XFS_DFORK_DSIZE(dip, mp) < XFS_LITINO(mp)) { + if (success && + XFS_DFORK_DSIZE(dip, mp) < XFS_LITINO(mp, dip->di_version)) { attr_data.remote_val_count = 0; switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: diff --git a/include/libxfs.h b/include/libxfs.h index 0e2fc5fcd..b60bcf555 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -610,7 +610,7 @@ int libxfs_attr_remove(struct xfs_inode *, const unsigned char *, int); /* xfs_bmap.c */ xfs_bmbt_rec_host_t *xfs_bmap_search_extents(xfs_inode_t *, xfs_fileoff_t, int, int *, xfs_extnum_t *, xfs_bmbt_irec_t *, - xfs_bmbt_irec_t *); + xfs_bmbt_irec_t *); void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); /* xfs_attr_leaf.h */ diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h index 1d9643b3d..88a3368ef 100644 --- 
a/include/xfs_dinode.h +++ b/include/xfs_dinode.h @@ -104,7 +104,7 @@ typedef enum xfs_dinode_fmt { /* * Inode size for given fs. */ -#define XFS_LITINO(mp) \ +#define XFS_LITINO(mp, version) \ ((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode))) #define XFS_BROOT_SIZE_ADJ \ @@ -119,10 +119,10 @@ typedef enum xfs_dinode_fmt { #define XFS_DFORK_DSIZE(dip,mp) \ (XFS_DFORK_Q(dip) ? \ XFS_DFORK_BOFF(dip) : \ - XFS_LITINO(mp)) + XFS_LITINO(mp, (dip)->di_version)) #define XFS_DFORK_ASIZE(dip,mp) \ (XFS_DFORK_Q(dip) ? \ - XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \ + XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \ 0) #define XFS_DFORK_SIZE(dip,mp,w) \ ((w) == XFS_DATA_FORK ? \ diff --git a/include/xfs_inode.h b/include/xfs_inode.h index 437b3af99..4733f85d5 100644 --- a/include/xfs_inode.h +++ b/include/xfs_inode.h @@ -180,10 +180,11 @@ typedef struct xfs_icdinode { #define XFS_IFORK_DSIZE(ip) \ (XFS_IFORK_Q(ip) ? \ XFS_IFORK_BOFF(ip) : \ - XFS_LITINO((ip)->i_mount)) + XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version)) #define XFS_IFORK_ASIZE(ip) \ (XFS_IFORK_Q(ip) ? \ - XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ + XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \ + XFS_IFORK_BOFF(ip) : \ 0) #define XFS_IFORK_SIZE(ip,w) \ ((w) == XFS_DATA_FORK ? 
\ @@ -419,6 +420,7 @@ static inline void xfs_iflock(struct xfs_inode *ip) static inline void xfs_ifunlock(struct xfs_inode *ip) { xfs_iflags_clear(ip, XFS_IFLOCK); + smp_mb(); wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); } diff --git a/include/xfs_mount.h b/include/xfs_mount.h index c267379c0..28bbf46a0 100644 --- a/include/xfs_mount.h +++ b/include/xfs_mount.h @@ -41,6 +41,12 @@ typedef struct xfs_trans_reservations { uint tr_growrtalloc; /* grow realtime allocations */ uint tr_growrtzero; /* grow realtime zeroing */ uint tr_growrtfree; /* grow realtime freeing */ + uint tr_qm_sbchange; /* change quota flags */ + uint tr_qm_setqlim; /* adjust quota limits */ + uint tr_qm_dqalloc; /* allocate quota on disk */ + uint tr_qm_quotaoff; /* turn quota off */ + uint tr_qm_equotaoff;/* end of turn quota off */ + uint tr_sb; /* modify superblock */ } xfs_trans_reservations_t; #ifndef __KERNEL__ @@ -201,7 +207,6 @@ typedef struct xfs_mount { trimming */ __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ - struct shrinker m_inode_shrink; /* inode reclaim shrinker */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ diff --git a/include/xfs_types.h b/include/xfs_types.h index e9bd5c3b6..dd6bf7122 100644 --- a/include/xfs_types.h +++ b/include/xfs_types.h @@ -32,7 +32,6 @@ typedef unsigned int __uint32_t; typedef signed long long int __int64_t; typedef unsigned long long int __uint64_t; -typedef enum { B_FALSE,B_TRUE } boolean_t; typedef __uint32_t inst_t; /* an instruction */ typedef __s64 xfs_off_t; /* type */ diff --git a/libxfs/xfs.h b/libxfs/xfs.h index 62132843d..9fbe261a4 100644 --- a/libxfs/xfs.h +++ b/libxfs/xfs.h @@ -107,10 +107,10 @@ typedef __uint32_t inst_t; /* an instruction */ #define rcu_read_unlock() ((void) 0) /* - * random32 is used for di_gen inode allocation, it must be zero for libxfs + * prandom_u32 is used for di_gen inode allocation, it must be zero for libxfs * or all sorts of badness can occur! 
*/ -#define random32() 0 +#define prandom_u32() 0 #define PAGE_CACHE_SIZE getpagesize() diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c index 61cdc6c04..e59fdaced 100644 --- a/libxfs/xfs_alloc.c +++ b/libxfs/xfs_alloc.c @@ -822,7 +822,7 @@ xfs_alloc_ag_vextent_near( */ int dofirst; /* set to do first algorithm */ - dofirst = random32() & 1; + dofirst = prandom_u32() & 1; #endif restart: diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c index 42546a947..2adf92b18 100644 --- a/libxfs/xfs_attr.c +++ b/libxfs/xfs_attr.c @@ -275,6 +275,7 @@ xfs_attr_set_int( if (rsvd) args.trans->t_flags |= XFS_TRANS_RESERVE; + error = xfs_trans_reserve(args.trans, args.total, XFS_ATTRSETM_LOG_RES(mp) + XFS_ATTRSETRT_LOG_RES(mp) * args.total, diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c index 824c12295..426130f46 100644 --- a/libxfs/xfs_attr_leaf.c +++ b/libxfs/xfs_attr_leaf.c @@ -141,7 +141,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) int dsize; xfs_mount_t *mp = dp->i_mount; - offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ + /* rounded down */ + offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; switch (dp->i_d.di_format) { case XFS_DINODE_FMT_DEV: @@ -212,7 +213,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) minforkoff = roundup(minforkoff, 8) >> 3; /* attr fork btree root can have at least this many key/ptr pairs */ - maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); + maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) - + XFS_BMDR_SPACE_CALC(MINABTPTRS); maxforkoff = maxforkoff >> 3; /* rounded down */ if (offset >= maxforkoff) diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c index b328a0b89..c8232a956 100644 --- a/libxfs/xfs_bmap.c +++ b/libxfs/xfs_bmap.c @@ -15,152 +15,77 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - #include - kmem_zone_t *xfs_bmap_free_item_zone; /* - * Prototypes for internal bmap 
routines. - */ - -#ifdef DEBUG -STATIC void -xfs_bmap_check_leaf_extents( - struct xfs_btree_cur *cur, - struct xfs_inode *ip, - int whichfork); -#else -#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) -#endif - - -/* - * Called from xfs_bmap_add_attrfork to handle extents format files. - */ -STATIC int /* error */ -xfs_bmap_add_attrfork_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags); /* inode logging flags */ - -/* - * Called from xfs_bmap_add_attrfork to handle local format files. - */ -STATIC int /* error */ -xfs_bmap_add_attrfork_local( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags); /* inode logging flags */ - -/* - * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. - * It figures out where to ask the underlying allocator to put the new extent. - */ -STATIC int /* error */ -xfs_bmap_alloc( - xfs_bmalloca_t *ap); /* bmap alloc argument struct */ - -/* - * Transform a btree format file with only one leaf node, where the - * extents list will fit in the inode, into an extents format file. - * Since the file extents are already in-core, all we have to do is - * give up the space for the btree root and pitch the leaf block. - */ -STATIC int /* error */ -xfs_bmap_btree_to_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_btree_cur_t *cur, /* btree cursor */ - int *logflagsp, /* inode logging flags */ - int whichfork); /* data or attr fork */ - -/* - * Convert an extents-format file into a btree-format file. - * The new file will have a root block (in the inode) and a single child block. 
- */ -STATIC int /* error */ -xfs_bmap_extents_to_btree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first-block-allocated */ - xfs_bmap_free_t *flist, /* blocks freed in xaction */ - xfs_btree_cur_t **curp, /* cursor returned to caller */ - int wasdel, /* converting a delayed alloc */ - int *logflagsp, /* inode logging flags */ - int whichfork); /* data or attr fork */ - -/* - * Convert a local file to an extents file. - * This code is sort of bogus, since the file data needs to get - * logged so it won't be lost. The bmap-level manipulations are ok, though. - */ -STATIC int /* error */ -xfs_bmap_local_to_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated in xaction */ - xfs_extlen_t total, /* total blocks needed by transaction */ - int *logflagsp, /* inode logging flags */ - int whichfork); /* data or attr fork */ - -/* - * Compute the worst-case number of indirect blocks that will be used - * for ip's delayed extent of length "len". + * Miscellaneous helper functions */ -STATIC xfs_filblks_t -xfs_bmap_worst_indlen( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_filblks_t len); /* delayed extent length */ -#ifdef DEBUG /* - * Perform various validation checks on the values being returned - * from xfs_bmapi(). + * Compute and fill in the value of the maximum depth of a bmap btree + * in this filesystem. Done once, during mount. 
*/ -STATIC void -xfs_bmap_validate_ret( - xfs_fileoff_t bno, - xfs_filblks_t len, - int flags, - xfs_bmbt_irec_t *mval, - int nmap, - int ret_nmap); -#else -#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) -#endif /* DEBUG */ - -STATIC int -xfs_bmap_count_tree( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ifork_t *ifp, - xfs_fsblock_t blockno, - int levelin, - int *count); - -STATIC void -xfs_bmap_count_leaves( - xfs_ifork_t *ifp, - xfs_extnum_t idx, - int numrecs, - int *count); +void +xfs_bmap_compute_maxlevels( + xfs_mount_t *mp, /* file system mount structure */ + int whichfork) /* data or attr fork */ +{ + int level; /* btree level */ + uint maxblocks; /* max blocks at this level */ + uint maxleafents; /* max leaf entries possible */ + int maxrootrecs; /* max records in root block */ + int minleafrecs; /* min records in leaf block */ + int minnoderecs; /* min records in node block */ + int sz; /* root block size */ -STATIC void -xfs_bmap_disk_count_leaves( - struct xfs_mount *mp, - struct xfs_btree_block *block, - int numrecs, - int *count); + /* + * The maximum number of extents in a file, hence the maximum + * number of leaf entries, is controlled by the type of di_nextents + * (a signed 32-bit number, xfs_extnum_t), or by di_anextents + * (a signed 16-bit number, xfs_aextnum_t). + * + * Note that we can no longer assume that if we are in ATTR1 that + * the fork offset of all the inodes will be + * (xfs_default_attroffset(ip) >> 3) because we could have mounted + * with ATTR2 and then mounted back with ATTR1, keeping the + * di_forkoff's fixed but probably at various positions. Therefore, + * for both ATTR1 and ATTR2 we have to assume the worst case scenario + * of a minimum size available. 
+ */ + if (whichfork == XFS_DATA_FORK) { + maxleafents = MAXEXTNUM; + sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + } else { + maxleafents = MAXAEXTNUM; + sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); + } + maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); + minleafrecs = mp->m_bmap_dmnr[0]; + minnoderecs = mp->m_bmap_dmnr[1]; + maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + for (level = 1; maxblocks > 1; level++) { + if (maxblocks <= maxrootrecs) + maxblocks = 1; + else + maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + } + mp->m_bm_maxlevels[whichfork] = level; +} /* - * Bmap internal routines. + * Convert the given file system block to a disk block. We have to treat it + * differently based on whether the file is a real time file or not, because the + * bmap code does. */ +xfs_daddr_t +xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) +{ + return (XFS_IS_REALTIME_INODE(ip) ? \ + (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ + XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); +} STATIC int /* error */ xfs_bmbt_lookup_eq( @@ -230,3769 +155,3881 @@ xfs_bmbt_update( } /* - * Called from xfs_bmap_add_attrfork to handle btree format files. + * Compute the worst-case number of indirect blocks that will be used + * for ip's delayed extent of length "len". 
*/ -STATIC int /* error */ -xfs_bmap_add_attrfork_btree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags) /* inode logging flags */ +STATIC xfs_filblks_t +xfs_bmap_worst_indlen( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_filblks_t len) /* delayed extent length */ { - xfs_btree_cur_t *cur; /* btree cursor */ - int error; /* error return value */ - xfs_mount_t *mp; /* file system mount struct */ - int stat; /* newroot status */ + int level; /* btree level number */ + int maxrecs; /* maximum record count at this level */ + xfs_mount_t *mp; /* mount structure */ + xfs_filblks_t rval; /* return value */ mp = ip->i_mount; - if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) - *flags |= XFS_ILOG_DBROOT; - else { - cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); - cur->bc_private.b.flist = flist; - cur->bc_private.b.firstblock = *firstblock; - if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) - goto error0; - /* must be at least one entry */ - XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); - if ((error = xfs_btree_new_iroot(cur, flags, &stat))) - goto error0; - if (stat == 0) { - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return XFS_ERROR(ENOSPC); - } - *firstblock = cur->bc_private.b.firstblock; - cur->bc_private.b.allocated = 0; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + maxrecs = mp->m_bmap_dmxr[0]; + for (level = 0, rval = 0; + level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); + level++) { + len += maxrecs - 1; + do_div(len, maxrecs); + rval += len; + if (len == 1) + return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + level - 1; + if (level == 0) + maxrecs = mp->m_bmap_dmxr[1]; } - return 0; -error0: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; + return rval; } /* - * Called from xfs_bmap_add_attrfork to handle extents format files. 
+ * Calculate the default attribute fork offset for newly created inodes. */ -STATIC int /* error */ -xfs_bmap_add_attrfork_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags) /* inode logging flags */ +uint +xfs_default_attroffset( + struct xfs_inode *ip) { - xfs_btree_cur_t *cur; /* bmap btree cursor */ - int error; /* error return value */ + struct xfs_mount *mp = ip->i_mount; + uint offset; - if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) - return 0; - cur = NULL; - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, - flags, XFS_DATA_FORK); - if (cur) { - cur->bc_private.b.allocated = 0; - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + if (mp->m_sb.sb_inodesize == 256) { + offset = XFS_LITINO(mp, ip->i_d.di_version) - + XFS_BMDR_SPACE_CALC(MINABTPTRS); + } else { + offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); } - return error; + + ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); + return offset; } /* - * Called from xfs_bmap_add_attrfork to handle local format files. + * Helper routine to reset inode di_forkoff field when switching + * attribute fork from local to extent format - we reset it where + * possible to make space available for inline data fork extents. 
*/ -STATIC int /* error */ -xfs_bmap_add_attrfork_local( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - xfs_bmap_free_t *flist, /* blocks to free at commit */ - int *flags) /* inode logging flags */ +STATIC void +xfs_bmap_forkoff_reset( + xfs_mount_t *mp, + xfs_inode_t *ip, + int whichfork) { - xfs_da_args_t dargs; /* args for dir/attr code */ - int error; /* error return value */ - xfs_mount_t *mp; /* mount structure pointer */ + if (whichfork == XFS_ATTR_FORK && + ip->i_d.di_format != XFS_DINODE_FMT_DEV && + ip->i_d.di_format != XFS_DINODE_FMT_UUID && + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { + uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; - if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) - return 0; - if (S_ISDIR(ip->i_d.di_mode)) { - mp = ip->i_mount; - memset(&dargs, 0, sizeof(dargs)); - dargs.dp = ip; - dargs.firstblock = firstblock; - dargs.flist = flist; - dargs.total = mp->m_dirblkfsbs; - dargs.whichfork = XFS_DATA_FORK; - dargs.trans = tp; - error = xfs_dir2_sf_to_block(&dargs); - } else - error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, - XFS_DATA_FORK); - return error; + if (dfl_forkoff > ip->i_d.di_forkoff) + ip->i_d.di_forkoff = dfl_forkoff; + } } /* - * Convert a delayed allocation to a real allocation. + * Extent tree block counting routines. */ -STATIC int /* error */ -xfs_bmap_add_extent_delay_real( - struct xfs_bmalloca *bma) + +/* + * Count leaf blocks given a range of extent records. 
+ */ +STATIC void +xfs_bmap_count_leaves( + xfs_ifork_t *ifp, + xfs_extnum_t idx, + int numrecs, + int *count) { - struct xfs_bmbt_irec *new = &bma->got; - int diff; /* temp value */ - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ - int error; /* error return value */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t new_endoff; /* end offset of new entry */ - xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ - /* left is 0, right is 1, prev is 2 */ - int rval=0; /* return value (logging flags) */ - int state = 0;/* state bits, accessed thru macros */ - xfs_filblks_t da_new; /* new count del alloc blocks used */ - xfs_filblks_t da_old; /* old count del alloc blocks used */ - xfs_filblks_t temp=0; /* value for da_new calculations */ - xfs_filblks_t temp2=0;/* value for da_new calculations */ - int tmp_rval; /* partial logging flags */ + int b; - ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); + for (b = 0; b < numrecs; b++) { + xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); + *count += xfs_bmbt_get_blockcount(frp); + } +} - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); - ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); +/* + * Count leaf blocks given a range of extent records originally + * in btree format. + */ +STATIC void +xfs_bmap_disk_count_leaves( + struct xfs_mount *mp, + struct xfs_btree_block *block, + int numrecs, + int *count) +{ + int b; + xfs_bmbt_rec_t *frp; - XFS_STATS_INC(xs_add_exlist); + for (b = 1; b <= numrecs; b++) { + frp = XFS_BMBT_REC_ADDR(mp, block, b); + *count += xfs_bmbt_disk_get_blockcount(frp); + } +} -#define LEFT r[0] -#define RIGHT r[1] -#define PREV r[2] +/* + * Recursively walks each level of a btree + * to count total fsblocks is use. 
+ */ +STATIC int /* error */ +xfs_bmap_count_tree( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fsblock_t blockno, /* file system block number */ + int levelin, /* level in btree */ + int *count) /* Count of blocks */ +{ + int error; + xfs_buf_t *bp, *nbp; + int level = levelin; + __be64 *pp; + xfs_fsblock_t bno = blockno; + xfs_fsblock_t nextbno; + struct xfs_btree_block *block, *nextblock; + int numrecs; - /* - * Set up a bunch of variables to make the tests simpler. - */ - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_get_all(ep, &PREV); - new_endoff = new->br_startoff + new->br_blockcount; - ASSERT(PREV.br_startoff <= new->br_startoff); - ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + return error; + *count += 1; + block = XFS_BUF_TO_BLOCK(bp); - da_old = startblockval(PREV.br_startblock); - da_new = 0; + if (--level) { + /* Not at node above leaves, count this level of nodes */ + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); + while (nextbno != NULLFSBLOCK) { + error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + return error; + *count += 1; + nextblock = XFS_BUF_TO_BLOCK(nbp); + nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); + xfs_trans_brelse(tp, nbp); + } - /* - * Set flags determining what part of the previous delayed allocation - * extent is being replaced by a real allocation. 
- */ - if (PREV.br_startoff == new->br_startoff) - state |= BMAP_LEFT_FILLING; - if (PREV.br_startoff + PREV.br_blockcount == new_endoff) - state |= BMAP_RIGHT_FILLING; + /* Dive to the next level */ + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + bno = be64_to_cpu(*pp); + if (unlikely((error = + xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { + xfs_trans_brelse(tp, bp); + XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", + XFS_ERRLEVEL_LOW, mp); + return XFS_ERROR(EFSCORRUPTED); + } + xfs_trans_brelse(tp, bp); + } else { + /* count all level 1 nodes and their leaves */ + for (;;) { + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); + numrecs = be16_to_cpu(block->bb_numrecs); + xfs_bmap_disk_count_leaves(mp, block, numrecs, count); + xfs_trans_brelse(tp, bp); + if (nextbno == NULLFSBLOCK) + break; + bno = nextbno; + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + return error; + *count += 1; + block = XFS_BUF_TO_BLOCK(bp); + } + } + return 0; +} - /* - * Check and set flags if this segment has a left neighbor. - * Don't set contiguous if the combined extent would be too large. - */ - if (bma->idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); +/* + * Count fsblocks of the given fork. 
+ */ +int /* error */ +xfs_bmap_count_blocks( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + int whichfork, /* data or attr fork */ + int *count) /* out: count of blocks */ +{ + struct xfs_btree_block *block; /* current btree block */ + xfs_fsblock_t bno; /* block # of "block" */ + xfs_ifork_t *ifp; /* fork structure */ + int level; /* btree level, for checking */ + xfs_mount_t *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ - if (isnullstartblock(LEFT.br_startblock)) - state |= BMAP_LEFT_DELAY; + bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { + xfs_bmap_count_leaves(ifp, 0, + ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), + count); + return 0; } - if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && - LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && - LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; - /* - * Check and set flags if this segment has a right neighbor. - * Don't set contiguous if the combined extent would be too large. - * Also check for all-three-contiguous being too large. + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 
*/ - if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); + block = ifp->if_broot; + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - if (isnullstartblock(RIGHT.br_startblock)) - state |= BMAP_RIGHT_DELAY; + if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { + XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, + mp); + return XFS_ERROR(EFSCORRUPTED); } - if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && - new_endoff == RIGHT.br_startoff && - new->br_startblock + new->br_blockcount == RIGHT.br_startblock && - new->br_state == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && - ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING)) != - (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING) || - LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)) - state |= BMAP_RIGHT_CONTIG; + return 0; +} - error = 0; - /* - * Switch out based on the FILLING and CONTIG state bits. - */ - switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Filling in all of a previously delayed allocation extent. - * The left and right neighbors are both contiguous with new. 
- */ - bma->idx--; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); +/* + * Debug/sanity checking code + */ - xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); - bma->ip->i_d.di_nextents--; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_delete(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_decrement(bma->cur, 0, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state); - if (error) - goto done; - } - break; +STATIC int +xfs_bmap_sanity_check( + struct xfs_mount *mp, + struct xfs_buf *bp, + int level) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - /* - * Filling in all of a previously delayed allocation extent. - * The left neighbor is contiguous, the right is not. 
- */ - bma->idx--; + if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || + be16_to_cpu(block->bb_level) != level || + be16_to_cpu(block->bb_numrecs) == 0 || + be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) + return 0; + return 1; +} - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); +#ifdef DEBUG +STATIC struct xfs_buf * +xfs_bmap_get_bp( + struct xfs_btree_cur *cur, + xfs_fsblock_t bno) +{ + struct xfs_log_item_desc *lidp; + int i; - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, LEFT.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - PREV.br_blockcount, LEFT.br_state); - if (error) - goto done; - } - break; + if (!cur) + return NULL; - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Filling in all of a previously delayed allocation extent. - * The right neighbor is contiguous, the left is not. 
- */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, new->br_startblock); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { + if (!cur->bc_bufs[i]) + break; + if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) + return cur->bc_bufs[i]; + } - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, PREV.br_startoff, - new->br_startblock, - PREV.br_blockcount + - RIGHT.br_blockcount, PREV.br_state); - if (error) - goto done; - } - break; + /* Chase down all the log items to see if the bp is there */ + list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { + struct xfs_buf_log_item *bip; + bip = (struct xfs_buf_log_item *)lidp->lid_item; + if (bip->bli_item.li_type == XFS_LI_BUF && + XFS_BUF_ADDR(bip->bli_buf) == bno) + return bip->bli_buf; + } - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: - /* - * Filling in all of a previously delayed allocation extent. - * Neither the left nor right neighbors are contiguous with - * the new one. 
- */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, new->br_startblock); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + return NULL; +} - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; +STATIC void +xfs_check_block( + struct xfs_btree_block *block, + xfs_mount_t *mp, + int root, + short sz) +{ + int i, j, dmxr; + __be64 *pp, *thispa; /* pointer to block address */ + xfs_bmbt_key_t *prevp, *keyp; - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: - /* - * Filling in the first part of a previous delayed allocation. - * The left neighbor is contiguous. 
- */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), - LEFT.br_blockcount + new->br_blockcount); - xfs_bmbt_set_startoff(ep, - PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); + ASSERT(be16_to_cpu(block->bb_level) > 0); - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, LEFT.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - new->br_blockcount, - LEFT.br_state); - if (error) - goto done; - } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock)); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + prevp = NULL; + for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { + dmxr = mp->m_bmap_dmxr[0]; + keyp = XFS_BMBT_KEY_ADDR(mp, block, i); - bma->idx--; - break; + if (prevp) { + ASSERT(be64_to_cpu(prevp->br_startoff) < + be64_to_cpu(keyp->br_startoff)); + } + prevp = keyp; - case BMAP_LEFT_FILLING: /* - * Filling in the first part of a previous delayed allocation. - * The left neighbor is not contiguous. + * Compare the block numbers to see if there are dups. 
*/ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, new_endoff); - temp = PREV.br_blockcount - new->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if (root) + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); + else + pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + + for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { + if (root) + thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); + else + thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); + if (*thispa == *pp) { + xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", + __func__, j, i, + (unsigned long long)be64_to_cpu(*thispa)); + panic("%s: ptrs are equal in node\n", + __func__); + } } + } +} - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, - &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); - rval |= tmp_rval; +/* + * Check that the extents for the inode ip are in the right order in all + * btree leaves. 
+ */ + +STATIC void +xfs_bmap_check_leaf_extents( + xfs_btree_cur_t *cur, /* btree cursor or null */ + xfs_inode_t *ip, /* incore inode pointer */ + int whichfork) /* data or attr fork */ +{ + struct xfs_btree_block *block; /* current btree block */ + xfs_fsblock_t bno; /* block # of "block" */ + xfs_buf_t *bp; /* buffer for "block" */ + int error; /* error return value */ + xfs_extnum_t i=0, j; /* index into the extents list */ + xfs_ifork_t *ifp; /* fork structure */ + int level; /* btree level, for checking */ + xfs_mount_t *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ + xfs_bmbt_rec_t *ep; /* pointer to current extent */ + xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ + xfs_bmbt_rec_t *nextp; /* pointer to next extent */ + int bp_release = 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { + return; + } + + bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + block = ifp->if_broot; + /* + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. + */ + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + xfs_check_block(block, mp, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + + /* + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. + */ + while (level-- > 0) { + /* See if buf is in cur first */ + bp_release = 0; + bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); + if (!bp) { + bp_release = 1; + error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); if (error) - goto done; + goto error_norelse; } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock) - - (bma->cur ? 
bma->cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, bma->idx + 1); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - break; + block = XFS_BUF_TO_BLOCK(bp); + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, level), + error0); + if (level == 0) + break; - case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* - * Filling in the last part of a previous delayed allocation. - * The right neighbor is contiguous with the new allocation. + * Check this block for basic sanity (increasing keys and + * no duplicate blocks). */ - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), - new->br_startoff, new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - RIGHT.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - if (bma->cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + - RIGHT.br_blockcount, - RIGHT.br_state); - if (error) - goto done; + + xfs_check_block(block, mp, 0, 0); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); } + } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock)); - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + /* + * Here with bp and 
block set to the leftmost leaf node in the tree. + */ + i = 0; - bma->idx++; - break; + /* + * Loop over all leaf nodes checking that all extents are in the right order. + */ + for (;;) { + xfs_fsblock_t nextbno; + xfs_extnum_t num_recs; + + + num_recs = xfs_btree_get_numrecs(block); - case BMAP_RIGHT_FILLING: /* - * Filling in the last part of a previous delayed allocation. - * The right neighbor is not contiguous. + * Read-ahead the next leaf block, if any. */ - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, 1, - &tmp_rval, XFS_DATA_FORK); - rval |= tmp_rval; - if (error) - goto done; - } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - bma->idx++; - break; + /* + * Check all the extents to make sure they are OK. + * If we had a previous block, the last entry should + * conform with the first entry in this one. 
+ */ - case 0: + ep = XFS_BMBT_REC_ADDR(mp, block, 1); + if (i) { + ASSERT(xfs_bmbt_disk_get_startoff(&last) + + xfs_bmbt_disk_get_blockcount(&last) <= + xfs_bmbt_disk_get_startoff(ep)); + } + for (j = 1; j < num_recs; j++) { + nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); + ASSERT(xfs_bmbt_disk_get_startoff(ep) + + xfs_bmbt_disk_get_blockcount(ep) <= + xfs_bmbt_disk_get_startoff(nextp)); + ep = nextp; + } + + last = *ep; + i += num_recs; + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } + bno = nextbno; /* - * Filling in the middle part of a previous delayed allocation. - * Contiguity is impossible here. - * This case is avoided almost all the time. - * - * We start with a delayed allocation: - * - * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ - * PREV @ idx - * - * and we are allocating: - * +rrrrrrrrrrrrrrrrr+ - * new - * - * and we set it up for insertion as: - * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ - * new - * PREV @ idx LEFT RIGHT - * inserted at idx + 1 + * If we've reached the end, stop. 
*/ - temp = new->br_startoff - PREV.br_startoff; - temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ - LEFT = *new; - RIGHT.br_state = PREV.br_state; - RIGHT.br_startblock = nullstartblock( - (int)xfs_bmap_worst_indlen(bma->ip, temp2)); - RIGHT.br_startoff = new_endoff; - RIGHT.br_blockcount = temp2; - /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ - xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); - bma->ip->i_d.di_nextents++; - if (bma->cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } + if (bno == NULLFSBLOCK) + break; - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, - 1, &tmp_rval, XFS_DATA_FORK); - rval |= tmp_rval; - if (error) - goto done; - } - temp = xfs_bmap_worst_indlen(bma->ip, temp); - temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? 
bma->cur->bc_private.b.allocated : 0)); - if (diff > 0) { - error = xfs_icsb_modify_counters(bma->ip->i_mount, - XFS_SBS_FDBLOCKS, - -((int64_t)diff), 0); - ASSERT(!error); + bp_release = 0; + bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); + if (!bp) { + bp_release = 1; + error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); if (error) - goto done; + goto error_norelse; } - - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), - nullstartblock((int)temp2)); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - - bma->idx++; - da_new = temp + temp2; - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_CONTIG: - case BMAP_RIGHT_CONTIG: - /* - * These cases are all impossible. 
- */ - ASSERT(0); - } - - /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { - int tmp_logflags; /* partial log flag return val */ - - ASSERT(bma->cur == NULL); - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, - da_old > 0, &tmp_logflags, XFS_DATA_FORK); - bma->logflags |= tmp_logflags; - if (error) - goto done; + block = XFS_BUF_TO_BLOCK(bp); } - - /* adjust for changes in reserved delayed indirect blocks */ - if (da_old || da_new) { - temp = da_new; - if (bma->cur) - temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); - if (temp < da_old) - xfs_icsb_modify_counters(bma->ip->i_mount, - XFS_SBS_FDBLOCKS, - (int64_t)(da_old - temp), 0); + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); } + return; - /* clear out the allocated field, done with it now in any case. */ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; - - xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); -done: - bma->logflags |= rval; - return error; -#undef LEFT -#undef RIGHT -#undef PREV +error0: + xfs_warn(mp, "%s: at error0", __func__); + if (bp_release) + xfs_trans_brelse(NULL, bp); +error_norelse: + xfs_warn(mp, "%s: BAD after btree leaves for %d extents", + __func__, i); + panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); + return; } /* - * Convert an unwritten allocation to a real allocation or vice versa. + * Add bmap trace insert entries for all the contents of the extent records. 
*/ -STATIC int /* error */ -xfs_bmap_add_extent_unwritten_real( - struct xfs_trans *tp, - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t *idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp) /* inode logging flags */ +void +xfs_bmap_trace_exlist( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t cnt, /* count of entries in the list */ + int whichfork, /* data or attr fork */ + unsigned long caller_ip) { - xfs_btree_cur_t *cur; /* btree cursor */ - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ - int error; /* error return value */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t new_endoff; /* end offset of new entry */ - xfs_exntst_t newext; /* new extent state */ - xfs_exntst_t oldext; /* old extent state */ - xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ - /* left is 0, right is 1, prev is 2 */ - int rval=0; /* return value (logging flags) */ - int state = 0;/* state bits, accessed thru macros */ - - *logflagsp = 0; + xfs_extnum_t idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int state = 0; - cur = *curp; - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; - ASSERT(*idx >= 0); - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); - ASSERT(!isnullstartblock(new->br_startblock)); + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + for (idx = 0; idx < cnt; idx++) + trace_xfs_extlist(ip, idx, whichfork, caller_ip); +} - XFS_STATS_INC(xs_add_exlist); +/* + * Validate that the bmbt_irecs being returned from bmapi are valid + * given the callers original parameters. 
Specifically check the + * ranges of the returned irecs to ensure that they only extent beyond + * the given parameters if the XFS_BMAPI_ENTIRE flag was set. + */ +STATIC void +xfs_bmap_validate_ret( + xfs_fileoff_t bno, + xfs_filblks_t len, + int flags, + xfs_bmbt_irec_t *mval, + int nmap, + int ret_nmap) +{ + int i; /* index to map values */ -#define LEFT r[0] -#define RIGHT r[1] -#define PREV r[2] + ASSERT(ret_nmap <= nmap); - /* - * Set up a bunch of variables to make the tests simpler. - */ - error = 0; - ep = xfs_iext_get_ext(ifp, *idx); - xfs_bmbt_get_all(ep, &PREV); - newext = new->br_state; - oldext = (newext == XFS_EXT_UNWRITTEN) ? - XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - ASSERT(PREV.br_state == oldext); - new_endoff = new->br_startoff + new->br_blockcount; - ASSERT(PREV.br_startoff <= new->br_startoff); - ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + for (i = 0; i < ret_nmap; i++) { + ASSERT(mval[i].br_blockcount > 0); + if (!(flags & XFS_BMAPI_ENTIRE)) { + ASSERT(mval[i].br_startoff >= bno); + ASSERT(mval[i].br_blockcount <= len); + ASSERT(mval[i].br_startoff + mval[i].br_blockcount <= + bno + len); + } else { + ASSERT(mval[i].br_startoff < bno + len); + ASSERT(mval[i].br_startoff + mval[i].br_blockcount > + bno); + } + ASSERT(i == 0 || + mval[i - 1].br_startoff + mval[i - 1].br_blockcount == + mval[i].br_startoff); + ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && + mval[i].br_startblock != HOLESTARTBLOCK); + ASSERT(mval[i].br_state == XFS_EXT_NORM || + mval[i].br_state == XFS_EXT_UNWRITTEN); + } +} - /* - * Set flags determining what part of the previous oldext allocation - * extent is being replaced by a newext allocation. 
- */ - if (PREV.br_startoff == new->br_startoff) - state |= BMAP_LEFT_FILLING; - if (PREV.br_startoff + PREV.br_blockcount == new_endoff) - state |= BMAP_RIGHT_FILLING; +#else +#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) +#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) +#endif /* DEBUG */ - /* - * Check and set flags if this segment has a left neighbor. - * Don't set contiguous if the combined extent would be too large. - */ - if (*idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); +/* + * bmap free list manipulation functions + */ - if (isnullstartblock(LEFT.br_startblock)) - state |= BMAP_LEFT_DELAY; - } - - if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && - LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && - LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == newext && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; +/* + * Add the extent to the list of extents to be free at transaction end. + * The list is maintained sorted (by block number). + */ +void +xfs_bmap_add_free( + xfs_fsblock_t bno, /* fs block number of extent */ + xfs_filblks_t len, /* length of extent */ + xfs_bmap_free_t *flist, /* list of extents */ + xfs_mount_t *mp) /* mount point structure */ +{ + xfs_bmap_free_item_t *cur; /* current (next) element */ + xfs_bmap_free_item_t *new; /* new element */ + xfs_bmap_free_item_t *prev; /* previous element */ +#ifdef DEBUG + xfs_agnumber_t agno; + xfs_agblock_t agbno; - /* - * Check and set flags if this segment has a right neighbor. - * Don't set contiguous if the combined extent would be too large. - * Also check for all-three-contiguous being too large. 
- */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); - if (isnullstartblock(RIGHT.br_startblock)) - state |= BMAP_RIGHT_DELAY; + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!isnullstartblock(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_bmap_free_item_zone != NULL); + new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); + new->xbfi_startblock = bno; + new->xbfi_blockcount = (xfs_extlen_t)len; + for (prev = NULL, cur = flist->xbf_first; + cur != NULL; + prev = cur, cur = cur->xbfi_next) { + if (cur->xbfi_startblock >= bno) + break; } + if (prev) + prev->xbfi_next = new; + else + flist->xbf_first = new; + new->xbfi_next = cur; + flist->xbf_count++; +} - if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && - new_endoff == RIGHT.br_startoff && - new->br_startblock + new->br_blockcount == RIGHT.br_startblock && - newext == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && - ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING)) != - (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | - BMAP_RIGHT_FILLING) || - LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)) - state |= BMAP_RIGHT_CONTIG; +/* + * Remove the entry "free" from the free item list. Prev points to the + * previous entry, unless "free" is the head of the list. 
+ */ +void +xfs_bmap_del_free( + xfs_bmap_free_t *flist, /* free item list header */ + xfs_bmap_free_item_t *prev, /* previous item on list, if any */ + xfs_bmap_free_item_t *free) /* list item to be freed */ +{ + if (prev) + prev->xbfi_next = free->xbfi_next; + else + flist->xbf_first = free->xbfi_next; + flist->xbf_count--; + kmem_zone_free(xfs_bmap_free_item_zone, free); +} - /* - * Switch out based on the FILLING and CONTIG state bits. - */ - switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | - BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Setting all of a previous oldext extent to newext. - * The left and right neighbors are both contiguous with new. - */ - --*idx; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); +/* + * Free up any items left in the list. 
+ */ +void +xfs_bmap_cancel( + xfs_bmap_free_t *flist) /* list of bmap_free_items */ +{ + xfs_bmap_free_item_t *free; /* free list item */ + xfs_bmap_free_item_t *next; - xfs_iext_remove(ip, *idx + 1, 2, state); - ip->i_d.di_nextents -= 2; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state))) - goto done; - } - break; + if (flist->xbf_count == 0) + return; + ASSERT(flist->xbf_first != NULL); + for (free = flist->xbf_first; free; free = next) { + next = free->xbfi_next; + xfs_bmap_del_free(flist, NULL, free); + } + ASSERT(flist->xbf_count == 0); +} - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - /* - * Setting all of a previous oldext extent to newext. - * The left neighbor is contiguous, the right is not. - */ - --*idx; +/* + * Inode fork format manipulation functions + */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); +/* + * Transform a btree format file with only one leaf node, where the + * extents list will fit in the inode, into an extents format file. 
+ * Since the file extents are already in-core, all we have to do is + * give up the space for the btree root and pitch the leaf block. + */ +STATIC int /* error */ +xfs_bmap_btree_to_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_btree_cur_t *cur, /* btree cursor */ + int *logflagsp, /* inode logging flags */ + int whichfork) /* data or attr fork */ +{ + /* REFERENCED */ + struct xfs_btree_block *cblock;/* child btree block */ + xfs_fsblock_t cbno; /* child block number */ + xfs_buf_t *cbp; /* child block's buffer */ + int error; /* error return value */ + xfs_ifork_t *ifp; /* inode fork data */ + xfs_mount_t *mp; /* mount point structure */ + __be64 *pp; /* ptr to block address */ + struct xfs_btree_block *rblock;/* root btree block */ - xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + PREV.br_blockcount, - LEFT.br_state))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Setting all of a previous oldext extent to newext. - * The right neighbor is contiguous, the left is not. 
- */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount + RIGHT.br_blockcount); - xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - newext))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: - /* - * Setting all of a previous oldext extent to newext. - * Neither the left nor right neighbors are contiguous with - * the new one. - */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - if (cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - newext))) - goto done; - } - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: - /* - * Setting the first part of a previous oldext extent to newext. - * The left neighbor is contiguous. 
- */ - trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), - LEFT.br_blockcount + new->br_blockcount); - xfs_bmbt_set_startoff(ep, - PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, - new->br_startblock + new->br_blockcount); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - --*idx; - - if (cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, - PREV.br_startoff + new->br_blockcount, - PREV.br_startblock + new->br_blockcount, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - if ((error = xfs_btree_decrement(cur, 0, &i))) - goto done; - error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + new->br_blockcount, - LEFT.br_state); - if (error) - goto done; - } - break; - - case BMAP_LEFT_FILLING: - /* - * Setting the first part of a previous oldext extent to newext. - * The left neighbor is not contiguous. 
- */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); - xfs_bmbt_set_startoff(ep, new_endoff); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - xfs_bmbt_set_startblock(ep, - new->br_startblock + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_insert(ip, *idx, 1, new, state); - ip->i_d.di_nextents++; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, - PREV.br_startoff + new->br_blockcount, - PREV.br_startblock + new->br_blockcount, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - cur->bc_rec.b = *new; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: - /* - * Setting the last part of a previous oldext extent to newext. - * The right neighbor is contiguous with the new allocation. 
- */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - ++*idx; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - if (cur == NULL) - rval = XFS_ILOG_DEXT; - else { - rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto done; - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - newext))) - goto done; - } - break; - - case BMAP_RIGHT_FILLING: - /* - * Setting the last part of a previous oldext extent to newext. - * The right neighbor is not contiguous. 
- */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - ++*idx; - xfs_iext_insert(ip, *idx, 1, new, state); - - ip->i_d.di_nextents++; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount - new->br_blockcount, - oldext))) - goto done; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case 0: - /* - * Setting the middle part of a previous oldext extent to - * newext. Contiguity is impossible here. - * One extent becomes three extents. 
- */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - new->br_startoff - PREV.br_startoff); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - r[0] = *new; - r[1].br_startoff = new_endoff; - r[1].br_blockcount = - PREV.br_startoff + PREV.br_blockcount - new_endoff; - r[1].br_startblock = new->br_startblock + new->br_blockcount; - r[1].br_state = oldext; - - ++*idx; - xfs_iext_insert(ip, *idx, 2, &r[0], state); - - ip->i_d.di_nextents += 2; - if (cur == NULL) - rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; - else { - rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - /* new right extent - oldext */ - if ((error = xfs_bmbt_update(cur, r[1].br_startoff, - r[1].br_startblock, r[1].br_blockcount, - r[1].br_state))) - goto done; - /* new left extent - oldext */ - cur->bc_rec.b = PREV; - cur->bc_rec.b.br_blockcount = - new->br_startoff - PREV.br_startoff; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - /* - * Reset the cursor to the position of the new extent - * we are about to insert as we can't trust it after - * the previous insert. - */ - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - /* new middle extent - newext */ - cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_btree_insert(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; - - case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: - case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - case BMAP_LEFT_CONTIG: - case BMAP_RIGHT_CONTIG: - /* - * These cases are all impossible. 
- */ - ASSERT(0); - } - - /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { - int tmp_logflags; /* partial log flag return val */ - - ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, - 0, &tmp_logflags, XFS_DATA_FORK); - *logflagsp |= tmp_logflags; - if (error) - goto done; - } - - /* clear out the allocated field, done with it now in any case. */ - if (cur) { - cur->bc_private.b.allocated = 0; - *curp = cur; - } - - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); -done: - *logflagsp |= rval; - return error; -#undef LEFT -#undef RIGHT -#undef PREV -} + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + rblock = ifp->if_broot; + ASSERT(be16_to_cpu(rblock->bb_level) == 1); + ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); + cbno = be64_to_cpu(*pp); + *logflagsp = 0; +#ifdef DEBUG + if ((error = xfs_btree_check_lptr(cur, cbno, 1))) + return error; +#endif + error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + return error; + cblock = XFS_BUF_TO_BLOCK(cbp); + if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) + return error; + xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); + ip->i_d.di_nblocks--; + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, cbp); + if (cur->bc_bufs[0] == cbp) + cur->bc_bufs[0] = NULL; + xfs_iroot_realloc(ip, -1, whichfork); + ASSERT(ifp->if_broot == NULL); + ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); + return 0; +} /* - * Convert a hole to a delayed allocation. 
+ * Convert an extents-format file into a btree-format file. + * The new file will have a root block (in the inode) and a single child block. */ -STATIC void -xfs_bmap_add_extent_hole_delay( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t *idx, /* extent number to update/insert */ - xfs_bmbt_irec_t *new) /* new data to add to file extents */ +STATIC int /* error */ +xfs_bmap_extents_to_btree( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first-block-allocated */ + xfs_bmap_free_t *flist, /* blocks freed in xaction */ + xfs_btree_cur_t **curp, /* cursor returned to caller */ + int wasdel, /* converting a delayed alloc */ + int *logflagsp, /* inode logging flags */ + int whichfork) /* data or attr fork */ { - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t left; /* left neighbor extent entry */ - xfs_filblks_t newlen=0; /* new indirect size */ - xfs_filblks_t oldlen=0; /* old indirect size */ - xfs_bmbt_irec_t right; /* right neighbor extent entry */ - int state; /* state bits, accessed thru macros */ - xfs_filblks_t temp=0; /* temp for indirect calculations */ + struct xfs_btree_block *ablock; /* allocated (child) bt block */ + xfs_buf_t *abp; /* buffer for ablock */ + xfs_alloc_arg_t args; /* allocation arguments */ + xfs_bmbt_rec_t *arp; /* child record pointer */ + struct xfs_btree_block *block; /* btree root block */ + xfs_btree_cur_t *cur; /* bmap btree cursor */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + int error; /* error return value */ + xfs_extnum_t i, cnt; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_key_t *kp; /* root block key pointer */ + xfs_mount_t *mp; /* mount structure */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_ptr_t *pp; /* root block address pointer */ - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - state = 0; - ASSERT(isnullstartblock(new->br_startblock)); + ifp = 
XFS_IFORK_PTR(ip, whichfork); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); /* - * Check and set flags if this segment has a left neighbor + * Make space in the inode incore. */ - if (*idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); - - if (isnullstartblock(left.br_startblock)) - state |= BMAP_LEFT_DELAY; - } + xfs_iroot_realloc(ip, 1, whichfork); + ifp->if_flags |= XFS_IFBROOT; /* - * Check and set flags if the current (right) segment exists. - * If it doesn't exist, we're converting the hole at end-of-file. + * Fill in the root. */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); + block = ifp->if_broot; + block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); + block->bb_level = cpu_to_be16(1); + block->bb_numrecs = cpu_to_be16(1); + block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); - if (isnullstartblock(right.br_startblock)) - state |= BMAP_RIGHT_DELAY; + /* + * Need a cursor. Can't allocate until bb_level is filled in. + */ + mp = ip->i_mount; + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + /* + * Convert to a btree with two levels, one record in root. 
+ */ + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = mp; + args.firstblock = *firstblock; + if (*firstblock == NULLFSBLOCK) { + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); + } else if (flist->xbf_low) { + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = *firstblock; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.fsbno = *firstblock; + } + args.minlen = args.maxlen = args.prod = 1; + args.wasdel = wasdel; + *logflagsp = 0; + if ((error = xfs_alloc_vextent(&args))) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; } - /* - * Set contiguity flags on the left and right neighbors. - * Don't let extents get too large, even if the pieces are contiguous. + * Allocation can't fail, the space was reserved. */ - if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; - - if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && - (!(state & BMAP_LEFT_CONTIG) || - (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN))) - state |= BMAP_RIGHT_CONTIG; - + ASSERT(args.fsbno != NULLFSBLOCK); + ASSERT(*firstblock == NULLFSBLOCK || + args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || + (flist->xbf_low && + args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); + *firstblock = cur->bc_private.b.firstblock = args.fsbno; + cur->bc_private.b.allocated++; + ip->i_d.di_nblocks++; + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); /* - * Switch out based on the contiguity flags. + * Fill in the child block. 
*/ - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with delayed allocations - * on the left and on the right. - * Merge all three into a single extent record. - */ - --*idx; - temp = left.br_blockcount + new->br_blockcount + - right.br_blockcount; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); - oldlen = startblockval(left.br_startblock) + - startblockval(new->br_startblock) + - startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), - nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + abp->b_ops = &xfs_bmbt_buf_ops; + ablock = XFS_BUF_TO_BLOCK(abp); + ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); + ablock->bb_level = 0; + ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (cnt = i = 0; i < nextents; i++) { + ep = xfs_iext_get_ext(ifp, i); + if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { + arp->l0 = cpu_to_be64(ep->l0); + arp->l1 = cpu_to_be64(ep->l1); + arp++; cnt++; + } + } + ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); + xfs_btree_set_numrecs(ablock, cnt); - xfs_iext_remove(ip, *idx + 1, 1, state); - break; + /* + * Fill in the root key and pointer. + */ + kp = XFS_BMBT_KEY_ADDR(mp, block, 1); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + be16_to_cpu(block->bb_level))); + *pp = cpu_to_be64(args.fsbno); - case BMAP_LEFT_CONTIG: - /* - * New allocation is contiguous with a delayed allocation - * on the left. - * Merge the new allocation with the left neighbor. 
- */ - --*idx; - temp = left.br_blockcount + new->br_blockcount; + /* + * Do all this logging at the end so that + * the root is at the right level. + */ + xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); + xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); + ASSERT(*curp == NULL); + *curp = cur; + *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); + return 0; +} - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); - oldlen = startblockval(left.br_startblock) + - startblockval(new->br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), - nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - break; +/* + * Convert a local file to an extents file. + * This code is out of bounds for data forks of regular files, + * since the file data needs to get logged so things will stay consistent. + * (The bmap-level manipulations are ok, though). + */ +STATIC int /* error */ +xfs_bmap_local_to_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated in xaction */ + xfs_extlen_t total, /* total blocks needed by transaction */ + int *logflagsp, /* inode logging flags */ + int whichfork, + void (*init_fn)(struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp)) +{ + int error; /* error return value */ + int flags; /* logging flags returned */ + xfs_ifork_t *ifp; /* inode fork pointer */ - case BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with a delayed allocation - * on the right. - * Merge the new allocation with the right neighbor. 
- */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - temp = new->br_blockcount + right.br_blockcount; - oldlen = startblockval(new->br_startblock) + - startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, - nullstartblock((int)newlen), temp, right.br_state); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - break; + /* + * We don't want to deal with the case of keeping inode data inline yet. + * So sending the data fork of a regular inode is invalid. + */ + ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + flags = 0; + error = 0; + if (ifp->if_bytes) { + xfs_alloc_arg_t args; /* allocation arguments */ + xfs_buf_t *bp; /* buffer for extent block */ + xfs_bmbt_rec_host_t *ep;/* extent record pointer */ - case 0: - /* - * New allocation is not contiguous with another - * delayed allocation. - * Insert a new entry. - */ - oldlen = newlen = 0; - xfs_iext_insert(ip, *idx, 1, new, state); - break; - } - if (oldlen != newlen) { - ASSERT(oldlen > newlen); - xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(oldlen - newlen), 0); + ASSERT((ifp->if_flags & + (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = ip->i_mount; + args.firstblock = *firstblock; /* - * Nothing to do for disk quota accounting here. + * Allocate a block. We know we need only one, since the + * file currently fits in an inode. 
*/ + if (*firstblock == NULLFSBLOCK) { + args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); + args.type = XFS_ALLOCTYPE_START_BNO; + } else { + args.fsbno = *firstblock; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + } + args.total = total; + args.minlen = args.maxlen = args.prod = 1; + error = xfs_alloc_vextent(&args); + if (error) + goto done; + + /* Can't fail, the space was reserved. */ + ASSERT(args.fsbno != NULLFSBLOCK); + ASSERT(args.len == 1); + *firstblock = args.fsbno; + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); + + /* initialise the block and copy the data */ + init_fn(bp, ip, ifp); + + /* account for the change in fork size and log everything */ + xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); + xfs_bmap_forkoff_reset(args.mp, ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); + xfs_iext_add(ifp, 0, 1); + ep = xfs_iext_get_ext(ifp, 0); + xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); + trace_xfs_bmap_post_update(ip, 0, + whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, + _THIS_IP_); + XFS_IFORK_NEXT_SET(ip, whichfork, 1); + ip->i_d.di_nblocks = 1; + xfs_trans_mod_dquot_byino(tp, ip, + XFS_TRANS_DQ_BCOUNT, 1L); + flags |= xfs_ilog_fext(whichfork); + } else { + ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); + xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); } + ifp->if_flags &= ~XFS_IFINLINE; + ifp->if_flags |= XFS_IFEXTENTS; + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + flags |= XFS_ILOG_CORE; +done: + *logflagsp = flags; + return error; } /* - * Convert a hole to a real allocation. + * Called from xfs_bmap_add_attrfork to handle btree format files. 
*/ -STATIC int /* error */ -xfs_bmap_add_extent_hole_real( - struct xfs_bmalloca *bma, - int whichfork) +STATIC int /* error */ +xfs_bmap_add_attrfork_btree( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated */ + xfs_bmap_free_t *flist, /* blocks to free at commit */ + int *flags) /* inode logging flags */ { - struct xfs_bmbt_irec *new = &bma->got; - int error; /* error return value */ - int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t left; /* left neighbor extent entry */ - xfs_bmbt_irec_t right; /* right neighbor extent entry */ - int rval=0; /* return value (logging flags) */ - int state; /* state bits, accessed thru macros */ - - ifp = XFS_IFORK_PTR(bma->ip, whichfork); - - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); - ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - - XFS_STATS_INC(xs_add_exlist); - - state = 0; - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; + xfs_btree_cur_t *cur; /* btree cursor */ + int error; /* error return value */ + xfs_mount_t *mp; /* file system mount struct */ + int stat; /* newroot status */ - /* - * Check and set flags if this segment has a left neighbor. 
- */ - if (bma->idx > 0) { - state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); - if (isnullstartblock(left.br_startblock)) - state |= BMAP_LEFT_DELAY; + mp = ip->i_mount; + if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) + *flags |= XFS_ILOG_DBROOT; + else { + cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); + cur->bc_private.b.flist = flist; + cur->bc_private.b.firstblock = *firstblock; + if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) + goto error0; + /* must be at least one entry */ + XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); + if ((error = xfs_btree_new_iroot(cur, flags, &stat))) + goto error0; + if (stat == 0) { + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return XFS_ERROR(ENOSPC); + } + *firstblock = cur->bc_private.b.firstblock; + cur->bc_private.b.allocated = 0; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); } + return 0; +error0: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} - /* - * Check and set flags if this segment has a current value. - * Not true if we're inserting into the "hole" at eof. - */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { - state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); - if (isnullstartblock(right.br_startblock)) - state |= BMAP_RIGHT_DELAY; +/* + * Called from xfs_bmap_add_attrfork to handle extents format files. 
+ */ +STATIC int /* error */ +xfs_bmap_add_attrfork_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated */ + xfs_bmap_free_t *flist, /* blocks to free at commit */ + int *flags) /* inode logging flags */ +{ + xfs_btree_cur_t *cur; /* bmap btree cursor */ + int error; /* error return value */ + + if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) + return 0; + cur = NULL; + error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, + flags, XFS_DATA_FORK); + if (cur) { + cur->bc_private.b.allocated = 0; + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); } + return error; +} - /* - * We're inserting a real allocation between "left" and "right". - * Set the contiguity flags. Don't let extents get too large. - */ - if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_startblock + left.br_blockcount == new->br_startblock && - left.br_state == new->br_state && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN) - state |= BMAP_LEFT_CONTIG; +/* + * Block initialisation functions for local to extent format conversion. + * As these get more complex, they will be moved to the relevant files, + * but for now they are too simple to worry about. 
+ */ +STATIC void +xfs_bmap_local_to_extents_init_fn( + struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp) +{ + bp->b_ops = &xfs_bmbt_buf_ops; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); +} - if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_startblock + new->br_blockcount == right.br_startblock && - new->br_state == right.br_state && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && - (!(state & BMAP_LEFT_CONTIG) || - left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN)) - state |= BMAP_RIGHT_CONTIG; +STATIC void +xfs_symlink_local_to_remote( + struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp) +{ + /* remote symlink blocks are not verifiable until CRCs come along */ + bp->b_ops = NULL; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); +} - error = 0; - /* - * Select which case we're in here, and implement it. - */ - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with real allocations on the - * left and on the right. - * Merge all three into a single extent record. - */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - left.br_blockcount + new->br_blockcount + - right.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); +/* + * Called from xfs_bmap_add_attrfork to handle local format files. Each + * different data fork content type needs a different callout to do the + * conversion. Some are basic and only require special block initialisation + * callouts for the data formatting, others (directories) are so specialised they + * handle everything themselves. + * + * XXX (dgc): investigate whether directory conversion can use the generic + * formatting callout.
It should be possible - it's just a very complex + * formatter. It would also require passing the transaction through to the init + * function. + */ +STATIC int /* error */ +xfs_bmap_add_attrfork_local( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fsblock_t *firstblock, /* first block allocated */ + xfs_bmap_free_t *flist, /* blocks to free at commit */ + int *flags) /* inode logging flags */ +{ + xfs_da_args_t dargs; /* args for dir/attr code */ - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) + return 0; - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); - if (bma->cur == NULL) { - rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - } else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, - right.br_startblock, right.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_delete(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_btree_decrement(bma->cur, 0, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + - new->br_blockcount + - right.br_blockcount, - left.br_state); - if (error) - goto done; - } - break; + if (S_ISDIR(ip->i_d.di_mode)) { + memset(&dargs, 0, sizeof(dargs)); + dargs.dp = ip; + dargs.firstblock = firstblock; + dargs.flist = flist; + dargs.total = ip->i_mount->m_dirblkfsbs; + dargs.whichfork = XFS_DATA_FORK; + dargs.trans = tp; + return xfs_dir2_sf_to_block(&dargs); + } - case BMAP_LEFT_CONTIG: - /* - * New allocation is contiguous with a real allocation - * on the left. - * Merge the new allocation with the left neighbor.
- */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + if (S_ISLNK(ip->i_d.di_mode)) + return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, + flags, XFS_DATA_FORK, + xfs_symlink_local_to_remote); - if (bma->cur == NULL) { - rval = xfs_ilog_fext(whichfork); - } else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, - left.br_startblock, left.br_blockcount, - &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + - new->br_blockcount, - left.br_state); - if (error) - goto done; - } - break; + return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, + XFS_DATA_FORK, + xfs_bmap_local_to_extents_init_fn); +} - case BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with a real allocation - * on the right. - * Merge the new allocation with the right neighbor. - */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), - new->br_startoff, new->br_startblock, - new->br_blockcount + right.br_blockcount, - right.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); +/* + * Convert inode from non-attributed to attributed. + * Must not be in a transaction, ip must not be locked. 
+ */ +int /* error code */ +xfs_bmap_add_attrfork( + xfs_inode_t *ip, /* incore inode pointer */ + int size, /* space new attribute needs */ + int rsvd) /* xact may use reserved blks */ +{ + xfs_fsblock_t firstblock; /* 1st block/ag allocated */ + xfs_bmap_free_t flist; /* freed extent records */ + xfs_mount_t *mp; /* mount structure */ + xfs_trans_t *tp; /* transaction pointer */ + int blks; /* space reservation */ + int version = 1; /* superblock attr version */ + int committed; /* xaction was committed */ + int logflags; /* logging flags */ + int error; /* error return value */ - if (bma->cur == NULL) { - rval = xfs_ilog_fext(whichfork); - } else { - rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, - right.br_startoff, - right.br_startblock, - right.br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + - right.br_blockcount, - right.br_state); - if (error) - goto done; - } - break; + ASSERT(XFS_IFORK_Q(ip) == 0); - case 0: + mp = ip->i_mount; + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); + blks = XFS_ADDAFORK_SPACE_RES(mp); + if (rsvd) + tp->t_flags |= XFS_TRANS_RESERVE; + if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) + goto error0; + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? + XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : + XFS_QMOPT_RES_REGBLKS); + if (error) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); + return error; + } + if (XFS_IFORK_Q(ip)) + goto error1; + if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* - * New allocation is not contiguous with another - * real allocation. - * Insert a new entry. + * For inodes coming from pre-6.2 filesystems. 
*/ - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); - if (bma->cur == NULL) { - rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - } else { - rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, - new->br_startoff, - new->br_startblock, - new->br_blockcount, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 0, done); - bma->cur->bc_rec.b.br_state = new->br_state; - error = xfs_btree_insert(bma->cur, &i); - if (error) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - break; + ASSERT(ip->i_d.di_aformat == 0); + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } + ASSERT(ip->i_d.di_anextents == 0); - /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(bma->ip, whichfork)) { - int tmp_logflags; /* partial log flag return val */ + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - ASSERT(bma->cur == NULL); - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->flist, &bma->cur, - 0, &tmp_logflags, whichfork); - bma->logflags |= tmp_logflags; - if (error) - goto done; + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_DEV: + ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; + break; + case XFS_DINODE_FMT_UUID: + ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3; + break; + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); + if (!ip->i_d.di_forkoff) + ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; + else if (mp->m_flags & XFS_MOUNT_ATTR2) + version = 2; + break; + default: + ASSERT(0); + error = XFS_ERROR(EINVAL); + goto error1; } - /* clear out the allocated field, done with it now in any case. 
*/ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; + ASSERT(ip->i_afp == NULL); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); + ip->i_afp->if_flags = XFS_IFEXTENTS; + logflags = 0; + xfs_bmap_init(&flist, &firstblock); + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_LOCAL: + error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, + &logflags); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, + &flist, &logflags); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, + &logflags); + break; + default: + error = 0; + break; + } + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + if (error) + goto error2; + if (!xfs_sb_version_hasattr(&mp->m_sb) || + (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { + __int64_t sbfields = 0; - xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); -done: - bma->logflags |= rval; + spin_lock(&mp->m_sb_lock); + if (!xfs_sb_version_hasattr(&mp->m_sb)) { + xfs_sb_version_addattr(&mp->m_sb); + sbfields |= XFS_SB_VERSIONNUM; + } + if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { + xfs_sb_version_addattr2(&mp->m_sb); + sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + } + if (sbfields) { + spin_unlock(&mp->m_sb_lock); + xfs_mod_sb(tp, sbfields); + } else + spin_unlock(&mp->m_sb_lock); + } + + error = xfs_bmap_finish(&tp, &flist, &committed); + if (error) + goto error2; + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +error2: + xfs_bmap_cancel(&flist); +error1: + xfs_iunlock(ip, XFS_ILOCK_EXCL); +error0: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } /* - * Adjust the size of the new extent based on di_extsize and rt extsize. 
- */ -STATIC int -xfs_bmap_extsize_align( - xfs_mount_t *mp, - xfs_bmbt_irec_t *gotp, /* next extent pointer */ - xfs_bmbt_irec_t *prevp, /* previous extent pointer */ - xfs_extlen_t extsz, /* align to this extent size */ - int rt, /* is this a realtime inode? */ - int eof, /* is extent at end-of-file? */ - int delay, /* creating delalloc extent? */ - int convert, /* overwriting unwritten extent? */ - xfs_fileoff_t *offp, /* in/out: aligned offset */ - xfs_extlen_t *lenp) /* in/out: aligned length */ -{ - xfs_fileoff_t orig_off; /* original offset */ - xfs_extlen_t orig_alen; /* original length */ - xfs_fileoff_t orig_end; /* original off+len */ - xfs_fileoff_t nexto; /* next file offset */ - xfs_fileoff_t prevo; /* previous file offset */ - xfs_fileoff_t align_off; /* temp for offset */ - xfs_extlen_t align_alen; /* temp for length */ - xfs_extlen_t temp; /* temp for calculations */ - - if (convert) - return 0; - - orig_off = align_off = *offp; - orig_alen = align_alen = *lenp; - orig_end = orig_off + orig_alen; + * Internal and external extent tree search functions. + */ - /* - * If this request overlaps an existing extent, then don't - * attempt to perform any additional alignment. - */ - if (!delay && !eof && - (orig_off >= gotp->br_startoff) && - (orig_end <= gotp->br_startoff + gotp->br_blockcount)) { - return 0; - } +/* + * Read in the extents to if_extents. + * All inode fields are set up by caller, we just traverse the btree + * and copy the records in. If the file system cannot contain unwritten + * extents, the records are checked for no "state" flags. 
+ */ +int /* error */ +xfs_bmap_read_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + int whichfork) /* data or attr fork */ +{ + struct xfs_btree_block *block; /* current btree block */ + xfs_fsblock_t bno; /* block # of "block" */ + xfs_buf_t *bp; /* buffer for "block" */ + int error; /* error return value */ + xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ + xfs_extnum_t i, j; /* index into the extents list */ + xfs_ifork_t *ifp; /* fork structure */ + int level; /* btree level, for checking */ + xfs_mount_t *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ + /* REFERENCED */ + xfs_extnum_t room; /* number of entries there's room for */ + bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : + XFS_EXTFMT_INODE(ip); + block = ifp->if_broot; /* - * If the file offset is unaligned vs. the extent size - * we need to align it. This will be possible unless - * the file was previously written with a kernel that didn't - * perform this alignment, or if a truncate shot us in the - * foot. + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. */ - temp = do_mod(orig_off, extsz); - if (temp) { - align_alen += temp; - align_off -= temp; - } + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); /* - * Same adjustment for the end of the requested area. + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. 
*/ - if ((temp = (align_alen % extsz))) { - align_alen += extsz - temp; + while (level-- > 0) { + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + if (error) + return error; + block = XFS_BUF_TO_BLOCK(bp); + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, level), + error0); + if (level == 0) + break; + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_trans_brelse(tp, bp); } /* - * If the previous block overlaps with this proposed allocation - * then move the start forward without adjusting the length. - */ - if (prevp->br_startoff != NULLFILEOFF) { - if (prevp->br_startblock == HOLESTARTBLOCK) - prevo = prevp->br_startoff; - else - prevo = prevp->br_startoff + prevp->br_blockcount; - } else - prevo = 0; - if (align_off != orig_off && align_off < prevo) - align_off = prevo; - /* - * If the next block overlaps with this proposed allocation - * then move the start back without adjusting the length, - * but not before offset 0. - * This may of course make the start overlap previous block, - * and if we hit the offset 0 limit then the next block - * can still overlap too. + * Here with bp and block set to the leftmost leaf node in the tree. */ - if (!eof && gotp->br_startoff != NULLFILEOFF) { - if ((delay && gotp->br_startblock == HOLESTARTBLOCK) || - (!delay && gotp->br_startblock == DELAYSTARTBLOCK)) - nexto = gotp->br_startoff + gotp->br_blockcount; - else - nexto = gotp->br_startoff; - } else - nexto = NULLFILEOFF; - if (!eof && - align_off + align_alen != orig_end && - align_off + align_alen > nexto) - align_off = nexto > align_alen ? nexto - align_alen : 0; + room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + i = 0; /* - * If we're now overlapping the next or previous extent that - * means we can't fit an extsz piece in this hole. 
Just move - * the start forward to the first valid spot and set - * the length so we hit the end. + * Loop over all leaf nodes. Copy information to the extent records. */ - if (align_off != orig_off && align_off < prevo) - align_off = prevo; - if (align_off + align_alen != orig_end && - align_off + align_alen > nexto && - nexto != NULLFILEOFF) { - ASSERT(nexto > prevo); - align_alen = nexto - align_off; - } + for (;;) { + xfs_bmbt_rec_t *frp; + xfs_fsblock_t nextbno; + xfs_extnum_t num_recs; + xfs_extnum_t start; - /* - * If realtime, and the result isn't a multiple of the realtime - * extent size we need to remove blocks until it is. - */ - if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { - /* - * We're not covering the original request, or - * we won't be able to once we fix the length. - */ - if (orig_off < align_off || - orig_end > align_off + align_alen || - align_alen - temp < orig_alen) - return XFS_ERROR(EINVAL); - /* - * Try to fix it by moving the start up. - */ - if (align_off + temp <= orig_off) { - align_alen -= temp; - align_off += temp; + num_recs = xfs_btree_get_numrecs(block); + if (unlikely(i + num_recs > room)) { + ASSERT(i + num_recs <= room); + xfs_warn(ip->i_mount, + "corrupt dinode %Lu, (btree extents).", + (unsigned long long) ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", + XFS_ERRLEVEL_LOW, ip->i_mount, block); + goto error0; } + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, 0), + error0); /* - * Try to fix it by moving the end in. + * Read-ahead the next leaf block, if any. */ - else if (align_off + align_alen - temp >= orig_end) - align_alen -= temp; + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); + if (nextbno != NULLFSBLOCK) + xfs_btree_reada_bufl(mp, nextbno, 1, + &xfs_bmbt_buf_ops); /* - * Set the start to the minimum then trim the length. + * Copy records into the extent records. 
*/ - else { - align_alen -= orig_off - align_off; - align_off = orig_off; - align_alen -= align_alen % mp->m_sb.sb_rextsize; + frp = XFS_BMBT_REC_ADDR(mp, block, 1); + start = i; + for (j = 0; j < num_recs; j++, i++, frp++) { + xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); + trp->l0 = be64_to_cpu(frp->l0); + trp->l1 = be64_to_cpu(frp->l1); + } + if (exntf == XFS_EXTFMT_NOSTATE) { + /* + * Check all attribute bmap btree records and + * any "older" data bmap btree records for a + * set bit in the "extent flag" position. + */ + if (unlikely(xfs_check_nostate_extents(ifp, + start, num_recs))) { + XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", + XFS_ERRLEVEL_LOW, + ip->i_mount); + goto error0; + } } + xfs_trans_brelse(tp, bp); + bno = nextbno; /* - * Result doesn't cover the request, fail it. + * If we've reached the end, stop. */ - if (orig_off < align_off || orig_end > align_off + align_alen) - return XFS_ERROR(EINVAL); - } else { - ASSERT(orig_off >= align_off); - ASSERT(orig_end <= align_off + align_alen); + if (bno == NULLFSBLOCK) + break; + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + if (error) + return error; + block = XFS_BUF_TO_BLOCK(bp); } + ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); + XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); + return 0; +error0: + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); +} -#ifdef DEBUG - if (!eof && gotp->br_startoff != NULLFILEOFF) - ASSERT(align_off + align_alen <= gotp->br_startoff); - if (prevp->br_startoff != NULLFILEOFF) - ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount); + +/* + * Search the extent records for the entry containing block bno. + * If bno lies in a hole, point to the next entry. If bno lies + * past eof, *eofp will be set, and *prevp will contain the last + * entry (null if none). 
Else, *lastxp will be set to the index + * of the found entry; *gotp will contain the entry. + */ +STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +xfs_bmap_search_multi_extents( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number searched for */ + int *eofp, /* out: end of file found */ + xfs_extnum_t *lastxp, /* out: last extent index */ + xfs_bmbt_irec_t *gotp, /* out: extent entry found */ + xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +{ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + xfs_extnum_t lastx; /* last extent index */ + + /* + * Initialize the extent entry structure to catch access to + * uninitialized br_startblock field. + */ + gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; + gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; + gotp->br_state = XFS_EXT_INVALID; +#if XFS_BIG_BLKNOS + gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; +#else + gotp->br_startblock = 0xffffa5a5; #endif + prevp->br_startoff = NULLFILEOFF; - *lenp = align_alen; - *offp = align_off; - return 0; + ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); + if (lastx > 0) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); + } + if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + xfs_bmbt_get_all(ep, gotp); + *eofp = 0; + } else { + if (lastx > 0) { + *gotp = *prevp; + } + *eofp = 1; + ep = NULL; + } + *lastxp = lastx; + return ep; } -#define XFS_ALLOC_GAP_UNITS 4 - -STATIC void -xfs_bmap_adjacent( - xfs_bmalloca_t *ap) /* bmap alloc argument struct */ +/* + * Search the extents list for the inode, for the extent containing bno. + * If bno lies in a hole, point to the next entry. If bno lies past eof, + * *eofp will be set, and *prevp will contain the last entry (null if none). + * Else, *lastxp will be set to the index of the found + * entry; *gotp will contain the entry. 
+ */ +xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +xfs_bmap_search_extents( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fileoff_t bno, /* block number searched for */ + int fork, /* data or attr fork */ + int *eofp, /* out: end of file found */ + xfs_extnum_t *lastxp, /* out: last extent index */ + xfs_bmbt_irec_t *gotp, /* out: extent entry found */ + xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ { - xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ - xfs_mount_t *mp; /* mount point structure */ - int nullfb; /* true if ap->firstblock isn't set */ - int rt; /* true if inode is realtime */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ -#define ISVALID(x,y) \ - (rt ? \ - (x) < mp->m_sb.sb_rblocks : \ - XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ - XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) + XFS_STATS_INC(xs_look_exlist); + ifp = XFS_IFORK_PTR(ip, fork); - mp = ap->ip->i_mount; - nullfb = *ap->firstblock == NULLFSBLOCK; - rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); - /* - * If allocating at eof, and there's a previous real block, - * try to use its last block as our starting point. - */ - if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && - !isnullstartblock(ap->prev.br_startblock) && - ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, - ap->prev.br_startblock)) { - ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; - /* - * Adjust for the gap between prevp and us. 
- */ - adjust = ap->offset - - (ap->prev.br_startoff + ap->prev.br_blockcount); - if (adjust && - ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) - ap->blkno += adjust; + ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); + + if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && + !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { + xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, + "Access to block zero in inode %llu " + "start_block: %llx start_off: %llx " + "blkcnt: %llx extent-state: %x lastx: %x\n", + (unsigned long long)ip->i_ino, + (unsigned long long)gotp->br_startblock, + (unsigned long long)gotp->br_startoff, + (unsigned long long)gotp->br_blockcount, + gotp->br_state, *lastxp); + *lastxp = NULLEXTNUM; + *eofp = 1; + return NULL; } - /* - * If not at eof, then compare the two neighbor blocks. - * Figure out whether either one gives us a good starting point, - * and pick the better one. - */ - else if (!ap->eof) { - xfs_fsblock_t gotbno; /* right side block number */ - xfs_fsblock_t gotdiff=0; /* right side difference */ - xfs_fsblock_t prevbno; /* left side block number */ - xfs_fsblock_t prevdiff=0; /* left side difference */ + return ep; +} +/* + * Returns the file-relative block number of the first unused block(s) + * in the file with at least "len" logically contiguous blocks free. + * This is the lowest-address hole if the file has holes, else the first block + * past the end of file. + * Return 0 if the file is currently local (in-inode). 
+ */ +int /* error */ +xfs_bmap_first_unused( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + xfs_extlen_t len, /* size of hole to find */ + xfs_fileoff_t *first_unused, /* unused block */ + int whichfork) /* data or attr fork */ +{ + int error; /* error return value */ + int idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_fileoff_t lastaddr; /* last block number seen */ + xfs_fileoff_t lowest; /* lowest useful block */ + xfs_fileoff_t max; /* starting useful block */ + xfs_fileoff_t off; /* offset for this block */ + xfs_extnum_t nextents; /* number of extent entries */ + + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + *first_unused = 0; + return 0; + } + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + lowest = *first_unused; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { + xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); + off = xfs_bmbt_get_startoff(ep); /* - * If there's a previous (left) block, select a requested - * start block based on it. - */ - if (ap->prev.br_startoff != NULLFILEOFF && - !isnullstartblock(ap->prev.br_startblock) && - (prevbno = ap->prev.br_startblock + - ap->prev.br_blockcount) && - ISVALID(prevbno, ap->prev.br_startblock)) { - /* - * Calculate gap to end of previous block. - */ - adjust = prevdiff = ap->offset - - (ap->prev.br_startoff + - ap->prev.br_blockcount); - /* - * Figure the startblock based on the previous block's - * end and the gap size. - * Heuristic! 
- * If the gap is large relative to the piece we're - * allocating, or using it gives us an invalid block - * number, then just use the end of the previous block. - */ - if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(prevbno + prevdiff, - ap->prev.br_startblock)) - prevbno += adjust; - else - prevdiff += adjust; - /* - * If the firstblock forbids it, can't use it, - * must use default. - */ - if (!rt && !nullfb && - XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) - prevbno = NULLFSBLOCK; - } - /* - * No previous block or can't follow it, just default. - */ - else - prevbno = NULLFSBLOCK; - /* - * If there's a following (right) block, select a requested - * start block based on it. + * See if the hole before this extent will work. */ - if (!isnullstartblock(ap->got.br_startblock)) { - /* - * Calculate gap to start of next block. - */ - adjust = gotdiff = ap->got.br_startoff - ap->offset; - /* - * Figure the startblock based on the next block's - * start and the gap size. - */ - gotbno = ap->got.br_startblock; - /* - * Heuristic! - * If the gap is large relative to the piece we're - * allocating, or using it gives us an invalid block - * number, then just use the start of the next block - * offset by our length. - */ - if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(gotbno - gotdiff, gotbno)) - gotbno -= adjust; - else if (ISVALID(gotbno - ap->length, gotbno)) { - gotbno -= ap->length; - gotdiff += adjust - ap->length; - } else - gotdiff += adjust; - /* - * If the firstblock forbids it, can't use it, - * must use default. - */ - if (!rt && !nullfb && - XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) - gotbno = NULLFSBLOCK; + if (off >= lowest + len && off - max >= len) { + *first_unused = max; + return 0; } - /* - * No next block, just default. 
- */ + lastaddr = off + xfs_bmbt_get_blockcount(ep); + max = XFS_FILEOFF_MAX(lastaddr, lowest); + } + *first_unused = max; + return 0; +} + +/* + * Returns the file-relative block number of the last block + 1 before + * last_block (input value) in the file. + * This is not based on i_size, it is based on the extent records. + * Returns 0 for local files, as they do not have extent records. + */ +int /* error */ +xfs_bmap_last_before( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + xfs_fileoff_t *last_block, /* last block */ + int whichfork) /* data or attr fork */ +{ + xfs_fileoff_t bno; /* input file offset */ + int eof; /* hit end of file */ + xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ + int error; /* error return value */ + xfs_bmbt_irec_t got; /* current extent value */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t lastx; /* last extent used */ + xfs_bmbt_irec_t prev; /* previous extent value */ + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return XFS_ERROR(EIO); + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + *last_block = 0; + return 0; + } + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + bno = *last_block - 1; + ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, + &prev); + if (eof || xfs_bmbt_get_startoff(ep) > bno) { + if (prev.br_startoff == NULLFILEOFF) + *last_block = 0; else - gotbno = NULLFSBLOCK; - /* - * If both valid, pick the better one, else the only good - * one, else ap->rval is already set (to 0 or the inode block). - */ - if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) - ap->blkno = prevdiff <= gotdiff ? 
prevbno : gotbno; - else if (prevbno != NULLFSBLOCK) - ap->blkno = prevbno; - else if (gotbno != NULLFSBLOCK) - ap->blkno = gotbno; + *last_block = prev.br_startoff + prev.br_blockcount; } -#undef ISVALID + /* + * Otherwise *last_block is already the right answer. + */ + return 0; } STATIC int -xfs_bmap_btalloc_nullfb( - struct xfs_bmalloca *ap, - struct xfs_alloc_arg *args, - xfs_extlen_t *blen) +xfs_bmap_last_extent( + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *rec, + int *is_empty) { - struct xfs_mount *mp = ap->ip->i_mount; - struct xfs_perag *pag; - xfs_agnumber_t ag, startag; - int notinit = 0; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); int error; + int nextents; - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) - args->type = XFS_ALLOCTYPE_NEAR_BNO; - else - args->type = XFS_ALLOCTYPE_START_BNO; - args->total = ap->total; + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *is_empty = 1; + return 0; + } + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); + *is_empty = 0; + return 0; +} + +/* + * Check the last inode extent to determine whether this allocation will result + * in blocks being allocated at the end of the file. When we allocate new data + * blocks at the end of the file which do not start at the previous data block, + * we will try to align the new blocks at stripe unit boundaries. + * + * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be + * at, or past the EOF. 
+ */ +STATIC int +xfs_bmap_isaeof( + struct xfs_bmalloca *bma, + int whichfork) +{ + struct xfs_bmbt_irec rec; + int is_empty; + int error; + + bma->aeof = 0; + error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, + &is_empty); + if (error || is_empty) + return error; /* - * Search for an allocation group with a single extent large enough - * for the request. If one isn't found, then adjust the minimum - * allocation size to the largest space found. + * Check if we are allocation or past the last extent, or at least into + * the last delayed allocated extent. */ - startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); - if (startag == NULLAGNUMBER) - startag = ag = 0; + bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || + (bma->offset >= rec.br_startoff && + isnullstartblock(rec.br_startblock)); + return 0; +} - pag = xfs_perag_get(mp, ag); - while (*blen < args->maxlen) { - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, args->tp, ag, - XFS_ALLOC_FLAG_TRYLOCK); - if (error) { - xfs_perag_put(pag); - return error; - } - } +/* + * Check if the endoff is outside the last extent. If so the caller will grow + * the allocation to a stripe unit boundary. All offsets are considered outside + * the end of file for an empty fork, so 1 is returned in *eof in that case. + */ +int +xfs_bmap_eof( + struct xfs_inode *ip, + xfs_fileoff_t endoff, + int whichfork, + int *eof) +{ + struct xfs_bmbt_irec rec; + int error; - /* - * See xfs_alloc_fix_freelist... 
- */ - if (pag->pagf_init) { - xfs_extlen_t longest; - longest = xfs_alloc_longest_free_extent(mp, pag); - if (*blen < longest) - *blen = longest; - } else - notinit = 1; + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); + if (error || *eof) + return error; - if (xfs_inode_is_filestream(ap->ip)) { - if (*blen >= args->maxlen) - break; + *eof = endoff >= rec.br_startoff + rec.br_blockcount; + return 0; +} - if (ap->userdata) { - /* - * If startag is an invalid AG, we've - * come here once before and - * xfs_filestream_new_ag picked the - * best currently available. - * - * Don't continue looping, since we - * could loop forever. - */ - if (startag == NULLAGNUMBER) - break; +/* + * Returns the file-relative block number of the first block past eof in + * the file. This is not based on i_size, it is based on the extent records. + * Returns 0 for local files, as they do not have extent records. + */ +int +xfs_bmap_last_offset( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fileoff_t *last_block, + int whichfork) +{ + struct xfs_bmbt_irec rec; + int is_empty; + int error; + + *last_block = 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) + return 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + return XFS_ERROR(EIO); + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) + return error; + + *last_block = rec.br_startoff + rec.br_blockcount; + return 0; +} + +/* + * Returns whether the selected fork of the inode has exactly one + * block or not. For the data fork we check this matches di_size, + * implying the file's range is 0..bsize-1. 
+ */ +int /* 1=>1 block, 0=>otherwise */ +xfs_bmap_one_block( + xfs_inode_t *ip, /* incore inode */ + int whichfork) /* data or attr fork */ +{ + xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int rval; /* return value */ + xfs_bmbt_irec_t s; /* internal version of extent */ + +#ifndef DEBUG + if (whichfork == XFS_DATA_FORK) + return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; +#endif /* !DEBUG */ + if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) + return 0; + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + return 0; + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ep = xfs_iext_get_ext(ifp, 0); + xfs_bmbt_get_all(ep, &s); + rval = s.br_startoff == 0 && s.br_blockcount == 1; + if (rval && whichfork == XFS_DATA_FORK) + ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); + return rval; +} + +/* + * Extent tree manipulation functions used during allocation. + */ + +/* + * Convert a delayed allocation to a real allocation. 
+ */ +STATIC int /* error */ +xfs_bmap_add_extent_delay_real( + struct xfs_bmalloca *bma) +{ + struct xfs_bmbt_irec *new = &bma->got; + int diff; /* temp value */ + xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ + int error; /* error return value */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_fileoff_t new_endoff; /* end offset of new entry */ + xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ + /* left is 0, right is 1, prev is 2 */ + int rval=0; /* return value (logging flags) */ + int state = 0;/* state bits, accessed thru macros */ + xfs_filblks_t da_new; /* new count del alloc blocks used */ + xfs_filblks_t da_old; /* old count del alloc blocks used */ + xfs_filblks_t temp=0; /* value for da_new calculations */ + xfs_filblks_t temp2=0;/* value for da_new calculations */ + int tmp_rval; /* partial logging flags */ + + ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); - error = xfs_filestream_new_ag(ap, &ag); - xfs_perag_put(pag); - if (error) - return error; + ASSERT(bma->idx >= 0); + ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + ASSERT(!bma->cur || + (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - /* loop again to set 'blen'*/ - startag = NULLAGNUMBER; - pag = xfs_perag_get(mp, ag); - continue; - } - } - if (++ag == mp->m_sb.sb_agcount) - ag = 0; - if (ag == startag) - break; - xfs_perag_put(pag); - pag = xfs_perag_get(mp, ag); - } - xfs_perag_put(pag); + XFS_STATS_INC(xs_add_exlist); + +#define LEFT r[0] +#define RIGHT r[1] +#define PREV r[2] /* - * Since the above loop did a BUF_TRYLOCK, it is - * possible that there is space for this request. + * Set up a bunch of variables to make the tests simpler. 
*/ - if (notinit || *blen < ap->minlen) - args->minlen = ap->minlen; + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_get_all(ep, &PREV); + new_endoff = new->br_startoff + new->br_blockcount; + ASSERT(PREV.br_startoff <= new->br_startoff); + ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + + da_old = startblockval(PREV.br_startblock); + da_new = 0; + /* - * If the best seen length is less than the request - * length, use the best as the minimum. + * Set flags determining what part of the previous delayed allocation + * extent is being replaced by a real allocation. */ - else if (*blen < args->maxlen) - args->minlen = *blen; + if (PREV.br_startoff == new->br_startoff) + state |= BMAP_LEFT_FILLING; + if (PREV.br_startoff + PREV.br_blockcount == new_endoff) + state |= BMAP_RIGHT_FILLING; + /* - * Otherwise we've seen an extent as big as maxlen, - * use that as the minimum. + * Check and set flags if this segment has a left neighbor. + * Don't set contiguous if the combined extent would be too large. */ - else - args->minlen = args->maxlen; + if (bma->idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); + + if (isnullstartblock(LEFT.br_startblock)) + state |= BMAP_LEFT_DELAY; + } + + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == new->br_state && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; /* - * set the failure fallback case to look in the selected - * AG as the stream may have moved. + * Check and set flags if this segment has a right neighbor. + * Don't set contiguous if the combined extent would be too large. + * Also check for all-three-contiguous being too large. 
*/ - if (xfs_inode_is_filestream(ap->ip)) - ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); + if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); - return 0; -} + if (isnullstartblock(RIGHT.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } -STATIC int -xfs_bmap_btalloc( - xfs_bmalloca_t *ap) /* bmap alloc argument struct */ -{ - xfs_mount_t *mp; /* mount point structure */ - xfs_alloctype_t atype = 0; /* type for allocation routines */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ - xfs_agnumber_t ag; - xfs_alloc_arg_t args; - xfs_extlen_t blen; - xfs_extlen_t nextminlen = 0; - int nullfb; /* true if ap->firstblock isn't set */ - int isaligned; - int tryagain; - int error; + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == RIGHT.br_startblock && + new->br_state == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING)) != + (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; - ASSERT(ap->length); + error = 0; + /* + * Switch out based on the FILLING and CONTIG state bits. + */ + switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Filling in all of a previously delayed allocation extent. + * The left and right neighbors are both contiguous with new. 
+ */ + bma->idx--; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - mp = ap->ip->i_mount; - align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; - if (unlikely(align)) { - error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, - align, 0, ap->eof, 0, ap->conv, - &ap->offset, &ap->length); - ASSERT(!error); - ASSERT(ap->length); - } - nullfb = *ap->firstblock == NULLFSBLOCK; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); - if (nullfb) { - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { - ag = xfs_filestream_lookup_ag(ap->ip); - ag = (ag != NULLAGNUMBER) ? ag : 0; - ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); - } else { - ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); + bma->ip->i_d.di_nextents--; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_delete(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_decrement(bma->cur, 0, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + PREV.br_blockcount + + RIGHT.br_blockcount, LEFT.br_state); + if (error) + goto done; } - } else - ap->blkno = *ap->firstblock; + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + /* + * Filling in all of a previously delayed allocation extent. + * The left neighbor is contiguous, the right is not. 
+ */ + bma->idx--; + + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + LEFT.br_blockcount + PREV.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, LEFT.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + PREV.br_blockcount, LEFT.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Filling in all of a previously delayed allocation extent. + * The right neighbor is contiguous, the left is not. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount + RIGHT.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, PREV.br_startoff, + new->br_startblock, + PREV.br_blockcount + + RIGHT.br_blockcount, PREV.br_state); + if (error) + goto done; + } + break; + + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: + /* + * Filling in all of a previously delayed allocation extent. + * Neither the left nor right neighbors are contiguous with + * the new one. 
+ */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, new->br_startblock); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmap_adjacent(ap); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; - /* - * If allowed, use ap->blkno; otherwise must use firstblock since - * it's in the right allocation group. - */ - if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) - ; - else - ap->blkno = *ap->firstblock; - /* - * Normal allocation, done through xfs_alloc_vextent. - */ - tryagain = isaligned = 0; - memset(&args, 0, sizeof(args)); - args.tp = ap->tp; - args.mp = mp; - args.fsbno = ap->blkno; + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: + /* + * Filling in the first part of a previous delayed allocation. + * The left neighbor is contiguous. + */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), + LEFT.br_blockcount + new->br_blockcount); + xfs_bmbt_set_startoff(ep, + PREV.br_startoff + new->br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); - /* Trim the allocation back to the maximum an AG can fit. 
*/ - args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); - args.firstblock = *ap->firstblock; - blen = 0; - if (nullfb) { - error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); - if (error) - return error; - } else if (ap->flist->xbf_low) { - if (xfs_inode_is_filestream(ap->ip)) - args.type = XFS_ALLOCTYPE_FIRST_AG; - else - args.type = XFS_ALLOCTYPE_START_BNO; - args.total = args.minlen = ap->minlen; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.total = ap->total; - args.minlen = ap->minlen; - } - /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { - args.prod = align; - if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) - args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { - args.prod = 1; - args.mod = 0; - } else { - args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; - if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) - args.mod = (xfs_extlen_t)(args.prod - args.mod); - } - /* - * If we are not low on available data blocks, and the - * underlying logical volume manager is a stripe, and - * the file offset is zero then try to allocate data - * blocks on stripe unit boundary. - * NOTE: ap->aeof is only set if the allocation length - * is >= the stripe unit and the allocation offset is - * at the end of file. - */ - if (!ap->flist->xbf_low && ap->aeof) { - if (!ap->offset) { - args.alignment = mp->m_dalign; - atype = args.type; - isaligned = 1; - /* - * Adjust for alignment - */ - if (blen > args.alignment && blen <= args.maxlen) - args.minlen = blen - args.alignment; - args.minalignslop = 0; - } else { - /* - * First try an exact bno allocation. - * If it fails then do a near or start bno - * allocation with alignment turned on. - */ - atype = args.type; - tryagain = 1; - args.type = XFS_ALLOCTYPE_THIS_BNO; - args.alignment = 1; - /* - * Compute the minlen+alignment for the - * next case. 
Set slop so that the value - * of minlen+alignment+slop doesn't go up - * between the calls. - */ - if (blen > mp->m_dalign && blen <= args.maxlen) - nextminlen = blen - mp->m_dalign; - else - nextminlen = args.minlen; - if (nextminlen + mp->m_dalign > args.minlen + 1) - args.minalignslop = - nextminlen + mp->m_dalign - - args.minlen - 1; - else - args.minalignslop = 0; + temp = PREV.br_blockcount - new->br_blockcount; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, LEFT.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + new->br_blockcount, + LEFT.br_state); + if (error) + goto done; } - } else { - args.alignment = 1; - args.minalignslop = 0; - } - args.minleft = ap->minleft; - args.wasdel = ap->wasdel; - args.isfl = 0; - args.userdata = ap->userdata; - if ((error = xfs_alloc_vextent(&args))) - return error; - if (tryagain && args.fsbno == NULLFSBLOCK) { + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock)); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx--; + break; + + case BMAP_LEFT_FILLING: /* - * Exact allocation failed. Now try with alignment - * turned on. + * Filling in the first part of a previous delayed allocation. + * The left neighbor is not contiguous. 
*/ - args.type = atype; - args.fsbno = ap->blkno; - args.alignment = mp->m_dalign; - args.minlen = nextminlen; - args.minalignslop = 0; - isaligned = 1; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (isaligned && args.fsbno == NULLFSBLOCK) { + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startoff(ep, new_endoff); + temp = PREV.br_blockcount - new->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, + &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock) - + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + ep = xfs_iext_get_ext(ifp, bma->idx + 1); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + break; + + case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* - * allocation failed, so turn off alignment and - * try again. + * Filling in the last part of a previous delayed allocation. + * The right neighbor is contiguous with the new allocation. 
*/ - args.type = atype; - args.fsbno = ap->blkno; - args.alignment = 0; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (args.fsbno == NULLFSBLOCK && nullfb && - args.minlen > ap->minlen) { - args.minlen = ap->minlen; - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = ap->blkno; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (args.fsbno == NULLFSBLOCK && nullfb) { - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.total = ap->minlen; - args.minleft = 0; - if ((error = xfs_alloc_vextent(&args))) - return error; - ap->flist->xbf_low = 1; - } - if (args.fsbno != NULLFSBLOCK) { + temp = PREV.br_blockcount - new->br_blockcount; + trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), + new->br_startoff, new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + RIGHT.br_state); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + if (bma->cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + + RIGHT.br_blockcount, + RIGHT.br_state); + if (error) + goto done; + } + + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock)); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx++; + break; + + case BMAP_RIGHT_FILLING: /* - * check the allocation happened at the same or higher AG than - * the first block that was allocated. + * Filling in the last part of a previous delayed allocation. + * The right neighbor is not contiguous. 
*/ - ASSERT(*ap->firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *ap->firstblock) == - XFS_FSB_TO_AGNO(mp, args.fsbno) || - (ap->flist->xbf_low && - XFS_FSB_TO_AGNO(mp, *ap->firstblock) < - XFS_FSB_TO_AGNO(mp, args.fsbno))); + temp = PREV.br_blockcount - new->br_blockcount; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } - ap->blkno = args.fsbno; - if (*ap->firstblock == NULLFSBLOCK) - *ap->firstblock = args.fsbno; - ASSERT(nullfb || fb_agno == args.agno || - (ap->flist->xbf_low && fb_agno < args.agno)); - ap->length = args.len; - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, 1, + &tmp_rval, XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock) - + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx++; + break; + + case 0: /* - * Adjust the disk quota also. This was reserved - * earlier. + * Filling in the middle part of a previous delayed allocation. 
+ * Contiguity is impossible here. + * This case is avoided almost all the time. + * + * We start with a delayed allocation: + * + * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ + * PREV @ idx + * + * and we are allocating: + * +rrrrrrrrrrrrrrrrr+ + * new + * + * and we set it up for insertion as: + * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ + * new + * PREV @ idx LEFT RIGHT + * inserted at idx + 1 */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); - } else { - ap->blkno = NULLFSBLOCK; - ap->length = 0; - } - return 0; -} + temp = new->br_startoff - PREV.br_startoff; + temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ + LEFT = *new; + RIGHT.br_state = PREV.br_state; + RIGHT.br_startblock = nullstartblock( + (int)xfs_bmap_worst_indlen(bma->ip, temp2)); + RIGHT.br_startoff = new_endoff; + RIGHT.br_blockcount = temp2; + /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ + xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } -/* - * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. - * It figures out where to ask the underlying allocator to put the new extent. 
- */ -STATIC int -xfs_bmap_alloc( - xfs_bmalloca_t *ap) /* bmap alloc argument struct */ -{ - if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) - return xfs_bmap_rtalloc(ap); - return xfs_bmap_btalloc(ap); -} + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + 1, &tmp_rval, XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + temp = xfs_bmap_worst_indlen(bma->ip, temp); + temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); + diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + if (diff > 0) { + error = xfs_icsb_modify_counters(bma->ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), 0); + ASSERT(!error); + if (error) + goto done; + } -/* - * Transform a btree format file with only one leaf node, where the - * extents list will fit in the inode, into an extents format file. - * Since the file extents are already in-core, all we have to do is - * give up the space for the btree root and pitch the leaf block. 
- */ -STATIC int /* error */ -xfs_bmap_btree_to_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_btree_cur_t *cur, /* btree cursor */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ -{ - /* REFERENCED */ - struct xfs_btree_block *cblock;/* child btree block */ - xfs_fsblock_t cbno; /* child block number */ - xfs_buf_t *cbp; /* child block's buffer */ - int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork data */ - xfs_mount_t *mp; /* mount point structure */ - __be64 *pp; /* ptr to block address */ - struct xfs_btree_block *rblock;/* root btree block */ + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), + nullstartblock((int)temp2)); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); - rblock = ifp->if_broot; - ASSERT(be16_to_cpu(rblock->bb_level) == 1); - ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); - cbno = be64_to_cpu(*pp); - *logflagsp = 0; -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, cbno, 1))) - return error; -#endif - error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - cblock = XFS_BUF_TO_BLOCK(cbp); - if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) - return error; - xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); - ip->i_d.di_nblocks--; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 
-1L); - xfs_trans_binval(tp, cbp); - if (cur->bc_bufs[0] == cbp) - cur->bc_bufs[0] = NULL; - xfs_iroot_realloc(ip, -1, whichfork); - ASSERT(ifp->if_broot == NULL); - ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); - *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - return 0; + bma->idx++; + da_new = temp + temp2; + break; + + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_CONTIG: + case BMAP_RIGHT_CONTIG: + /* + * These cases are all impossible. + */ + ASSERT(0); + } + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(bma->cur == NULL); + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + da_old > 0, &tmp_logflags, XFS_DATA_FORK); + bma->logflags |= tmp_logflags; + if (error) + goto done; + } + + /* adjust for changes in reserved delayed indirect blocks */ + if (da_old || da_new) { + temp = da_new; + if (bma->cur) + temp += bma->cur->bc_private.b.allocated; + ASSERT(temp <= da_old); + if (temp < da_old) + xfs_icsb_modify_counters(bma->ip->i_mount, + XFS_SBS_FDBLOCKS, + (int64_t)(da_old - temp), 0); + } + + /* clear out the allocated field, done with it now in any case. */ + if (bma->cur) + bma->cur->bc_private.b.allocated = 0; + + xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); +done: + bma->logflags |= rval; + return error; +#undef LEFT +#undef RIGHT +#undef PREV } /* - * Called by xfs_bmapi to update file extent records and the btree - * after removing space (or undoing a delayed allocation). + * Convert an unwritten allocation to a real allocation or vice versa. 
*/ STATIC int /* error */ -xfs_bmap_del_extent( +xfs_bmap_add_extent_unwritten_real( + struct xfs_trans *tp, xfs_inode_t *ip, /* incore inode pointer */ - xfs_trans_t *tp, /* current transaction pointer */ - xfs_extnum_t *idx, /* extent number to update/delete */ + xfs_extnum_t *idx, /* extent number to update/insert */ + xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + xfs_bmbt_irec_t *new, /* new data to add to file extents */ + xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *del, /* data to remove from extents */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ + int *logflagsp) /* inode logging flags */ { - xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ - xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ - xfs_fsblock_t del_endblock=0; /* first block past del */ - xfs_fileoff_t del_endoff; /* first offset past del */ - int delay; /* current block is delayed allocated */ - int do_fx; /* free extent at end of routine */ - xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ + xfs_btree_cur_t *cur; /* btree cursor */ + xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ int error; /* error return value */ - int flags; /* inode logging flags */ - xfs_bmbt_irec_t got; /* current extent entry */ - xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_mount_t *mp; /* mount structure */ - xfs_filblks_t nblks; /* quota/sb block count */ - xfs_bmbt_irec_t new; /* new record to be inserted */ - /* REFERENCED */ - uint qfield; /* quota field to update */ - xfs_filblks_t temp; /* for indirect length calculations */ - xfs_filblks_t temp2; /* for indirect length calculations */ - int state = 0; + xfs_fileoff_t new_endoff; /* end offset of new entry */ + xfs_exntst_t newext; /* new extent 
state */ + xfs_exntst_t oldext; /* old extent state */ + xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ + /* left is 0, right is 1, prev is 2 */ + int rval=0; /* return value (logging flags) */ + int state = 0;/* state bits, accessed thru macros */ - XFS_STATS_INC(xs_del_exlist); + *logflagsp = 0; - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; + cur = *curp; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(del->br_blockcount > 0); - ep = xfs_iext_get_ext(ifp, *idx); - xfs_bmbt_get_all(ep, &got); - ASSERT(got.br_startoff <= del->br_startoff); - del_endoff = del->br_startoff + del->br_blockcount; - got_endoff = got.br_startoff + got.br_blockcount; - ASSERT(got_endoff >= del_endoff); - delay = isnullstartblock(got.br_startblock); - ASSERT(isnullstartblock(del->br_startblock) == delay); - flags = 0; - qfield = 0; + ASSERT(*idx >= 0); + ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + + XFS_STATS_INC(xs_add_exlist); + +#define LEFT r[0] +#define RIGHT r[1] +#define PREV r[2] + + /* + * Set up a bunch of variables to make the tests simpler. + */ error = 0; + ep = xfs_iext_get_ext(ifp, *idx); + xfs_bmbt_get_all(ep, &PREV); + newext = new->br_state; + oldext = (newext == XFS_EXT_UNWRITTEN) ? + XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + ASSERT(PREV.br_state == oldext); + new_endoff = new->br_startoff + new->br_blockcount; + ASSERT(PREV.br_startoff <= new->br_startoff); + ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + /* - * If deleting a real allocation, must free up the disk space. + * Set flags determining what part of the previous oldext allocation + * extent is being replaced by a newext allocation. */ - if (!delay) { - flags = XFS_ILOG_CORE; - /* - * Realtime allocation. Free it and record di_nblocks update. 
- */ - if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; - xfs_filblks_t len; + if (PREV.br_startoff == new->br_startoff) + state |= BMAP_LEFT_FILLING; + if (PREV.br_startoff + PREV.br_blockcount == new_endoff) + state |= BMAP_RIGHT_FILLING; - ASSERT(do_mod(del->br_blockcount, - mp->m_sb.sb_rextsize) == 0); - ASSERT(do_mod(del->br_startblock, - mp->m_sb.sb_rextsize) == 0); - bno = del->br_startblock; - len = del->br_blockcount; - do_div(bno, mp->m_sb.sb_rextsize); - do_div(len, mp->m_sb.sb_rextsize); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; - do_fx = 0; - nblks = len * mp->m_sb.sb_rextsize; - qfield = XFS_TRANS_DQ_RTBCOUNT; - } - /* - * Ordinary allocation. - */ - else { - do_fx = 1; - nblks = del->br_blockcount; - qfield = XFS_TRANS_DQ_BCOUNT; - } - /* - * Set up del_endblock and cur for later. - */ - del_endblock = del->br_startblock + del->br_blockcount; - if (cur) { - if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, got.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } - da_old = da_new = 0; - } else { - da_old = startblockval(got.br_startblock); - da_new = 0; - nblks = 0; - do_fx = 0; + /* + * Check and set flags if this segment has a left neighbor. + * Don't set contiguous if the combined extent would be too large. + */ + if (*idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); + + if (isnullstartblock(LEFT.br_startblock)) + state |= BMAP_LEFT_DELAY; } + + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == newext && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + /* - * Set flag value to use in switch statement. - * Left-contig is 2, right-contig is 1. 
+ * Check and set flags if this segment has a right neighbor. + * Don't set contiguous if the combined extent would be too large. + * Also check for all-three-contiguous being too large. */ - switch (((got.br_startoff == del->br_startoff) << 1) | - (got_endoff == del_endoff)) { - case 3: - /* - * Matches the whole extent. Delete the entry. - */ - xfs_iext_remove(ip, *idx, 1, - whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); - --*idx; - if (delay) - break; + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); + if (isnullstartblock(RIGHT.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - flags |= XFS_ILOG_CORE; - if (!cur) { - flags |= xfs_ilog_fext(whichfork); - break; - } - if ((error = xfs_btree_delete(cur, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - break; + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == RIGHT.br_startblock && + newext == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING)) != + (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | + BMAP_RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; - case 2: + /* + * Switch out based on the FILLING and CONTIG state bits. + */ + switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | + BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* - * Deleting the first part of the extent. + * Setting all of a previous oldext extent to newext. + * The left and right neighbors are both contiguous with new. 
*/ + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, del_endoff); - temp = got.br_blockcount - del->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - da_new = temp; - break; - } - xfs_bmbt_set_startblock(ep, del_endblock); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (!cur) { - flags |= xfs_ilog_fext(whichfork); - break; + + xfs_iext_remove(ip, *idx + 1, 2, state); + ip->i_d.di_nextents -= 2; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount, LEFT.br_state))) + goto done; } - if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, - got.br_blockcount - del->br_blockcount, - got.br_state))) - goto done; break; - case 1: + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: /* - * Deleting the last part of the extent. + * Setting all of a previous oldext extent to newext. + * The left neighbor is contiguous, the right is not. 
*/ - temp = got.br_blockcount - del->br_blockcount; + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - da_new = temp; - break; - } + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), + LEFT.br_blockcount + PREV.br_blockcount); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (!cur) { - flags |= xfs_ilog_fext(whichfork); - break; + + xfs_iext_remove(ip, *idx + 1, 1, state); + ip->i_d.di_nextents--; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + PREV.br_blockcount, + LEFT.br_state))) + goto done; } - if ((error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount - del->br_blockcount, - got.br_state))) - goto done; break; - case 0: + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: /* - * Deleting the middle of the extent. + * Setting all of a previous oldext extent to newext. + * The right neighbor is contiguous, the left is not. 
*/ - temp = del->br_startoff - got.br_startoff; trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - new.br_startoff = del_endoff; - temp2 = got_endoff - del_endoff; - new.br_blockcount = temp2; - new.br_state = got.br_state; - if (!delay) { - new.br_startblock = del_endblock; - flags |= XFS_ILOG_CORE; - if (cur) { - if ((error = xfs_bmbt_update(cur, - got.br_startoff, - got.br_startblock, temp, - got.br_state))) - goto done; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto done; - cur->bc_rec.b = new; - error = xfs_btree_insert(cur, &i); - if (error && error != ENOSPC) - goto done; - /* - * If get no-space back from btree insert, - * it tried a split, and we have a zero - * block reservation. - * Fix up our state and return the error. - */ - if (error == ENOSPC) { - /* - * Reset the cursor, don't trust - * it after any insert operation. - */ - if ((error = xfs_bmbt_lookup_eq(cur, - got.br_startoff, - got.br_startblock, - temp, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - /* - * Update the btree record back - * to the original value. - */ - if ((error = xfs_bmbt_update(cur, - got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state))) - goto done; - /* - * Reset the extent record back - * to the original value. 
- */ - xfs_bmbt_set_blockcount(ep, - got.br_blockcount); - flags = 0; - error = XFS_ERROR(ENOSPC); - goto done; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, done); - } else - flags |= xfs_ilog_fext(whichfork); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); - } else { - ASSERT(whichfork == XFS_DATA_FORK); - temp = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); - new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } - } + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount + RIGHT.br_blockcount); + xfs_bmbt_set_state(ep, newext); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_insert(ip, *idx + 1, 1, &new, state); - ++*idx; - break; - } - /* - * If we need to, add to list of extents to delete. - */ - if (do_fx) - xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist, - mp); - /* - * Adjust inode # blocks in the file. - */ - if (nblks) - ip->i_d.di_nblocks -= nblks; - /* - * Adjust quota data. - */ - if (qfield) - xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); - - /* - * Account for change in delayed indirect blocks. - * Nothing to do for disk quota accounting here. 
- */ - ASSERT(da_old >= da_new); - if (da_old > da_new) { - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - da_new), 0); - } -done: - *logflagsp = flags; - return error; -} + xfs_iext_remove(ip, *idx + 1, 1, state); + ip->i_d.di_nextents--; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + newext))) + goto done; + } + break; -/* - * Remove the entry "free" from the free item list. Prev points to the - * previous entry, unless "free" is the head of the list. - */ -void -xfs_bmap_del_free( - xfs_bmap_free_t *flist, /* free item list header */ - xfs_bmap_free_item_t *prev, /* previous item on list, if any */ - xfs_bmap_free_item_t *free) /* list item to be freed */ -{ - if (prev) - prev->xbfi_next = free->xbfi_next; - else - flist->xbf_first = free->xbfi_next; - flist->xbf_count--; - kmem_zone_free(xfs_bmap_free_item_zone, free); -} + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: + /* + * Setting all of a previous oldext extent to newext. + * Neither the left nor right neighbors are contiguous with + * the new one. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_state(ep, newext); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); -/* - * Convert an extents-format file into a btree-format file. - * The new file will have a root block (in the inode) and a single child block. 
- */ -STATIC int /* error */ -xfs_bmap_extents_to_btree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first-block-allocated */ - xfs_bmap_free_t *flist, /* blocks freed in xaction */ - xfs_btree_cur_t **curp, /* cursor returned to caller */ - int wasdel, /* converting a delayed alloc */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ -{ - struct xfs_btree_block *ablock; /* allocated (child) bt block */ - xfs_buf_t *abp; /* buffer for ablock */ - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_bmbt_rec_t *arp; /* child record pointer */ - struct xfs_btree_block *block; /* btree root block */ - xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - int error; /* error return value */ - xfs_extnum_t i, cnt; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_key_t *kp; /* root block key pointer */ - xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_ptr_t *pp; /* root block address pointer */ + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + newext))) + goto done; + } + break; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: + /* + * Setting the first part of a previous oldext extent to newext. + * The left neighbor is contiguous. 
+ */ + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), + LEFT.br_blockcount + new->br_blockcount); + xfs_bmbt_set_startoff(ep, + PREV.br_startoff + new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - /* - * Make space in the inode incore. - */ - xfs_iroot_realloc(ip, 1, whichfork); - ifp->if_flags |= XFS_IFBROOT; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, + new->br_startblock + new->br_blockcount); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - /* - * Fill in the root. - */ - block = ifp->if_broot; - block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); - block->bb_level = cpu_to_be16(1); - block->bb_numrecs = cpu_to_be16(1); - block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); - block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + --*idx; - /* - * Need a cursor. Can't allocate until bb_level is filled in. - */ - mp = ip->i_mount; - cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.flist = flist; - cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; - /* - * Convert to a btree with two levels, one record in root. 
- */ - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); - memset(&args, 0, sizeof(args)); - args.tp = tp; - args.mp = mp; - args.firstblock = *firstblock; - if (*firstblock == NULLFSBLOCK) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); - } else if (flist->xbf_low) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = *firstblock; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = *firstblock; - } - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = wasdel; - *logflagsp = 0; - if ((error = xfs_alloc_vextent(&args))) { - xfs_iroot_realloc(ip, -1, whichfork); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - /* - * Allocation can't fail, the space was reserved. - */ - ASSERT(args.fsbno != NULLFSBLOCK); - ASSERT(*firstblock == NULLFSBLOCK || - args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || - (flist->xbf_low && - args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); - *firstblock = cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - ip->i_d.di_nblocks++; - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); - abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); - /* - * Fill in the child block. 
- */ - abp->b_ops = &xfs_bmbt_buf_ops; - ablock = XFS_BUF_TO_BLOCK(abp); - ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); - ablock->bb_level = 0; - ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); - ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (cnt = i = 0; i < nextents; i++) { - ep = xfs_iext_get_ext(ifp, i); - if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { - arp->l0 = cpu_to_be64(ep->l0); - arp->l1 = cpu_to_be64(ep->l1); - arp++; cnt++; + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, + PREV.br_startoff + new->br_blockcount, + PREV.br_startblock + new->br_blockcount, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + if ((error = xfs_btree_decrement(cur, 0, &i))) + goto done; + error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + new->br_blockcount, + LEFT.br_state); + if (error) + goto done; } - } - ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); - xfs_btree_set_numrecs(ablock, cnt); - - /* - * Fill in the root key and pointer. - */ - kp = XFS_BMBT_KEY_ADDR(mp, block, 1); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, - be16_to_cpu(block->bb_level))); - *pp = cpu_to_be64(args.fsbno); + break; - /* - * Do all this logging at the end so that - * the root is at the right level. 
- */ - xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); - xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); - ASSERT(*curp == NULL); - *curp = cur; - *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); - return 0; -} + case BMAP_LEFT_FILLING: + /* + * Setting the first part of a previous oldext extent to newext. + * The left neighbor is not contiguous. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); + xfs_bmbt_set_startoff(ep, new_endoff); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + xfs_bmbt_set_startblock(ep, + new->br_startblock + new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); -/* - * Calculate the default attribute fork offset for newly created inodes. - */ -uint -xfs_default_attroffset( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - uint offset; + xfs_iext_insert(ip, *idx, 1, new, state); + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, + PREV.br_startoff + new->br_blockcount, + PREV.br_startblock + new->br_blockcount, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + cur->bc_rec.b = *new; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; - if (mp->m_sb.sb_inodesize == 256) { - offset = XFS_LITINO(mp) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); - } else { - offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); - } + case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: + /* + * Setting the last part of a previous oldext extent to newext. + * The right neighbor is contiguous with the new allocation. 
+ */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ASSERT(offset < XFS_LITINO(mp)); - return offset; -} + ++*idx; -/* - * Helper routine to reset inode di_forkoff field when switching - * attribute fork from local to extent format - we reset it where - * possible to make space available for inline data fork extents. - */ -STATIC void -xfs_bmap_forkoff_reset( - xfs_mount_t *mp, - xfs_inode_t *ip, - int whichfork) -{ - if (whichfork == XFS_ATTR_FORK && - ip->i_d.di_format != XFS_DINODE_FMT_DEV && - ip->i_d.di_format != XFS_DINODE_FMT_UUID && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { - uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, newext); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (dfl_forkoff > ip->i_d.di_forkoff) - ip->i_d.di_forkoff = dfl_forkoff; - } -} + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, + PREV.br_blockcount, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, PREV.br_startoff, + PREV.br_startblock, + PREV.br_blockcount - new->br_blockcount, + oldext))) + goto done; + if ((error = xfs_btree_increment(cur, 0, &i))) + goto done; + if ((error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + newext))) + goto done; + } + break; -/* - * Convert a local file to an extents file. - * This code is out of bounds for data forks of regular files, - * since the file data needs to get logged so things will stay consistent. - * (The bmap-level manipulations are ok, though). 
- */ -STATIC int /* error */ -xfs_bmap_local_to_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated in xaction */ - xfs_extlen_t total, /* total blocks needed by transaction */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ -{ - int error; /* error return value */ - int flags; /* logging flags returned */ - xfs_ifork_t *ifp; /* inode fork pointer */ + case BMAP_RIGHT_FILLING: + /* + * Setting the last part of a previous oldext extent to newext. + * The right neighbor is not contiguous. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount - new->br_blockcount); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - /* - * We don't want to deal with the case of keeping inode data inline yet. - * So sending the data fork of a regular inode is invalid. - */ - ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); - flags = 0; - error = 0; - if (ifp->if_bytes) { - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ - xfs_bmbt_rec_host_t *ep;/* extent record pointer */ + ++*idx; + xfs_iext_insert(ip, *idx, 1, new, state); - memset(&args, 0, sizeof(args)); - args.tp = tp; - args.mp = ip->i_mount; - args.firstblock = *firstblock; - ASSERT((ifp->if_flags & - (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + if ((error = xfs_bmbt_update(cur, PREV.br_startoff, + PREV.br_startblock, + PREV.br_blockcount - new->br_blockcount, + 
oldext))) + goto done; + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + cur->bc_rec.b.br_state = XFS_EXT_NORM; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } + break; + + case 0: /* - * Allocate a block. We know we need only one, since the - * file currently fits in an inode. + * Setting the middle part of a previous oldext extent to + * newext. Contiguity is impossible here. + * One extent becomes three extents. */ - if (*firstblock == NULLFSBLOCK) { - args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); - args.type = XFS_ALLOCTYPE_START_BNO; - } else { - args.fsbno = *firstblock; - args.type = XFS_ALLOCTYPE_NEAR_BNO; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, + new->br_startoff - PREV.br_startoff); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + r[0] = *new; + r[1].br_startoff = new_endoff; + r[1].br_blockcount = + PREV.br_startoff + PREV.br_blockcount - new_endoff; + r[1].br_startblock = new->br_startblock + new->br_blockcount; + r[1].br_state = oldext; + + ++*idx; + xfs_iext_insert(ip, *idx, 2, &r[0], state); + + ip->i_d.di_nextents += 2; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, + PREV.br_startblock, PREV.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* new right extent - oldext */ + if ((error = xfs_bmbt_update(cur, r[1].br_startoff, + r[1].br_startblock, r[1].br_blockcount, + r[1].br_state))) + goto done; + /* new left extent - oldext */ + cur->bc_rec.b = PREV; + cur->bc_rec.b.br_blockcount = + new->br_startoff - PREV.br_startoff; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* + * Reset the cursor to the position of the new extent + * we are about 
to insert as we can't trust it after + * the previous insert. + */ + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + /* new middle extent - newext */ + cur->bc_rec.b.br_state = new->br_state; + if ((error = xfs_btree_insert(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - args.total = total; - args.minlen = args.maxlen = args.prod = 1; - if ((error = xfs_alloc_vextent(&args))) - goto done; + break; + + case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + case BMAP_LEFT_CONTIG: + case BMAP_RIGHT_CONTIG: /* - * Can't fail, the space was reserved. + * These cases are all impossible. */ - ASSERT(args.fsbno != NULLFSBLOCK); - ASSERT(args.len == 1); - *firstblock = args.fsbno; - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - bp->b_ops = &xfs_bmbt_buf_ops; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); - xfs_bmap_forkoff_reset(args.mp, ip, whichfork); - xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); - xfs_iext_add(ifp, 0, 1); - ep = xfs_iext_get_ext(ifp, 0); - xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); - trace_xfs_bmap_post_update(ip, 0, - whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0, - _THIS_IP_); - XFS_IFORK_NEXT_SET(ip, whichfork, 1); - ip->i_d.di_nblocks = 1; - xfs_trans_mod_dquot_byino(tp, ip, - XFS_TRANS_DQ_BCOUNT, 1L); - flags |= xfs_ilog_fext(whichfork); - } else { - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); - xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); + ASSERT(0); + } + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, + 0, &tmp_logflags, XFS_DATA_FORK); + *logflagsp |= tmp_logflags; + if (error) + goto done; } - ifp->if_flags &= ~XFS_IFINLINE; - ifp->if_flags |= XFS_IFEXTENTS; - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); - flags |= XFS_ILOG_CORE; + + /* clear out the allocated field, done with it now in any case. */ + if (cur) { + cur->bc_private.b.allocated = 0; + *curp = cur; + } + + xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); done: - *logflagsp = flags; + *logflagsp |= rval; return error; +#undef LEFT +#undef RIGHT +#undef PREV } /* - * Search the extent records for the entry containing block bno. - * If bno lies in a hole, point to the next entry. If bno lies - * past eof, *eofp will be set, and *prevp will contain the last - * entry (null if none). Else, *lastxp will be set to the index - * of the found entry; *gotp will contain the entry. + * Convert a hole to a delayed allocation. 
*/ -STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_multi_extents( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +STATIC void +xfs_bmap_add_extent_hole_delay( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t *idx, /* extent number to update/insert */ + xfs_bmbt_irec_t *new) /* new data to add to file extents */ { - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - xfs_extnum_t lastx; /* last extent index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_irec_t left; /* left neighbor extent entry */ + xfs_filblks_t newlen=0; /* new indirect size */ + xfs_filblks_t oldlen=0; /* old indirect size */ + xfs_bmbt_irec_t right; /* right neighbor extent entry */ + int state; /* state bits, accessed thru macros */ + xfs_filblks_t temp=0; /* temp for indirect calculations */ + + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + state = 0; + ASSERT(isnullstartblock(new->br_startblock)); /* - * Initialize the extent entry structure to catch access to - * uninitialized br_startblock field. 
+ * Check and set flags if this segment has a left neighbor */ - gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; - gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; - gotp->br_state = XFS_EXT_INVALID; -#if XFS_BIG_BLKNOS - gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; -#else - gotp->br_startblock = 0xffffa5a5; -#endif - prevp->br_startoff = NULLFILEOFF; + if (*idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); - ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); - if (lastx > 0) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); + if (isnullstartblock(left.br_startblock)) + state |= BMAP_LEFT_DELAY; } - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { - xfs_bmbt_get_all(ep, gotp); - *eofp = 0; - } else { - if (lastx > 0) { - *gotp = *prevp; - } - *eofp = 1; - ep = NULL; + + /* + * Check and set flags if the current (right) segment exists. + * If it doesn't exist, we're converting the hole at end-of-file. + */ + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); + + if (isnullstartblock(right.br_startblock)) + state |= BMAP_RIGHT_DELAY; + } + + /* + * Set contiguity flags on the left and right neighbors. + * Don't let extents get too large, even if the pieces are contiguous. + */ + if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && + left.br_startoff + left.br_blockcount == new->br_startoff && + left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; + + if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && + new->br_startoff + new->br_blockcount == right.br_startoff && + new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + (!(state & BMAP_LEFT_CONTIG) || + (left.br_blockcount + new->br_blockcount + + right.br_blockcount <= MAXEXTLEN))) + state |= BMAP_RIGHT_CONTIG; + + /* + * Switch out based on the contiguity flags. 
+ */ + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with delayed allocations + * on the left and on the right. + * Merge all three into a single extent record. + */ + --*idx; + temp = left.br_blockcount + new->br_blockcount + + right.br_blockcount; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); + oldlen = startblockval(left.br_startblock) + + startblockval(new->br_startblock) + + startblockval(right.br_startblock); + newlen = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), + nullstartblock((int)newlen)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 1, state); + break; + + case BMAP_LEFT_CONTIG: + /* + * New allocation is contiguous with a delayed allocation + * on the left. + * Merge the new allocation with the left neighbor. + */ + --*idx; + temp = left.br_blockcount + new->br_blockcount; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); + oldlen = startblockval(left.br_startblock) + + startblockval(new->br_startblock); + newlen = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), + nullstartblock((int)newlen)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + + case BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with a delayed allocation + * on the right. + * Merge the new allocation with the right neighbor. 
+ */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + temp = new->br_blockcount + right.br_blockcount; + oldlen = startblockval(new->br_startblock) + + startblockval(right.br_startblock); + newlen = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, + nullstartblock((int)newlen), temp, right.br_state); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + + case 0: + /* + * New allocation is not contiguous with another + * delayed allocation. + * Insert a new entry. + */ + oldlen = newlen = 0; + xfs_iext_insert(ip, *idx, 1, new, state); + break; + } + if (oldlen != newlen) { + ASSERT(oldlen > newlen); + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, + (int64_t)(oldlen - newlen), 0); + /* + * Nothing to do for disk quota accounting here. + */ } - *lastxp = lastx; - return ep; } /* - * Search the extents list for the inode, for the extent containing bno. - * If bno lies in a hole, point to the next entry. If bno lies past eof, - * *eofp will be set, and *prevp will contain the last entry (null if none). - * Else, *lastxp will be set to the index of the found - * entry; *gotp will contain the entry. + * Convert a hole to a real allocation. 
*/ -xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_extents( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int fork, /* data or attr fork */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +STATIC int /* error */ +xfs_bmap_add_extent_hole_real( + struct xfs_bmalloca *bma, + int whichfork) { - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + struct xfs_bmbt_irec *new = &bma->got; + int error; /* error return value */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_irec_t left; /* left neighbor extent entry */ + xfs_bmbt_irec_t right; /* right neighbor extent entry */ + int rval=0; /* return value (logging flags) */ + int state; /* state bits, accessed thru macros */ - XFS_STATS_INC(xs_look_exlist); - ifp = XFS_IFORK_PTR(ip, fork); + ifp = XFS_IFORK_PTR(bma->ip, whichfork); - ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); + ASSERT(bma->idx >= 0); + ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + ASSERT(!bma->cur || + !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && - !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { - xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, - "Access to block zero in inode %llu " - "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x lastx: %x\n", - (unsigned long long)ip->i_ino, - (unsigned long long)gotp->br_startblock, - (unsigned long long)gotp->br_startoff, - (unsigned long long)gotp->br_blockcount, - gotp->br_state, *lastxp); - *lastxp = NULLEXTNUM; - *eofp = 1; - return NULL; + XFS_STATS_INC(xs_add_exlist); + + 
state = 0; + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + + /* + * Check and set flags if this segment has a left neighbor. + */ + if (bma->idx > 0) { + state |= BMAP_LEFT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); + if (isnullstartblock(left.br_startblock)) + state |= BMAP_LEFT_DELAY; } - return ep; -} - -/* - * Compute the worst-case number of indirect blocks that will be used - * for ip's delayed extent of length "len". - */ -STATIC xfs_filblks_t -xfs_bmap_worst_indlen( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_filblks_t len) /* delayed extent length */ -{ - int level; /* btree level number */ - int maxrecs; /* maximum record count at this level */ - xfs_mount_t *mp; /* mount structure */ - xfs_filblks_t rval; /* return value */ - mp = ip->i_mount; - maxrecs = mp->m_bmap_dmxr[0]; - for (level = 0, rval = 0; - level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); - level++) { - len += maxrecs - 1; - do_div(len, maxrecs); - rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - - level - 1; - if (level == 0) - maxrecs = mp->m_bmap_dmxr[1]; + /* + * Check and set flags if this segment has a current value. + * Not true if we're inserting into the "hole" at eof. + */ + if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + state |= BMAP_RIGHT_VALID; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); + if (isnullstartblock(right.br_startblock)) + state |= BMAP_RIGHT_DELAY; } - return rval; -} -/* - * Convert inode from non-attributed to attributed. - * Must not be in a transaction, ip must not be locked. 
- */ -int /* error code */ -xfs_bmap_add_attrfork( - xfs_inode_t *ip, /* incore inode pointer */ - int size, /* space new attribute needs */ - int rsvd) /* xact may use reserved blks */ -{ - xfs_fsblock_t firstblock; /* 1st block/ag allocated */ - xfs_bmap_free_t flist; /* freed extent records */ - xfs_mount_t *mp; /* mount structure */ - xfs_trans_t *tp; /* transaction pointer */ - int blks; /* space reservation */ - int version = 1; /* superblock attr version */ - int committed; /* xaction was committed */ - int logflags; /* logging flags */ - int error; /* error return value */ + /* + * We're inserting a real allocation between "left" and "right". + * Set the contiguity flags. Don't let extents get too large. + */ + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && + left.br_startoff + left.br_blockcount == new->br_startoff && + left.br_startblock + left.br_blockcount == new->br_startblock && + left.br_state == new->br_state && + left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + state |= BMAP_LEFT_CONTIG; - ASSERT(XFS_IFORK_Q(ip) == 0); + if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && + new->br_startoff + new->br_blockcount == right.br_startoff && + new->br_startblock + new->br_blockcount == right.br_startblock && + new->br_state == right.br_state && + new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + (!(state & BMAP_LEFT_CONTIG) || + left.br_blockcount + new->br_blockcount + + right.br_blockcount <= MAXEXTLEN)) + state |= BMAP_RIGHT_CONTIG; - mp = ip->i_mount; - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); - tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); - blks = XFS_ADDAFORK_SPACE_RES(mp); - if (rsvd) - tp->t_flags |= XFS_TRANS_RESERVE; - if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) - goto error0; - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? 
- XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); - if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); - return error; - } - if (XFS_IFORK_Q(ip)) - goto error1; - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { + error = 0; + /* + * Select which case we're in here, and implement it. + */ + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: /* - * For inodes coming from pre-6.2 filesystems. + * New allocation is contiguous with real allocations on the + * left and on the right. + * Merge all three into a single extent record. */ - ASSERT(ip->i_d.di_aformat == 0); - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; - } - ASSERT(ip->i_d.di_anextents == 0); + --bma->idx; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + left.br_blockcount + new->br_blockcount + + right.br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_DEV: - ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; + XFS_IFORK_NEXT_SET(bma->ip, whichfork, + XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); + if (bma->cur == NULL) { + rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); + } else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, + right.br_startblock, right.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_delete(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_btree_decrement(bma->cur, 0, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, left.br_startoff, + left.br_startblock, + 
left.br_blockcount + + new->br_blockcount + + right.br_blockcount, + left.br_state); + if (error) + goto done; + } break; - case XFS_DINODE_FMT_UUID: - ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3; + + case BMAP_LEFT_CONTIG: + /* + * New allocation is contiguous with a real allocation + * on the left. + * Merge the new allocation with the left neighbor. + */ + --bma->idx; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + left.br_blockcount + new->br_blockcount); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + if (bma->cur == NULL) { + rval = xfs_ilog_fext(whichfork); + } else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, + left.br_startblock, left.br_blockcount, + &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, left.br_startoff, + left.br_startblock, + left.br_blockcount + + new->br_blockcount, + left.br_state); + if (error) + goto done; + } break; - case XFS_DINODE_FMT_LOCAL: - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); - if (!ip->i_d.di_forkoff) - ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; - else if (mp->m_flags & XFS_MOUNT_ATTR2) - version = 2; + + case BMAP_RIGHT_CONTIG: + /* + * New allocation is contiguous with a real allocation + * on the right. + * Merge the new allocation with the right neighbor. 
+ */ + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), + new->br_startoff, new->br_startblock, + new->br_blockcount + right.br_blockcount, + right.br_state); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + if (bma->cur == NULL) { + rval = xfs_ilog_fext(whichfork); + } else { + rval = 0; + error = xfs_bmbt_lookup_eq(bma->cur, + right.br_startoff, + right.br_startblock, + right.br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + error = xfs_bmbt_update(bma->cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + + right.br_blockcount, + right.br_state); + if (error) + goto done; + } + break; + + case 0: + /* + * New allocation is not contiguous with another + * real allocation. + * Insert a new entry. + */ + xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + XFS_IFORK_NEXT_SET(bma->ip, whichfork, + XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); + if (bma->cur == NULL) { + rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); + } else { + rval = XFS_ILOG_CORE; + error = xfs_bmbt_lookup_eq(bma->cur, + new->br_startoff, + new->br_startblock, + new->br_blockcount, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 0, done); + bma->cur->bc_rec.b.br_state = new->br_state; + error = xfs_btree_insert(bma->cur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } break; - default: - ASSERT(0); - error = XFS_ERROR(EINVAL); - goto error1; } - ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); - ip->i_afp->if_flags = XFS_IFEXTENTS; - logflags = 0; - xfs_bmap_init(&flist, &firstblock); - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_LOCAL: - error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, - &logflags); - break; - case XFS_DINODE_FMT_EXTENTS: - error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, - &flist, &logflags); - break; - case 
XFS_DINODE_FMT_BTREE: - error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, - &logflags); - break; - default: - error = 0; - break; - } - if (logflags) - xfs_trans_log_inode(tp, ip, logflags); - if (error) - goto error2; - if (!xfs_sb_version_hasattr(&mp->m_sb) || - (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { - __int64_t sbfields = 0; + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(bma->ip, whichfork)) { + int tmp_logflags; /* partial log flag return val */ - spin_lock(&mp->m_sb_lock); - if (!xfs_sb_version_hasattr(&mp->m_sb)) { - xfs_sb_version_addattr(&mp->m_sb); - sbfields |= XFS_SB_VERSIONNUM; - } - if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { - xfs_sb_version_addattr2(&mp->m_sb); - sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); - } - if (sbfields) { - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); - } else - spin_unlock(&mp->m_sb_lock); + ASSERT(bma->cur == NULL); + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + 0, &tmp_logflags, whichfork); + bma->logflags |= tmp_logflags; + if (error) + goto done; } - error = xfs_bmap_finish(&tp, &flist, &committed); - if (error) - goto error2; - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -error2: - xfs_bmap_cancel(&flist); -error1: - xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + /* clear out the allocated field, done with it now in any case. */ + if (bma->cur) + bma->cur->bc_private.b.allocated = 0; + + xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); +done: + bma->logflags |= rval; return error; } /* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). 
+ * Functions used in the extent read, allocate and remove paths */ -/* ARGSUSED */ -void -xfs_bmap_add_free( - xfs_fsblock_t bno, /* fs block number of extent */ - xfs_filblks_t len, /* length of extent */ - xfs_bmap_free_t *flist, /* list of extents */ - xfs_mount_t *mp) /* mount point structure */ -{ - xfs_bmap_free_item_t *cur; /* current (next) element */ - xfs_bmap_free_item_t *new; /* new element */ - xfs_bmap_free_item_t *prev; /* previous element */ -#ifdef DEBUG - xfs_agnumber_t agno; - xfs_agblock_t agbno; - - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); - ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_bmap_free_item_zone != NULL); - new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); - new->xbfi_startblock = bno; - new->xbfi_blockcount = (xfs_extlen_t)len; - for (prev = NULL, cur = flist->xbf_first; - cur != NULL; - prev = cur, cur = cur->xbfi_next) { - if (cur->xbfi_startblock >= bno) - break; - } - if (prev) - prev->xbfi_next = new; - else - flist->xbf_first = new; - new->xbfi_next = cur; - flist->xbf_count++; -} /* - * Compute and fill in the value of the maximum depth of a bmap btree - * in this filesystem. Done once, during mount. + * Adjust the size of the new extent based on di_extsize and rt extsize. */ -void -xfs_bmap_compute_maxlevels( - xfs_mount_t *mp, /* file system mount structure */ - int whichfork) /* data or attr fork */ +STATIC int +xfs_bmap_extsize_align( + xfs_mount_t *mp, + xfs_bmbt_irec_t *gotp, /* next extent pointer */ + xfs_bmbt_irec_t *prevp, /* previous extent pointer */ + xfs_extlen_t extsz, /* align to this extent size */ + int rt, /* is this a realtime inode? */ + int eof, /* is extent at end-of-file? */ + int delay, /* creating delalloc extent? 
*/ + int convert, /* overwriting unwritten extent? */ + xfs_fileoff_t *offp, /* in/out: aligned offset */ + xfs_extlen_t *lenp) /* in/out: aligned length */ { - int level; /* btree level */ - uint maxblocks; /* max blocks at this level */ - uint maxleafents; /* max leaf entries possible */ - int maxrootrecs; /* max records in root block */ - int minleafrecs; /* min records in leaf block */ - int minnoderecs; /* min records in node block */ - int sz; /* root block size */ + xfs_fileoff_t orig_off; /* original offset */ + xfs_extlen_t orig_alen; /* original length */ + xfs_fileoff_t orig_end; /* original off+len */ + xfs_fileoff_t nexto; /* next file offset */ + xfs_fileoff_t prevo; /* previous file offset */ + xfs_fileoff_t align_off; /* temp for offset */ + xfs_extlen_t align_alen; /* temp for length */ + xfs_extlen_t temp; /* temp for calculations */ + + if (convert) + return 0; + + orig_off = align_off = *offp; + orig_alen = align_alen = *lenp; + orig_end = orig_off + orig_alen; /* - * The maximum number of extents in a file, hence the maximum - * number of leaf entries, is controlled by the type of di_nextents - * (a signed 32-bit number, xfs_extnum_t), or by di_anextents - * (a signed 16-bit number, xfs_aextnum_t). - * - * Note that we can no longer assume that if we are in ATTR1 that - * the fork offset of all the inodes will be - * (xfs_default_attroffset(ip) >> 3) because we could have mounted - * with ATTR2 and then mounted back with ATTR1, keeping the - * di_forkoff's fixed but probably at various positions. Therefore, - * for both ATTR1 and ATTR2 we have to assume the worst case scenario - * of a minimum size available. + * If this request overlaps an existing extent, then don't + * attempt to perform any additional alignment. 
*/ - if (whichfork == XFS_DATA_FORK) { - maxleafents = MAXEXTNUM; - sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); - } else { - maxleafents = MAXAEXTNUM; - sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); - } - maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); - minleafrecs = mp->m_bmap_dmnr[0]; - minnoderecs = mp->m_bmap_dmnr[1]; - maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; - for (level = 1; maxblocks > 1; level++) { - if (maxblocks <= maxrootrecs) - maxblocks = 1; - else - maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + if (!delay && !eof && + (orig_off >= gotp->br_startoff) && + (orig_end <= gotp->br_startoff + gotp->br_blockcount)) { + return 0; } - mp->m_bm_maxlevels[whichfork] = level; -} - -/* - * Free up any items left in the list. - */ -void -xfs_bmap_cancel( - xfs_bmap_free_t *flist) /* list of bmap_free_items */ -{ - xfs_bmap_free_item_t *free; /* free list item */ - xfs_bmap_free_item_t *next; - if (flist->xbf_count == 0) - return; - ASSERT(flist->xbf_first != NULL); - for (free = flist->xbf_first; free; free = next) { - next = free->xbfi_next; - xfs_bmap_del_free(flist, NULL, free); + /* + * If the file offset is unaligned vs. the extent size + * we need to align it. This will be possible unless + * the file was previously written with a kernel that didn't + * perform this alignment, or if a truncate shot us in the + * foot. + */ + temp = do_mod(orig_off, extsz); + if (temp) { + align_alen += temp; + align_off -= temp; + } + /* + * Same adjustment for the end of the requested area. + */ + if ((temp = (align_alen % extsz))) { + align_alen += extsz - temp; + } + /* + * If the previous block overlaps with this proposed allocation + * then move the start forward without adjusting the length. 
+ */ + if (prevp->br_startoff != NULLFILEOFF) { + if (prevp->br_startblock == HOLESTARTBLOCK) + prevo = prevp->br_startoff; + else + prevo = prevp->br_startoff + prevp->br_blockcount; + } else + prevo = 0; + if (align_off != orig_off && align_off < prevo) + align_off = prevo; + /* + * If the next block overlaps with this proposed allocation + * then move the start back without adjusting the length, + * but not before offset 0. + * This may of course make the start overlap previous block, + * and if we hit the offset 0 limit then the next block + * can still overlap too. + */ + if (!eof && gotp->br_startoff != NULLFILEOFF) { + if ((delay && gotp->br_startblock == HOLESTARTBLOCK) || + (!delay && gotp->br_startblock == DELAYSTARTBLOCK)) + nexto = gotp->br_startoff + gotp->br_blockcount; + else + nexto = gotp->br_startoff; + } else + nexto = NULLFILEOFF; + if (!eof && + align_off + align_alen != orig_end && + align_off + align_alen > nexto) + align_off = nexto > align_alen ? nexto - align_alen : 0; + /* + * If we're now overlapping the next or previous extent that + * means we can't fit an extsz piece in this hole. Just move + * the start forward to the first valid spot and set + * the length so we hit the end. + */ + if (align_off != orig_off && align_off < prevo) + align_off = prevo; + if (align_off + align_alen != orig_end && + align_off + align_alen > nexto && + nexto != NULLFILEOFF) { + ASSERT(nexto > prevo); + align_alen = nexto - align_off; } - ASSERT(flist->xbf_count == 0); -} - -/* - * Returns the file-relative block number of the first unused block(s) - * in the file with at least "len" logically contiguous blocks free. - * This is the lowest-address hole if the file has holes, else the first block - * past the end of file. - * Return 0 if the file is currently local (in-inode). 
- */ -int /* error */ -xfs_bmap_first_unused( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_extlen_t len, /* size of hole to find */ - xfs_fileoff_t *first_unused, /* unused block */ - int whichfork) /* data or attr fork */ -{ - int error; /* error return value */ - int idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t lastaddr; /* last block number seen */ - xfs_fileoff_t lowest; /* lowest useful block */ - xfs_fileoff_t max; /* starting useful block */ - xfs_fileoff_t off; /* offset for this block */ - xfs_extnum_t nextents; /* number of extent entries */ - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - *first_unused = 0; - return 0; - } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - lowest = *first_unused; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); - off = xfs_bmbt_get_startoff(ep); + /* + * If realtime, and the result isn't a multiple of the realtime + * extent size we need to remove blocks until it is. + */ + if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { /* - * See if the hole before this extent will work. + * We're not covering the original request, or + * we won't be able to once we fix the length. */ - if (off >= lowest + len && off - max >= len) { - *first_unused = max; - return 0; + if (orig_off < align_off || + orig_end > align_off + align_alen || + align_alen - temp < orig_alen) + return XFS_ERROR(EINVAL); + /* + * Try to fix it by moving the start up. 
+ */ + if (align_off + temp <= orig_off) { + align_alen -= temp; + align_off += temp; } - lastaddr = off + xfs_bmbt_get_blockcount(ep); - max = XFS_FILEOFF_MAX(lastaddr, lowest); + /* + * Try to fix it by moving the end in. + */ + else if (align_off + align_alen - temp >= orig_end) + align_alen -= temp; + /* + * Set the start to the minimum then trim the length. + */ + else { + align_alen -= orig_off - align_off; + align_off = orig_off; + align_alen -= align_alen % mp->m_sb.sb_rextsize; + } + /* + * Result doesn't cover the request, fail it. + */ + if (orig_off < align_off || orig_end > align_off + align_alen) + return XFS_ERROR(EINVAL); + } else { + ASSERT(orig_off >= align_off); + ASSERT(orig_end <= align_off + align_alen); } - *first_unused = max; + +#ifdef DEBUG + if (!eof && gotp->br_startoff != NULLFILEOFF) + ASSERT(align_off + align_alen <= gotp->br_startoff); + if (prevp->br_startoff != NULLFILEOFF) + ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount); +#endif + + *lenp = align_alen; + *offp = align_off; return 0; } -/* - * Returns the file-relative block number of the last block + 1 before - * last_block (input value) in the file. - * This is not based on i_size, it is based on the extent records. - * Returns 0 for local files, as they do not have extent records. 
- */ -int /* error */ -xfs_bmap_last_before( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_fileoff_t *last_block, /* last block */ - int whichfork) /* data or attr fork */ +#define XFS_ALLOC_GAP_UNITS 4 + +STATIC void +xfs_bmap_adjacent( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { - xfs_fileoff_t bno; /* input file offset */ - int eof; /* hit end of file */ - xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ - int error; /* error return value */ - xfs_bmbt_irec_t got; /* current extent value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t lastx; /* last extent used */ - xfs_bmbt_irec_t prev; /* previous extent value */ + xfs_fsblock_t adjust; /* adjustment to block numbers */ + xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ + xfs_mount_t *mp; /* mount point structure */ + int nullfb; /* true if ap->firstblock isn't set */ + int rt; /* true if inode is realtime */ - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EIO); - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - *last_block = 0; - return 0; +#define ISVALID(x,y) \ + (rt ? \ + (x) < mp->m_sb.sb_rblocks : \ + XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ + XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ + XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) + + mp = ap->ip->i_mount; + nullfb = *ap->firstblock == NULLFSBLOCK; + rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); + /* + * If allocating at eof, and there's a previous real block, + * try to use its last block as our starting point. 
+ */ + if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && + !isnullstartblock(ap->prev.br_startblock) && + ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, + ap->prev.br_startblock)) { + ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; + /* + * Adjust for the gap between prevp and us. + */ + adjust = ap->offset - + (ap->prev.br_startoff + ap->prev.br_blockcount); + if (adjust && + ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) + ap->blkno += adjust; } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - bno = *last_block - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - if (eof || xfs_bmbt_get_startoff(ep) > bno) { - if (prev.br_startoff == NULLFILEOFF) - *last_block = 0; + /* + * If not at eof, then compare the two neighbor blocks. + * Figure out whether either one gives us a good starting point, + * and pick the better one. + */ + else if (!ap->eof) { + xfs_fsblock_t gotbno; /* right side block number */ + xfs_fsblock_t gotdiff=0; /* right side difference */ + xfs_fsblock_t prevbno; /* left side block number */ + xfs_fsblock_t prevdiff=0; /* left side difference */ + + /* + * If there's a previous (left) block, select a requested + * start block based on it. + */ + if (ap->prev.br_startoff != NULLFILEOFF && + !isnullstartblock(ap->prev.br_startblock) && + (prevbno = ap->prev.br_startblock + + ap->prev.br_blockcount) && + ISVALID(prevbno, ap->prev.br_startblock)) { + /* + * Calculate gap to end of previous block. + */ + adjust = prevdiff = ap->offset - + (ap->prev.br_startoff + + ap->prev.br_blockcount); + /* + * Figure the startblock based on the previous block's + * end and the gap size. + * Heuristic! + * If the gap is large relative to the piece we're + * allocating, or using it gives us an invalid block + * number, then just use the end of the previous block. 
+ */ + if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && + ISVALID(prevbno + prevdiff, + ap->prev.br_startblock)) + prevbno += adjust; + else + prevdiff += adjust; + /* + * If the firstblock forbids it, can't use it, + * must use default. + */ + if (!rt && !nullfb && + XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) + prevbno = NULLFSBLOCK; + } + /* + * No previous block or can't follow it, just default. + */ + else + prevbno = NULLFSBLOCK; + /* + * If there's a following (right) block, select a requested + * start block based on it. + */ + if (!isnullstartblock(ap->got.br_startblock)) { + /* + * Calculate gap to start of next block. + */ + adjust = gotdiff = ap->got.br_startoff - ap->offset; + /* + * Figure the startblock based on the next block's + * start and the gap size. + */ + gotbno = ap->got.br_startblock; + /* + * Heuristic! + * If the gap is large relative to the piece we're + * allocating, or using it gives us an invalid block + * number, then just use the start of the next block + * offset by our length. + */ + if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && + ISVALID(gotbno - gotdiff, gotbno)) + gotbno -= adjust; + else if (ISVALID(gotbno - ap->length, gotbno)) { + gotbno -= ap->length; + gotdiff += adjust - ap->length; + } else + gotdiff += adjust; + /* + * If the firstblock forbids it, can't use it, + * must use default. + */ + if (!rt && !nullfb && + XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) + gotbno = NULLFSBLOCK; + } + /* + * No next block, just default. + */ else - *last_block = prev.br_startoff + prev.br_blockcount; - } - /* - * Otherwise *last_block is already the right answer. 
- */ - return 0; -} - -STATIC int -xfs_bmap_last_extent( - struct xfs_trans *tp, - struct xfs_inode *ip, - int whichfork, - struct xfs_bmbt_irec *rec, - int *is_empty) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - int error; - int nextents; - - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(tp, ip, whichfork); - if (error) - return error; - } - - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *is_empty = 1; - return 0; + gotbno = NULLFSBLOCK; + /* + * If both valid, pick the better one, else the only good + * one, else ap->blkno is already set (to 0 or the inode block). + */ + if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) + ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; + else if (prevbno != NULLFSBLOCK) + ap->blkno = prevbno; + else if (gotbno != NULLFSBLOCK) + ap->blkno = gotbno; } - - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); - *is_empty = 0; - return 0; +#undef ISVALID } -/* - * Check the last inode extent to determine whether this allocation will result - * in blocks being allocated at the end of the file. When we allocate new data - * blocks at the end of the file which do not start at the previous data block, - * we will try to align the new blocks at stripe unit boundaries. - * - * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be - * at, or past the EOF. 
- */ STATIC int -xfs_bmap_isaeof( - struct xfs_bmalloca *bma, - int whichfork) +xfs_bmap_btalloc_nullfb( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen) { - struct xfs_bmbt_irec rec; - int is_empty; + struct xfs_mount *mp = ap->ip->i_mount; + struct xfs_perag *pag; + xfs_agnumber_t ag, startag; + int notinit = 0; int error; - bma->aeof = 0; - error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, - &is_empty); - if (error || is_empty) - return error; + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) + args->type = XFS_ALLOCTYPE_NEAR_BNO; + else + args->type = XFS_ALLOCTYPE_START_BNO; + args->total = ap->total; /* - * Check if we are allocation or past the last extent, or at least into - * the last delayed allocated extent. + * Search for an allocation group with a single extent large enough + * for the request. If one isn't found, then adjust the minimum + * allocation size to the largest space found. */ - bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || - (bma->offset >= rec.br_startoff && - isnullstartblock(rec.br_startblock)); - return 0; -} - -/* - * Check if the endoff is outside the last extent. If so the caller will grow - * the allocation to a stripe unit boundary. All offsets are considered outside - * the end of file for an empty fork, so 1 is returned in *eof in that case. - */ -int -xfs_bmap_eof( - struct xfs_inode *ip, - xfs_fileoff_t endoff, - int whichfork, - int *eof) -{ - struct xfs_bmbt_irec rec; - int error; - - error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); - if (error || *eof) - return error; - - *eof = endoff >= rec.br_startoff + rec.br_blockcount; - return 0; -} + startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); + if (startag == NULLAGNUMBER) + startag = ag = 0; -/* - * Returns the file-relative block number of the first block past eof in - * the file. This is not based on i_size, it is based on the extent records. 
- * Returns 0 for local files, as they do not have extent records. - */ -int -xfs_bmap_last_offset( - struct xfs_trans *tp, - struct xfs_inode *ip, - xfs_fileoff_t *last_block, - int whichfork) -{ - struct xfs_bmbt_irec rec; - int is_empty; - int error; + pag = xfs_perag_get(mp, ag); + while (*blen < args->maxlen) { + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, args->tp, ag, + XFS_ALLOC_FLAG_TRYLOCK); + if (error) { + xfs_perag_put(pag); + return error; + } + } - *last_block = 0; + /* + * See xfs_alloc_fix_freelist... + */ + if (pag->pagf_init) { + xfs_extlen_t longest; + longest = xfs_alloc_longest_free_extent(mp, pag); + if (*blen < longest) + *blen = longest; + } else + notinit = 1; - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) - return 0; + if (xfs_inode_is_filestream(ap->ip)) { + if (*blen >= args->maxlen) + break; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return XFS_ERROR(EIO); + if (ap->userdata) { + /* + * If startag is an invalid AG, we've + * come here once before and + * xfs_filestream_new_ag picked the + * best currently available. + * + * Don't continue looping, since we + * could loop forever. + */ + if (startag == NULLAGNUMBER) + break; - error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); - if (error || is_empty) - return error; + error = xfs_filestream_new_ag(ap, &ag); + xfs_perag_put(pag); + if (error) + return error; - *last_block = rec.br_startoff + rec.br_blockcount; - return 0; -} + /* loop again to set 'blen'*/ + startag = NULLAGNUMBER; + pag = xfs_perag_get(mp, ag); + continue; + } + } + if (++ag == mp->m_sb.sb_agcount) + ag = 0; + if (ag == startag) + break; + xfs_perag_put(pag); + pag = xfs_perag_get(mp, ag); + } + xfs_perag_put(pag); -/* - * Returns whether the selected fork of the inode has exactly one - * block or not. 
For the data fork we check this matches di_size, - * implying the file's range is 0..bsize-1. - */ -int /* 1=>1 block, 0=>otherwise */ -xfs_bmap_one_block( - xfs_inode_t *ip, /* incore inode */ - int whichfork) /* data or attr fork */ -{ - xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int rval; /* return value */ - xfs_bmbt_irec_t s; /* internal version of extent */ + /* + * Since the above loop did a BUF_TRYLOCK, it is + * possible that there is space for this request. + */ + if (notinit || *blen < ap->minlen) + args->minlen = ap->minlen; + /* + * If the best seen length is less than the request + * length, use the best as the minimum. + */ + else if (*blen < args->maxlen) + args->minlen = *blen; + /* + * Otherwise we've seen an extent as big as maxlen, + * use that as the minimum. + */ + else + args->minlen = args->maxlen; -#ifndef DEBUG - if (whichfork == XFS_DATA_FORK) - return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; -#endif /* !DEBUG */ - if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) - return 0; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return 0; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ep = xfs_iext_get_ext(ifp, 0); - xfs_bmbt_get_all(ep, &s); - rval = s.br_startoff == 0 && s.br_blockcount == 1; - if (rval && whichfork == XFS_DATA_FORK) - ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); - return rval; + /* + * set the failure fallback case to look in the selected + * AG as the stream may have moved. 
+ */ + if (xfs_inode_is_filestream(ap->ip)) + ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); + + return 0; } STATIC int -xfs_bmap_sanity_check( - struct xfs_mount *mp, - struct xfs_buf *bp, - int level) +xfs_bmap_btalloc( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_mount_t *mp; /* mount point structure */ + xfs_alloctype_t atype = 0; /* type for allocation routines */ + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ + xfs_agnumber_t ag; + xfs_alloc_arg_t args; + xfs_extlen_t blen; + xfs_extlen_t nextminlen = 0; + int nullfb; /* true if ap->firstblock isn't set */ + int isaligned; + int tryagain; + int error; - if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || - be16_to_cpu(block->bb_level) != level || - be16_to_cpu(block->bb_numrecs) == 0 || - be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return 0; - return 1; -} + ASSERT(ap->length); -/* - * Read in the extents to if_extents. - * All inode fields are set up by caller, we just traverse the btree - * and copy the records in. If the file system cannot contain unwritten - * extents, the records are checked for no "state" flags. 
- */ -int /* error */ -xfs_bmap_read_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - int whichfork) /* data or attr fork */ -{ - struct xfs_btree_block *block; /* current btree block */ - xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ - int error; /* error return value */ - xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ - xfs_extnum_t i, j; /* index into the extents list */ - xfs_ifork_t *ifp; /* fork structure */ - int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ - /* REFERENCED */ - xfs_extnum_t room; /* number of entries there's room for */ + mp = ap->ip->i_mount; + align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; + if (unlikely(align)) { + error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, + align, 0, ap->eof, 0, ap->conv, + &ap->offset, &ap->length); + ASSERT(!error); + ASSERT(ap->length); + } + nullfb = *ap->firstblock == NULLFSBLOCK; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); + if (nullfb) { + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { + ag = xfs_filestream_lookup_ag(ap->ip); + ag = (ag != NULLAGNUMBER) ? ag : 0; + ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); + } else { + ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + } + } else + ap->blkno = *ap->firstblock; + + xfs_bmap_adjacent(ap); - bno = NULLFSBLOCK; - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : - XFS_EXTFMT_INODE(ip); - block = ifp->if_broot; /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. + * If allowed, use ap->blkno; otherwise must use firstblock since + * it's in the right allocation group. 
*/ - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) + ; + else + ap->blkno = *ap->firstblock; /* - * Go down the tree until leaf level is reached, following the first - * pointer (leftmost) at each level. + * Normal allocation, done through xfs_alloc_vextent. */ - while (level-- > 0) { - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + tryagain = isaligned = 0; + memset(&args, 0, sizeof(args)); + args.tp = ap->tp; + args.mp = mp; + args.fsbno = ap->blkno; + + /* Trim the allocation back to the maximum an AG can fit. */ + args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); + args.firstblock = *ap->firstblock; + blen = 0; + if (nullfb) { + error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); if (error) return error; - block = XFS_BUF_TO_BLOCK(bp); - XFS_WANT_CORRUPTED_GOTO( - xfs_bmap_sanity_check(mp, bp, level), - error0); - if (level == 0) - break; - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); - xfs_trans_brelse(tp, bp); + } else if (ap->flist->xbf_low) { + if (xfs_inode_is_filestream(ap->ip)) + args.type = XFS_ALLOCTYPE_FIRST_AG; + else + args.type = XFS_ALLOCTYPE_START_BNO; + args.total = args.minlen = ap->minlen; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.total = ap->total; + args.minlen = ap->minlen; + } + /* apply extent size hints if obtained earlier */ + if (unlikely(align)) { + args.prod = align; + if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); + } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { + args.prod = 1; + 
args.mod = 0; + } else { + args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; + if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); } /* - * Here with bp and block set to the leftmost leaf node in the tree. - */ - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - i = 0; - /* - * Loop over all leaf nodes. Copy information to the extent records. + * If we are not low on available data blocks, and the + * underlying logical volume manager is a stripe, and + * the file offset is zero then try to allocate data + * blocks on stripe unit boundary. + * NOTE: ap->aeof is only set if the allocation length + * is >= the stripe unit and the allocation offset is + * at the end of file. */ - for (;;) { - xfs_bmbt_rec_t *frp; - xfs_fsblock_t nextbno; - xfs_extnum_t num_recs; - xfs_extnum_t start; - - num_recs = xfs_btree_get_numrecs(block); - if (unlikely(i + num_recs > room)) { - ASSERT(i + num_recs <= room); - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, (btree extents).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", - XFS_ERRLEVEL_LOW, ip->i_mount, block); - goto error0; + if (!ap->flist->xbf_low && ap->aeof) { + if (!ap->offset) { + args.alignment = mp->m_dalign; + atype = args.type; + isaligned = 1; + /* + * Adjust for alignment + */ + if (blen > args.alignment && blen <= args.maxlen) + args.minlen = blen - args.alignment; + args.minalignslop = 0; + } else { + /* + * First try an exact bno allocation. + * If it fails then do a near or start bno + * allocation with alignment turned on. + */ + atype = args.type; + tryagain = 1; + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.alignment = 1; + /* + * Compute the minlen+alignment for the + * next case. Set slop so that the value + * of minlen+alignment+slop doesn't go up + * between the calls. 
+ */ + if (blen > mp->m_dalign && blen <= args.maxlen) + nextminlen = blen - mp->m_dalign; + else + nextminlen = args.minlen; + if (nextminlen + mp->m_dalign > args.minlen + 1) + args.minalignslop = + nextminlen + mp->m_dalign - + args.minlen - 1; + else + args.minalignslop = 0; } - XFS_WANT_CORRUPTED_GOTO( - xfs_bmap_sanity_check(mp, bp, 0), - error0); + } else { + args.alignment = 1; + args.minalignslop = 0; + } + args.minleft = ap->minleft; + args.wasdel = ap->wasdel; + args.isfl = 0; + args.userdata = ap->userdata; + if ((error = xfs_alloc_vextent(&args))) + return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { + /* + * Exact allocation failed. Now try with alignment + * turned on. + */ + args.type = atype; + args.fsbno = ap->blkno; + args.alignment = mp->m_dalign; + args.minlen = nextminlen; + args.minalignslop = 0; + isaligned = 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (isaligned && args.fsbno == NULLFSBLOCK) { /* - * Read-ahead the next leaf block, if any. + * allocation failed, so turn off alignment and + * try again. */ - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - if (nextbno != NULLFSBLOCK) - xfs_btree_reada_bufl(mp, nextbno, 1, - &xfs_bmbt_buf_ops); + args.type = atype; + args.fsbno = ap->blkno; + args.alignment = 0; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb && + args.minlen > ap->minlen) { + args.minlen = ap->minlen; + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = ap->blkno; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb) { + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.total = ap->minlen; + args.minleft = 0; + if ((error = xfs_alloc_vextent(&args))) + return error; + ap->flist->xbf_low = 1; + } + if (args.fsbno != NULLFSBLOCK) { /* - * Copy records into the extent records. 
+ * check the allocation happened at the same or higher AG than + * the first block that was allocated. */ - frp = XFS_BMBT_REC_ADDR(mp, block, 1); - start = i; - for (j = 0; j < num_recs; j++, i++, frp++) { - xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); - trp->l0 = be64_to_cpu(frp->l0); - trp->l1 = be64_to_cpu(frp->l1); - } - if (exntf == XFS_EXTFMT_NOSTATE) { - /* - * Check all attribute bmap btree records and - * any "older" data bmap btree records for a - * set bit in the "extent flag" position. - */ - if (unlikely(xfs_check_nostate_extents(ifp, - start, num_recs))) { - XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); - goto error0; - } - } - xfs_trans_brelse(tp, bp); - bno = nextbno; + ASSERT(*ap->firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(mp, *ap->firstblock) == + XFS_FSB_TO_AGNO(mp, args.fsbno) || + (ap->flist->xbf_low && + XFS_FSB_TO_AGNO(mp, *ap->firstblock) < + XFS_FSB_TO_AGNO(mp, args.fsbno))); + + ap->blkno = args.fsbno; + if (*ap->firstblock == NULLFSBLOCK) + *ap->firstblock = args.fsbno; + ASSERT(nullfb || fb_agno == args.agno || + (ap->flist->xbf_low && fb_agno < args.agno)); + ap->length = args.len; + ap->ip->i_d.di_nblocks += args.len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args.len; /* - * If we've reached the end, stop. + * Adjust the disk quota also. This was reserved + * earlier. */ - if (bno == NULLFSBLOCK) - break; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); - if (error) - return error; - block = XFS_BUF_TO_BLOCK(bp); + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? 
XFS_TRANS_DQ_DELBCOUNT : + XFS_TRANS_DQ_BCOUNT, + (long) args.len); + } else { + ap->blkno = NULLFSBLOCK; + ap->length = 0; } - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); - XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; -error0: - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); -} - -#ifdef DEBUG -/* - * Add bmap trace insert entries for all the contents of the extent records. - */ -void -xfs_bmap_trace_exlist( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork, /* data or attr fork */ - unsigned long caller_ip) -{ - xfs_extnum_t idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int state = 0; - - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - for (idx = 0; idx < cnt; idx++) - trace_xfs_extlist(ip, idx, whichfork, caller_ip); } /* - * Validate that the bmbt_irecs being returned from bmapi are valid - * given the callers original parameters. Specifically check the - * ranges of the returned irecs to ensure that they only extent beyond - * the given parameters if the XFS_BMAPI_ENTIRE flag was set. + * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. + * It figures out where to ask the underlying allocator to put the new extent. 
*/ -STATIC void -xfs_bmap_validate_ret( - xfs_fileoff_t bno, - xfs_filblks_t len, - int flags, - xfs_bmbt_irec_t *mval, - int nmap, - int ret_nmap) +STATIC int +xfs_bmap_alloc( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ { - int i; /* index to map values */ - - ASSERT(ret_nmap <= nmap); - - for (i = 0; i < ret_nmap; i++) { - ASSERT(mval[i].br_blockcount > 0); - if (!(flags & XFS_BMAPI_ENTIRE)) { - ASSERT(mval[i].br_startoff >= bno); - ASSERT(mval[i].br_blockcount <= len); - ASSERT(mval[i].br_startoff + mval[i].br_blockcount <= - bno + len); - } else { - ASSERT(mval[i].br_startoff < bno + len); - ASSERT(mval[i].br_startoff + mval[i].br_blockcount > - bno); - } - ASSERT(i == 0 || - mval[i - 1].br_startoff + mval[i - 1].br_blockcount == - mval[i].br_startoff); - ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && - mval[i].br_startblock != HOLESTARTBLOCK); - ASSERT(mval[i].br_state == XFS_EXT_NORM || - mval[i].br_state == XFS_EXT_UNWRITTEN); - } + if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) + return xfs_bmap_rtalloc(ap); + return xfs_bmap_btalloc(ap); } -#endif /* DEBUG */ - /* * Trim the returned map to the required bounds @@ -4385,9 +4422,12 @@ xfs_bmapi_allocate( struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); int tmp_logflags = 0; int error; + int rt; ASSERT(bma->length > 0); + rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip); + /* * For the wasdelay case, we could also just allocate the stuff asked * for in this bmap call but that wouldn't be as good. 
@@ -4428,9 +4468,6 @@ xfs_bmapi_allocate( return error; } - if (bma->flags & XFS_BMAPI_STACK_SWITCH) - bma->stack_switch = 1; - error = xfs_bmap_alloc(bma); if (error) return error; @@ -4493,7 +4530,6 @@ xfs_bmapi_allocate( return 0; } - STATIC int xfs_bmapi_convert_unwritten( struct xfs_bmalloca *bma, @@ -4632,8 +4668,32 @@ xfs_bmapi_write( XFS_STATS_INC(xs_blk_mapw); if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + /* + * XXX (dgc): This assumes we are only called for inodes that + * contain content neutral data in local format. Anything that + * contains caller-specific data in local format that needs + * transformation to move to a block format needs to do the + * conversion to extent format itself. + * + * Directory data forks and attribute forks handle this + * themselves, but with the addition of metadata verifiers every + * data fork in local format now contains caller specific data + * and as such conversion through this function is likely to be + * broken. + * + * The only likely user of this branch is for remote symlinks, + * but we cannot overwrite the data fork contents of the symlink + * (EEXIST occurs higher up the stack) and so it will never go + * from local format to extent format here. Hence I don't think + * this branch is ever executed intentionally and we should + * consider removing it and asserting that xfs_bmapi_write() + * cannot be called directly on local format forks. i.e. callers + * are completely responsible for local to extent format + * conversion, not xfs_bmapi_write(). 
+ */ error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, - &bma.logflags, whichfork); + &bma.logflags, whichfork, + xfs_bmap_local_to_extents_init_fn); if (error) goto error0; } @@ -4666,6 +4726,9 @@ xfs_bmapi_write( bma.flist = flist; bma.firstblock = firstblock; + if (flags & XFS_BMAPI_STACK_SWITCH) + bma.stack_switch = 1; + while (bno < end && n < *nmap) { inhole = eof || bma.got.br_startoff > bno; wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); @@ -4793,804 +4856,703 @@ error0: } /* - * Unmap (remove) blocks from a file. - * If nexts is nonzero then the number of extents to remove is limited to - * that value. If not all extents in the block range can be removed then - * *done is set. + * Called by xfs_bmapi to update file extent records and the btree + * after removing space (or undoing a delayed allocation). */ -int /* error */ -xfs_bunmapi( - xfs_trans_t *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting offset to unmap */ - xfs_filblks_t len, /* length to unmap in file */ - int flags, /* misc flags */ - xfs_extnum_t nexts, /* number of extents max */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. 
for allocs */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *done) /* set if not done yet */ +STATIC int /* error */ +xfs_bmap_del_extent( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_trans_t *tp, /* current transaction pointer */ + xfs_extnum_t *idx, /* extent number to update/delete */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ + xfs_btree_cur_t *cur, /* if null, not a btree */ + xfs_bmbt_irec_t *del, /* data to remove from extents */ + int *logflagsp, /* inode logging flags */ + int whichfork) /* data or attr fork */ { - xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_irec_t del; /* extent being deleted */ - int eof; /* is deleting at eof */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - int error; /* error return value */ - xfs_extnum_t extno; /* extent number in list */ - xfs_bmbt_irec_t got; /* current extent record */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int isrt; /* freeing in rt area */ - xfs_extnum_t lastx; /* last extent index used */ - int logflags; /* transaction logging flags */ - xfs_extlen_t mod; /* rt extent offset */ - xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_irec_t prev; /* previous extent record */ - xfs_fileoff_t start; /* first file offset deleted */ - int tmp_logflags; /* partial logging flags */ - int wasdel; /* was a delayed alloc extent */ - int whichfork; /* data or attribute fork */ - xfs_fsblock_t sum; - - trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); - - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
- XFS_ATTR_FORK : XFS_DATA_FORK; - ifp = XFS_IFORK_PTR(ip, whichfork); - if (unlikely( - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { - XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, - ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); - } - mp = ip->i_mount; - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ + xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ + xfs_fsblock_t del_endblock=0; /* first block past del */ + xfs_fileoff_t del_endoff; /* first offset past del */ + int delay; /* current block is delayed allocated */ + int do_fx; /* free extent at end of routine */ + xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ + int error; /* error return value */ + int flags; /* inode logging flags */ + xfs_bmbt_irec_t got; /* current extent entry */ + xfs_fileoff_t got_endoff; /* first offset past got */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_mount_t *mp; /* mount structure */ + xfs_filblks_t nblks; /* quota/sb block count */ + xfs_bmbt_irec_t new; /* new record to be inserted */ + /* REFERENCED */ + uint qfield; /* quota field to update */ + xfs_filblks_t temp; /* for indirect length calculations */ + xfs_filblks_t temp2; /* for indirect length calculations */ + int state = 0; - ASSERT(len > 0); - ASSERT(nexts >= 0); + XFS_STATS_INC(xs_del_exlist); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *done = 1; - return 0; - } - XFS_STATS_INC(xs_blk_unmap); - isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); - start = bno; - bno = start + len - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); + if (whichfork == XFS_ATTR_FORK) + state |= BMAP_ATTRFORK; + mp = ip->i_mount; + ifp = 
XFS_IFORK_PTR(ip, whichfork); + ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(del->br_blockcount > 0); + ep = xfs_iext_get_ext(ifp, *idx); + xfs_bmbt_get_all(ep, &got); + ASSERT(got.br_startoff <= del->br_startoff); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got.br_startoff + got.br_blockcount; + ASSERT(got_endoff >= del_endoff); + delay = isnullstartblock(got.br_startblock); + ASSERT(isnullstartblock(del->br_startblock) == delay); + flags = 0; + qfield = 0; + error = 0; /* - * Check to see if the given block number is past the end of the - * file, back up to the last block if so... + * If deleting a real allocation, must free up the disk space. */ - if (eof) { - ep = xfs_iext_get_ext(ifp, --lastx); - xfs_bmbt_get_all(ep, &got); - bno = got.br_startoff + got.br_blockcount - 1; - } - logflags = 0; - if (ifp->if_flags & XFS_IFBROOT) { - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); - cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.flist = flist; - cur->bc_private.b.flags = 0; - } else - cur = NULL; - - if (isrt) { + if (!delay) { + flags = XFS_ILOG_CORE; /* - * Synchronize by locking the bitmap inode. + * Realtime allocation. Free it and record di_nblocks update. */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); - } + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { + xfs_fsblock_t bno; + xfs_filblks_t len; - extno = 0; - while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && - (nexts == 0 || extno < nexts)) { - /* - * Is the found extent after a hole in which bno lives? - * Just back up to the previous extent, if so. 
- */ - if (got.br_startoff > bno) { - if (--lastx < 0) - break; - ep = xfs_iext_get_ext(ifp, lastx); - xfs_bmbt_get_all(ep, &got); + ASSERT(do_mod(del->br_blockcount, + mp->m_sb.sb_rextsize) == 0); + ASSERT(do_mod(del->br_startblock, + mp->m_sb.sb_rextsize) == 0); + bno = del->br_startblock; + len = del->br_blockcount; + do_div(bno, mp->m_sb.sb_rextsize); + do_div(len, mp->m_sb.sb_rextsize); + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + do_fx = 0; + nblks = len * mp->m_sb.sb_rextsize; + qfield = XFS_TRANS_DQ_RTBCOUNT; } /* - * Is the last block of this extent before the range - * we're supposed to delete? If so, we're done. + * Ordinary allocation. */ - bno = XFS_FILEOFF_MIN(bno, - got.br_startoff + got.br_blockcount - 1); - if (bno < start) - break; + else { + do_fx = 1; + nblks = del->br_blockcount; + qfield = XFS_TRANS_DQ_BCOUNT; + } /* - * Then deal with the (possibly delayed) allocated space - * we found. + * Set up del_endblock and cur for later. */ - ASSERT(ep != NULL); - del = got; - wasdel = isnullstartblock(del.br_startblock); - if (got.br_startoff < start) { - del.br_startoff = start; - del.br_blockcount -= start - got.br_startoff; - if (!wasdel) - del.br_startblock += start - got.br_startoff; - } - if (del.br_startoff + del.br_blockcount > bno + 1) - del.br_blockcount = bno + 1 - del.br_startoff; - sum = del.br_startblock + del.br_blockcount; - if (isrt && - (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { - /* - * Realtime extent not lined up at the end. - * The extent could have been split into written - * and unwritten pieces, or we could just be - * unmapping part of it. But we can't really - * get rid of part of a realtime extent. - */ - if (del.br_state == XFS_EXT_UNWRITTEN || - !xfs_sb_version_hasextflgbit(&mp->m_sb)) { - /* - * This piece is unwritten, or we're not - * using unwritten extents. Skip over it. - */ - ASSERT(bno >= mod); - bno -= mod > del.br_blockcount ? 
- del.br_blockcount : mod; - if (bno < got.br_startoff) { - if (--lastx >= 0) - xfs_bmbt_get_all(xfs_iext_get_ext( - ifp, lastx), &got); - } - continue; - } - /* - * It's written, turn it unwritten. - * This is better than zeroing it. - */ - ASSERT(del.br_state == XFS_EXT_NORM); - ASSERT(xfs_trans_get_block_res(tp) > 0); - /* - * If this spans a realtime extent boundary, - * chop it back to the start of the one we end at. - */ - if (del.br_blockcount > mod) { - del.br_startoff += del.br_blockcount - mod; - del.br_startblock += del.br_blockcount - mod; - del.br_blockcount = mod; - } - del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent_unwritten_real(tp, ip, - &lastx, &cur, &del, firstblock, flist, - &logflags); - if (error) - goto error0; - goto nodelete; + del_endblock = del->br_startblock + del->br_blockcount; + if (cur) { + if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, got.br_blockcount, + &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) { - /* - * Realtime extent is lined up at the end but not - * at the front. We'll get rid of full extents if - * we can. - */ - mod = mp->m_sb.sb_rextsize - mod; - if (del.br_blockcount > mod) { - del.br_blockcount -= mod; - del.br_startoff += mod; - del.br_startblock += mod; - } else if ((del.br_startoff == start && - (del.br_state == XFS_EXT_UNWRITTEN || - xfs_trans_get_block_res(tp) == 0)) || - !xfs_sb_version_hasextflgbit(&mp->m_sb)) { - /* - * Can't make it unwritten. There isn't - * a full extent here so just skip it. - */ - ASSERT(bno >= del.br_blockcount); - bno -= del.br_blockcount; - if (got.br_startoff > bno) { - if (--lastx >= 0) { - ep = xfs_iext_get_ext(ifp, - lastx); - xfs_bmbt_get_all(ep, &got); - } - } - continue; - } else if (del.br_state == XFS_EXT_UNWRITTEN) { - /* - * This one is already unwritten. - * It must have a written left neighbor. 
- * Unwrite the killed part of that one and - * try again. - */ - ASSERT(lastx > 0); - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - lastx - 1), &prev); - ASSERT(prev.br_state == XFS_EXT_NORM); - ASSERT(!isnullstartblock(prev.br_startblock)); - ASSERT(del.br_startblock == - prev.br_startblock + prev.br_blockcount); - if (prev.br_startoff < start) { - mod = start - prev.br_startoff; - prev.br_blockcount -= mod; - prev.br_startblock += mod; - prev.br_startoff = start; - } - prev.br_state = XFS_EXT_UNWRITTEN; - lastx--; - error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &prev, - firstblock, flist, &logflags); - if (error) - goto error0; - goto nodelete; - } else { - ASSERT(del.br_state == XFS_EXT_NORM); - del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &del, - firstblock, flist, &logflags); - if (error) - goto error0; - goto nodelete; - } + da_old = da_new = 0; + } else { + da_old = startblockval(got.br_startblock); + da_new = 0; + nblks = 0; + do_fx = 0; + } + /* + * Set flag value to use in switch statement. + * Left-contig is 2, right-contig is 1. + */ + switch (((got.br_startoff == del->br_startoff) << 1) | + (got_endoff == del_endoff)) { + case 3: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, + whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0); + --*idx; + if (delay) + break; + + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + flags |= XFS_ILOG_CORE; + if (!cur) { + flags |= xfs_ilog_fext(whichfork); + break; } - if (wasdel) { - ASSERT(startblockval(del.br_startblock) > 0); - /* Update realtime/data freespace, unreserve quota */ - if (isrt) { - xfs_filblks_t rtexts; + if ((error = xfs_btree_delete(cur, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + break; - rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); - do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)rtexts, 0); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_RTBLKS); - } else { - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)del.br_blockcount, 0); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_REGBLKS); - } - ip->i_delayed_blks -= del.br_blockcount; - if (cur) - cur->bc_private.b.flags |= - XFS_BTCUR_BPRV_WASDEL; - } else if (cur) - cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; + case 2: /* - * If it's the case where the directory code is running - * with no block reservation, and the deleted block is in - * the middle of its extent, and the resulting insert - * of an extent would cause transformation to btree format, - * then reject it. The calling code will then swap - * blocks around instead. - * We have to do this now, rather than waiting for the - * conversion to btree format, since the transaction - * will be dirty. + * Deleting the first part of the extent. 
*/ - if (!wasdel && xfs_trans_get_block_res(tp) == 0 && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ - XFS_IFORK_MAXEXT(ip, whichfork) && - del.br_startoff > got.br_startoff && - del.br_startoff + del.br_blockcount < - got.br_startoff + got.br_blockcount) { - error = XFS_ERROR(ENOSPC); - goto error0; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_startoff(ep, del_endoff); + temp = got.br_blockcount - del->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + da_new = temp; + break; } - error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, - &tmp_logflags, whichfork); - logflags |= tmp_logflags; - if (error) - goto error0; - bno = del.br_startoff - 1; -nodelete: + xfs_bmbt_set_startblock(ep, del_endblock); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + if (!cur) { + flags |= xfs_ilog_fext(whichfork); + break; + } + if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, + got.br_blockcount - del->br_blockcount, + got.br_state))) + goto done; + break; + + case 1: /* - * If not done go on to the next (previous) record. + * Deleting the last part of the extent. 
*/ - if (bno != (xfs_fileoff_t)-1 && bno >= start) { - if (lastx >= 0) { - ep = xfs_iext_get_ext(ifp, lastx); - if (xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, - lastx); + temp = got.br_blockcount - del->br_blockcount; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + da_new = temp; + break; + } + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + if (!cur) { + flags |= xfs_ilog_fext(whichfork); + break; + } + if ((error = xfs_bmbt_update(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount - del->br_blockcount, + got.br_state))) + goto done; + break; + + case 0: + /* + * Deleting the middle of the extent. + */ + temp = del->br_startoff - got.br_startoff; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + new.br_startoff = del_endoff; + temp2 = got_endoff - del_endoff; + new.br_blockcount = temp2; + new.br_state = got.br_state; + if (!delay) { + new.br_startblock = del_endblock; + flags |= XFS_ILOG_CORE; + if (cur) { + if ((error = xfs_bmbt_update(cur, + got.br_startoff, + got.br_startblock, temp, + got.br_state))) + goto done; + if ((error = xfs_btree_increment(cur, 0, &i))) + goto done; + cur->bc_rec.b = new; + error = xfs_btree_insert(cur, &i); + if (error && error != ENOSPC) + goto done; + /* + * If get no-space back from btree insert, + * it tried a split, and we have a zero + * block reservation. + * Fix up our state and return the error. + */ + if (error == ENOSPC) { + /* + * Reset the cursor, don't trust + * it after any insert operation. 
+ */ + if ((error = xfs_bmbt_lookup_eq(cur, + got.br_startoff, + got.br_startblock, + temp, &i))) + goto done; + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* + * Update the btree record back + * to the original value. + */ + if ((error = xfs_bmbt_update(cur, + got.br_startoff, + got.br_startblock, + got.br_blockcount, + got.br_state))) + goto done; + /* + * Reset the extent record back + * to the original value. + */ + xfs_bmbt_set_blockcount(ep, + got.br_blockcount); + flags = 0; + error = XFS_ERROR(ENOSPC); + goto done; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + } else + flags |= xfs_ilog_fext(whichfork); + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + } else { + ASSERT(whichfork == XFS_DATA_FORK); + temp = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + temp2 = xfs_bmap_worst_indlen(ip, temp2); + new.br_startblock = nullstartblock((int)temp2); + da_new = temp + temp2; + while (da_new > da_old) { + if (temp) { + temp--; + da_new--; + xfs_bmbt_set_startblock(ep, + nullstartblock((int)temp)); + } + if (da_new == da_old) + break; + if (temp2) { + temp2--; + da_new--; + new.br_startblock = + nullstartblock((int)temp2); } - xfs_bmbt_get_all(ep, &got); } - extno++; } - } - *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; - - /* - * Convert to a btree if necessary. - */ - if (xfs_bmap_needs_btree(ip, whichfork)) { - ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, - &cur, 0, &tmp_logflags, whichfork); - logflags |= tmp_logflags; - if (error) - goto error0; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_insert(ip, *idx + 1, 1, &new, state); + ++*idx; + break; } /* - * transform from btree to extents, give it cur + * If we need to, add to list of extents to delete. 
*/ - else if (xfs_bmap_wants_extents(ip, whichfork)) { - ASSERT(cur != NULL); - error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, - whichfork); - logflags |= tmp_logflags; - if (error) - goto error0; - } + if (do_fx) + xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist, + mp); /* - * transform from extents to local? + * Adjust inode # blocks in the file. */ - error = 0; -error0: + if (nblks) + ip->i_d.di_nblocks -= nblks; /* - * Log everything. Do this after conversion, there's no point in - * logging the extent records if we've converted to btree format. + * Adjust quota data. */ - if ((logflags & xfs_ilog_fext(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - logflags &= ~xfs_ilog_fext(whichfork); - else if ((logflags & xfs_ilog_fbroot(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) - logflags &= ~xfs_ilog_fbroot(whichfork); + if (qfield) + xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); + /* - * Log inode even in the error case, if the transaction - * is dirty we'll need to shut down the filesystem. + * Account for change in delayed indirect blocks. + * Nothing to do for disk quota accounting here. */ - if (logflags) - xfs_trans_log_inode(tp, ip, logflags); - if (cur) { - if (!error) { - *firstblock = cur->bc_private.b.firstblock; - cur->bc_private.b.allocated = 0; - } - xfs_btree_del_cursor(cur, - error ? 
XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + ASSERT(da_old >= da_new); + if (da_old > da_new) { + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + (int64_t)(da_old - da_new), 0); } +done: + *logflagsp = flags; return error; } -#ifdef DEBUG -STATIC struct xfs_buf * -xfs_bmap_get_bp( - struct xfs_btree_cur *cur, - xfs_fsblock_t bno) -{ - struct xfs_log_item_desc *lidp; - int i; - - if (!cur) - return NULL; - - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { - if (!cur->bc_bufs[i]) - break; - if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) - return cur->bc_bufs[i]; - } - - /* Chase down all the log items to see if the bp is there */ - list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { - struct xfs_buf_log_item *bip; - bip = (struct xfs_buf_log_item *)lidp->lid_item; - if (bip->bli_item.li_type == XFS_LI_BUF && - XFS_BUF_ADDR(bip->bli_buf) == bno) - return bip->bli_buf; - } - - return NULL; -} - -STATIC void -xfs_check_block( - struct xfs_btree_block *block, - xfs_mount_t *mp, - int root, - short sz) -{ - int i, j, dmxr; - __be64 *pp, *thispa; /* pointer to block address */ - xfs_bmbt_key_t *prevp, *keyp; - - ASSERT(be16_to_cpu(block->bb_level) > 0); - - prevp = NULL; - for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { - dmxr = mp->m_bmap_dmxr[0]; - keyp = XFS_BMBT_KEY_ADDR(mp, block, i); - - if (prevp) { - ASSERT(be64_to_cpu(prevp->br_startoff) < - be64_to_cpu(keyp->br_startoff)); - } - prevp = keyp; - - /* - * Compare the block numbers to see if there are dups. 
- */ - if (root) - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); - else - pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); - - for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { - if (root) - thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); - else - thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); - if (*thispa == *pp) { - xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", - __func__, j, i, - (unsigned long long)be64_to_cpu(*thispa)); - panic("%s: ptrs are equal in node\n", - __func__); - } - } - } -} - /* - * Check that the extents for the inode ip are in the right order in all - * btree leaves. + * Unmap (remove) blocks from a file. + * If nexts is nonzero then the number of extents to remove is limited to + * that value. If not all extents in the block range can be removed then + * *done is set. */ - -STATIC void -xfs_bmap_check_leaf_extents( - xfs_btree_cur_t *cur, /* btree cursor or null */ - xfs_inode_t *ip, /* incore inode pointer */ - int whichfork) /* data or attr fork */ +int /* error */ +xfs_bunmapi( + xfs_trans_t *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting offset to unmap */ + xfs_filblks_t len, /* length to unmap in file */ + int flags, /* misc flags */ + xfs_extnum_t nexts, /* number of extents max */ + xfs_fsblock_t *firstblock, /* first allocated block + controls a.g. 
for allocs */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + int *done) /* set if not done yet */ { - struct xfs_btree_block *block; /* current btree block */ - xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ - int error; /* error return value */ - xfs_extnum_t i=0, j; /* index into the extents list */ - xfs_ifork_t *ifp; /* fork structure */ - int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ - xfs_bmbt_rec_t *ep; /* pointer to current extent */ - xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ - xfs_bmbt_rec_t *nextp; /* pointer to next extent */ - int bp_release = 0; + xfs_btree_cur_t *cur; /* bmap btree cursor */ + xfs_bmbt_irec_t del; /* extent being deleted */ + int eof; /* is deleting at eof */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ + int error; /* error return value */ + xfs_extnum_t extno; /* extent number in list */ + xfs_bmbt_irec_t got; /* current extent record */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int isrt; /* freeing in rt area */ + xfs_extnum_t lastx; /* last extent index used */ + int logflags; /* transaction logging flags */ + xfs_extlen_t mod; /* rt extent offset */ + xfs_mount_t *mp; /* mount structure */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_bmbt_irec_t prev; /* previous extent record */ + xfs_fileoff_t start; /* first file offset deleted */ + int tmp_logflags; /* partial logging flags */ + int wasdel; /* was a delayed alloc extent */ + int whichfork; /* data or attribute fork */ + xfs_fsblock_t sum; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { - return; - } + trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); - bno = NULLFSBLOCK; - mp = ip->i_mount; + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
+ XFS_ATTR_FORK : XFS_DATA_FORK; ifp = XFS_IFORK_PTR(ip, whichfork); - block = ifp->if_broot; - /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. - */ - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - - ASSERT(bno != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - - /* - * Go down the tree until leaf level is reached, following the first - * pointer (leftmost) at each level. - */ - while (level-- > 0) { - /* See if buf is in cur first */ - bp_release = 0; - bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); - if (!bp) { - bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - goto error_norelse; - } - block = XFS_BUF_TO_BLOCK(bp); - XFS_WANT_CORRUPTED_GOTO( - xfs_bmap_sanity_check(mp, bp, level), - error0); - if (level == 0) - break; + if (unlikely( + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { + XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, + ip->i_mount); + return XFS_ERROR(EFSCORRUPTED); + } + mp = ip->i_mount; + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); - /* - * Check this block for basic sanity (increasing keys and - * no duplicate blocks). 
- */ + ASSERT(len > 0); + ASSERT(nexts >= 0); - xfs_check_block(block, mp, 0, 0); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *done = 1; + return 0; } + XFS_STATS_INC(xs_blk_unmap); + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + start = bno; + bno = start + len - 1; + ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, + &prev); /* - * Here with bp and block set to the leftmost leaf node in the tree. - */ - i = 0; - - /* - * Loop over all leaf nodes checking that all extents are in the right order. + * Check to see if the given block number is past the end of the + * file, back up to the last block if so... */ - for (;;) { - xfs_fsblock_t nextbno; - xfs_extnum_t num_recs; - - - num_recs = xfs_btree_get_numrecs(block); + if (eof) { + ep = xfs_iext_get_ext(ifp, --lastx); + xfs_bmbt_get_all(ep, &got); + bno = got.br_startoff + got.br_blockcount - 1; + } + logflags = 0; + if (ifp->if_flags & XFS_IFBROOT) { + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = 0; + } else + cur = NULL; + if (isrt) { /* - * Read-ahead the next leaf block, if any. + * Synchronize by locking the bitmap inode. 
*/ + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); + } - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - + extno = 0; + while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && + (nexts == 0 || extno < nexts)) { /* - * Check all the extents to make sure they are OK. - * If we had a previous block, the last entry should - * conform with the first entry in this one. + * Is the found extent after a hole in which bno lives? + * Just back up to the previous extent, if so. */ - - ep = XFS_BMBT_REC_ADDR(mp, block, 1); - if (i) { - ASSERT(xfs_bmbt_disk_get_startoff(&last) + - xfs_bmbt_disk_get_blockcount(&last) <= - xfs_bmbt_disk_get_startoff(ep)); + if (got.br_startoff > bno) { + if (--lastx < 0) + break; + ep = xfs_iext_get_ext(ifp, lastx); + xfs_bmbt_get_all(ep, &got); } - for (j = 1; j < num_recs; j++) { - nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); - ASSERT(xfs_bmbt_disk_get_startoff(ep) + - xfs_bmbt_disk_get_blockcount(ep) <= - xfs_bmbt_disk_get_startoff(nextp)); - ep = nextp; + /* + * Is the last block of this extent before the range + * we're supposed to delete? If so, we're done. + */ + bno = XFS_FILEOFF_MIN(bno, + got.br_startoff + got.br_blockcount - 1); + if (bno < start) + break; + /* + * Then deal with the (possibly delayed) allocated space + * we found. + */ + ASSERT(ep != NULL); + del = got; + wasdel = isnullstartblock(del.br_startblock); + if (got.br_startoff < start) { + del.br_startoff = start; + del.br_blockcount -= start - got.br_startoff; + if (!wasdel) + del.br_startblock += start - got.br_startoff; + } + if (del.br_startoff + del.br_blockcount > bno + 1) + del.br_blockcount = bno + 1 - del.br_startoff; + sum = del.br_startblock + del.br_blockcount; + if (isrt && + (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { + /* + * Realtime extent not lined up at the end. + * The extent could have been split into written + * and unwritten pieces, or we could just be + * unmapping part of it. 
But we can't really + * get rid of part of a realtime extent. + */ + if (del.br_state == XFS_EXT_UNWRITTEN || + !xfs_sb_version_hasextflgbit(&mp->m_sb)) { + /* + * This piece is unwritten, or we're not + * using unwritten extents. Skip over it. + */ + ASSERT(bno >= mod); + bno -= mod > del.br_blockcount ? + del.br_blockcount : mod; + if (bno < got.br_startoff) { + if (--lastx >= 0) + xfs_bmbt_get_all(xfs_iext_get_ext( + ifp, lastx), &got); + } + continue; + } + /* + * It's written, turn it unwritten. + * This is better than zeroing it. + */ + ASSERT(del.br_state == XFS_EXT_NORM); + ASSERT(xfs_trans_get_block_res(tp) > 0); + /* + * If this spans a realtime extent boundary, + * chop it back to the start of the one we end at. + */ + if (del.br_blockcount > mod) { + del.br_startoff += del.br_blockcount - mod; + del.br_startblock += del.br_blockcount - mod; + del.br_blockcount = mod; + } + del.br_state = XFS_EXT_UNWRITTEN; + error = xfs_bmap_add_extent_unwritten_real(tp, ip, + &lastx, &cur, &del, firstblock, flist, + &logflags); + if (error) + goto error0; + goto nodelete; } + if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) { + /* + * Realtime extent is lined up at the end but not + * at the front. We'll get rid of full extents if + * we can. + */ + mod = mp->m_sb.sb_rextsize - mod; + if (del.br_blockcount > mod) { + del.br_blockcount -= mod; + del.br_startoff += mod; + del.br_startblock += mod; + } else if ((del.br_startoff == start && + (del.br_state == XFS_EXT_UNWRITTEN || + xfs_trans_get_block_res(tp) == 0)) || + !xfs_sb_version_hasextflgbit(&mp->m_sb)) { + /* + * Can't make it unwritten. There isn't + * a full extent here so just skip it. + */ + ASSERT(bno >= del.br_blockcount); + bno -= del.br_blockcount; + if (got.br_startoff > bno) { + if (--lastx >= 0) { + ep = xfs_iext_get_ext(ifp, + lastx); + xfs_bmbt_get_all(ep, &got); + } + } + continue; + } else if (del.br_state == XFS_EXT_UNWRITTEN) { + /* + * This one is already unwritten. 
+ * It must have a written left neighbor. + * Unwrite the killed part of that one and + * try again. + */ + ASSERT(lastx > 0); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, + lastx - 1), &prev); + ASSERT(prev.br_state == XFS_EXT_NORM); + ASSERT(!isnullstartblock(prev.br_startblock)); + ASSERT(del.br_startblock == + prev.br_startblock + prev.br_blockcount); + if (prev.br_startoff < start) { + mod = start - prev.br_startoff; + prev.br_blockcount -= mod; + prev.br_startblock += mod; + prev.br_startoff = start; + } + prev.br_state = XFS_EXT_UNWRITTEN; + lastx--; + error = xfs_bmap_add_extent_unwritten_real(tp, + ip, &lastx, &cur, &prev, + firstblock, flist, &logflags); + if (error) + goto error0; + goto nodelete; + } else { + ASSERT(del.br_state == XFS_EXT_NORM); + del.br_state = XFS_EXT_UNWRITTEN; + error = xfs_bmap_add_extent_unwritten_real(tp, + ip, &lastx, &cur, &del, + firstblock, flist, &logflags); + if (error) + goto error0; + goto nodelete; + } + } + if (wasdel) { + ASSERT(startblockval(del.br_startblock) > 0); + /* Update realtime/data freespace, unreserve quota */ + if (isrt) { + xfs_filblks_t rtexts; - last = *ep; - i += num_recs; - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); + rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, + (int64_t)rtexts, 0); + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_RTBLKS); + } else { + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + (int64_t)del.br_blockcount, 0); + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_REGBLKS); + } + ip->i_delayed_blks -= del.br_blockcount; + if (cur) + cur->bc_private.b.flags |= + XFS_BTCUR_BPRV_WASDEL; + } else if (cur) + cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; + /* + * If it's the case where the directory code is running + * with no block reservation, and the deleted block is in + * the middle 
of its extent, and the resulting insert + * of an extent would cause transformation to btree format, + * then reject it. The calling code will then swap + * blocks around instead. + * We have to do this now, rather than waiting for the + * conversion to btree format, since the transaction + * will be dirty. + */ + if (!wasdel && xfs_trans_get_block_res(tp) == 0 && + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ + XFS_IFORK_MAXEXT(ip, whichfork) && + del.br_startoff > got.br_startoff && + del.br_startoff + del.br_blockcount < + got.br_startoff + got.br_blockcount) { + error = XFS_ERROR(ENOSPC); + goto error0; } - bno = nextbno; + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, + &tmp_logflags, whichfork); + logflags |= tmp_logflags; + if (error) + goto error0; + bno = del.br_startoff - 1; +nodelete: /* - * If we've reached the end, stop. + * If not done go on to the next (previous) record. */ - if (bno == NULLFSBLOCK) - break; - - bp_release = 0; - bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); - if (!bp) { - bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - goto error_norelse; + if (bno != (xfs_fileoff_t)-1 && bno >= start) { + if (lastx >= 0) { + ep = xfs_iext_get_ext(ifp, lastx); + if (xfs_bmbt_get_startoff(ep) > bno) { + if (--lastx >= 0) + ep = xfs_iext_get_ext(ifp, + lastx); + } + xfs_bmbt_get_all(ep, &got); + } + extno++; } - block = XFS_BUF_TO_BLOCK(bp); } - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } - return; - -error0: - xfs_warn(mp, "%s: at error0", __func__); - if (bp_release) - xfs_trans_brelse(NULL, bp); -error_norelse: - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", - __func__, i); - panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); - return; -} -#endif - -/* - * Count fsblocks of the given fork. 
- */ -int /* error */ -xfs_bmap_count_blocks( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - int whichfork, /* data or attr fork */ - int *count) /* out: count of blocks */ -{ - struct xfs_btree_block *block; /* current btree block */ - xfs_fsblock_t bno; /* block # of "block" */ - xfs_ifork_t *ifp; /* fork structure */ - int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ + *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; - bno = NULLFSBLOCK; - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - xfs_bmap_count_leaves(ifp, 0, - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), - count); - return 0; + /* + * Convert to a btree if necessary. + */ + if (xfs_bmap_needs_btree(ip, whichfork)) { + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, + &cur, 0, &tmp_logflags, whichfork); + logflags |= tmp_logflags; + if (error) + goto error0; } - /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 
+ * transform from btree to extents, give it cur */ - block = ifp->if_broot; - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - - if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { - XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, - mp); - return XFS_ERROR(EFSCORRUPTED); + else if (xfs_bmap_wants_extents(ip, whichfork)) { + ASSERT(cur != NULL); + error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, + whichfork); + logflags |= tmp_logflags; + if (error) + goto error0; } - - return 0; -} - -/* - * Recursively walks each level of a btree - * to count total fsblocks is use. - */ -STATIC int /* error */ -xfs_bmap_count_tree( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fsblock_t blockno, /* file system block number */ - int levelin, /* level in btree */ - int *count) /* Count of blocks */ -{ - int error; - xfs_buf_t *bp, *nbp; - int level = levelin; - __be64 *pp; - xfs_fsblock_t bno = blockno; - xfs_fsblock_t nextbno; - struct xfs_btree_block *block, *nextblock; - int numrecs; - - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - *count += 1; - block = XFS_BUF_TO_BLOCK(bp); - - if (--level) { - /* Not at node above leaves, count this level of nodes */ - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - while (nextbno != NULLFSBLOCK) { - error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - *count += 1; - nextblock = XFS_BUF_TO_BLOCK(nbp); - nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); - xfs_trans_brelse(tp, nbp); - } - - /* 
Dive to the next level */ - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - if (unlikely((error = - xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { - xfs_trans_brelse(tp, bp); - XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } - xfs_trans_brelse(tp, bp); - } else { - /* count all level 1 nodes and their leaves */ - for (;;) { - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - numrecs = be16_to_cpu(block->bb_numrecs); - xfs_bmap_disk_count_leaves(mp, block, numrecs, count); - xfs_trans_brelse(tp, bp); - if (nextbno == NULLFSBLOCK) - break; - bno = nextbno; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - *count += 1; - block = XFS_BUF_TO_BLOCK(bp); + /* + * transform from extents to local? + */ + error = 0; +error0: + /* + * Log everything. Do this after conversion, there's no point in + * logging the extent records if we've converted to btree format. + */ + if ((logflags & xfs_ilog_fext(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + logflags &= ~xfs_ilog_fext(whichfork); + else if ((logflags & xfs_ilog_fbroot(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) + logflags &= ~xfs_ilog_fbroot(whichfork); + /* + * Log inode even in the error case, if the transaction + * is dirty we'll need to shut down the filesystem. + */ + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + if (cur) { + if (!error) { + *firstblock = cur->bc_private.b.firstblock; + cur->bc_private.b.allocated = 0; } + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); } - return 0; -} - -/* - * Count leaf blocks given a range of extent records. 
- */ -STATIC void -xfs_bmap_count_leaves( - xfs_ifork_t *ifp, - xfs_extnum_t idx, - int numrecs, - int *count) -{ - int b; - - for (b = 0; b < numrecs; b++) { - xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); - *count += xfs_bmbt_get_blockcount(frp); - } -} - -/* - * Count leaf blocks given a range of extent records originally - * in btree format. - */ -STATIC void -xfs_bmap_disk_count_leaves( - struct xfs_mount *mp, - struct xfs_btree_block *block, - int numrecs, - int *count) -{ - int b; - xfs_bmbt_rec_t *frp; - - for (b = 1; b <= numrecs; b++) { - frp = XFS_BMBT_REC_ADDR(mp, block, b); - *count += xfs_bmbt_disk_get_blockcount(frp); - } -} - -/* - * Convert the given file system block to a disk block. We have to treat it - * differently based on whether the file is a real time file or not, because the - * bmap code does. - */ -xfs_daddr_t -xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) -{ - return (XFS_IS_REALTIME_INODE(ip) ? \ - (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ - XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); + return error; } diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c index 7397faa8e..2a99dea1c 100644 --- a/libxfs/xfs_dir2_block.c +++ b/libxfs/xfs_dir2_block.c @@ -339,10 +339,12 @@ xfs_dir2_block_addname( /* * If need to compact the leaf entries, do it now. */ - if (compact) + if (compact) { xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, &lfloghigh, &lfloglow); - else if (btp->stale) { + /* recalculate blp post-compaction */ + blp = xfs_dir2_block_leaf_p(btp); + } else if (btp->stale) { /* * Set leaf logging boundaries to impossible state. * For the no-stale case they're set explicitly. 
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c index 529d92d47..f0322c9e7 100644 --- a/libxfs/xfs_ialloc.c +++ b/libxfs/xfs_ialloc.c @@ -260,6 +260,8 @@ xfs_ialloc_ag_alloc( (args.agbno < be32_to_cpu(agi->agi_length)))) { args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.type = XFS_ALLOCTYPE_THIS_BNO; + args.mod = args.total = args.wasdel = args.isfl = + args.userdata = args.minalignslop = 0; args.prod = 1; /* @@ -312,6 +314,8 @@ xfs_ialloc_ag_alloc( * Allocate a fixed-size extent of inodes. */ args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.mod = args.total = args.wasdel = args.isfl = + args.userdata = args.minalignslop = 0; args.prod = 1; /* * Allow space for the inode btree to split. @@ -350,7 +354,7 @@ xfs_ialloc_ag_alloc( * number from being easily guessable. */ error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, - args.len, random32()); + args.len, prandom_u32()); if (error) return error; diff --git a/libxfs/xfs_inode.c b/libxfs/xfs_inode.c index 2970f46d0..3cf24237b 100644 --- a/libxfs/xfs_inode.c +++ b/libxfs/xfs_inode.c @@ -1112,6 +1112,7 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; + static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = diff --git a/libxfs/xfs_mount.c b/libxfs/xfs_mount.c index a9155b39b..b7514fbc8 100644 --- a/libxfs/xfs_mount.c +++ b/libxfs/xfs_mount.c @@ -365,7 +365,7 @@ xfs_sb_quiet_read_verify( return; } /* quietly fail */ - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, EWRONGFS); } static void diff --git a/repair/dinode.c b/repair/dinode.c index 874ac474f..1906ceb9d 100644 --- a/repair/dinode.c +++ b/repair/dinode.c @@ -249,7 +249,8 @@ clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num) /* and clear the forks */ if (dirty && !no_modify) - memset(XFS_DFORK_DPTR(dino), 0, XFS_LITINO(mp)); + memset(XFS_DFORK_DPTR(dino), 0, + XFS_LITINO(mp, dino->di_version)); return(dirty); } @@ -1927,11 +1928,12 @@ _("bad attr fork 
offset %d in dev inode %" PRIu64 ", should be %d\n"), case XFS_DINODE_FMT_LOCAL: /* fall through ... */ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ case XFS_DINODE_FMT_BTREE: - if (dino->di_forkoff >= (XFS_LITINO(mp) >> 3)) { + if (dino->di_forkoff >= + (XFS_LITINO(mp, dino->di_version) >> 3)) { do_warn( _("bad attr fork offset %d in inode %" PRIu64 ", max=%d\n"), dino->di_forkoff, lino, - XFS_LITINO(mp) >> 3); + XFS_LITINO(mp, dino->di_version) >> 3); return 1; } break; diff --git a/repair/prefetch.c b/repair/prefetch.c index 67026159f..3a8177e65 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -373,7 +373,7 @@ pf_read_inode_dirs( continue; if ((dino->di_forkoff != 0) && - (dino->di_forkoff >= (XFS_LITINO(mp) >> 3))) + (dino->di_forkoff >= XFS_LITINO(mp, dino->di_version) >> 3)) continue; switch (dino->di_format) {