From: Dave Chinner Date: Thu, 9 May 2013 12:23:15 +0000 (-0500) Subject: xfsprogs: sync code to current kernel code X-Git-Tag: v3.2.0-alpha1~140 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a2ceac1f6dd8771fc1cd993a6cffc30589af1ddc;p=thirdparty%2Fxfsprogs-dev.git xfsprogs: sync code to current kernel code Update code base to match kernel 3.8-rc2 code base. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Rich Johnston --- diff --git a/db/check.c b/db/check.c index 4493fca75..ff24e339d 100644 --- a/db/check.c +++ b/db/check.c @@ -31,6 +31,7 @@ #include "output.h" #include "init.h" #include "malloc.h" +#include "dir2.h" typedef enum { IS_USER_QUOTA, IS_PROJECT_QUOTA, IS_GROUP_QUOTA, @@ -2212,7 +2213,7 @@ process_data_dir_v2( bf = data->hdr.bestfree; ptr = (char *)data->u; if (be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); lep = xfs_dir2_block_leaf_p(btp); endptr = (char *)lep; if (endptr <= ptr || endptr > (char *)btp) { @@ -2792,7 +2793,7 @@ process_inode( break; } if (ic) { - dqprid = xfs_get_projid(idic); /* dquot ID is u32 */ + dqprid = xfs_get_projid(&idic); /* dquot ID is u32 */ quota_add(&dqprid, &idic.di_gid, &idic.di_uid, 0, bc, ic, rc); } @@ -3003,7 +3004,7 @@ process_leaf_node_dir_v2_free( error++; return; } - maxent = XFS_DIR2_MAX_FREE_BESTS(mp); + maxent = xfs_dir2_free_max_bests(mp); if (be32_to_cpu(free->hdr.firstdb) != xfs_dir2_da_to_db(mp, dabno - mp->m_dirfreeblk) * maxent) { if (!sflag || v) @@ -3415,10 +3416,10 @@ process_sf_dir_v2( if (v) dbprintf(_("dir %lld entry . 
%lld\n"), id->ino, id->ino); (*dot)++; - sfe = xfs_dir2_sf_firstentry(sf); + sfe = xfs_dir2_sf_firstentry(&sf->hdr); offset = XFS_DIR2_DATA_FIRST_OFFSET; for (i = sf->hdr.count - 1, i8 = 0; i >= 0; i--) { - if ((__psint_t)sfe + xfs_dir2_sf_entsize_byentry(sf, sfe) - + if ((__psint_t)sfe + xfs_dir2_sf_entsize(&sf->hdr,sfe->namelen) - (__psint_t)sf > be64_to_cpu(dip->di_size)) { if (!sflag) dbprintf(_("dir %llu bad size in entry at %d\n"), @@ -3427,7 +3428,7 @@ process_sf_dir_v2( error++; break; } - lino = xfs_dir2_sf_get_inumber(sf, xfs_dir2_sf_inumberp(sfe)); + lino = xfs_dir2_sfe_get_ino(&sf->hdr, sfe); if (lino > XFS_DIR2_MAX_SHORT_INUM) i8++; cid = find_inode(lino, 1); @@ -3457,8 +3458,8 @@ process_sf_dir_v2( } offset = xfs_dir2_sf_get_offset(sfe) + - xfs_dir2_data_entsize(sfe->namelen); - sfe = xfs_dir2_sf_nextentry(sf, sfe); + xfs_dir2_sf_entsize(&sf->hdr, sfe->namelen); + sfe = xfs_dir2_sf_nextentry(&sf->hdr, sfe); } if (i < 0 && (__psint_t)sfe - (__psint_t)sf != be64_to_cpu(dip->di_size)) { @@ -3474,7 +3475,7 @@ process_sf_dir_v2( dbprintf(_("dir %llu offsets too high\n"), id->ino); error++; } - lino = xfs_dir2_sf_get_inumber(sf, &sf->hdr.parent); + lino = xfs_dir2_sf_get_parent_ino(&sf->hdr); if (lino > XFS_DIR2_MAX_SHORT_INUM) i8++; cid = find_inode(lino, 1); diff --git a/db/dir2.c b/db/dir2.c index 0b8b99059..a539f2d08 100644 --- a/db/dir2.c +++ b/db/dir2.c @@ -215,7 +215,7 @@ dir2_block_leaf_count( block = obj; if (be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC) return 0; - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); return be32_to_cpu(btp->count); } @@ -233,7 +233,7 @@ dir2_block_leaf_offset( ASSERT(startoff == 0); block = obj; ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); lep = xfs_dir2_block_leaf_p(btp) + idx; return bitize((int)((char *)lep - (char *)block)); } @@ -265,7 +265,7 @@ 
dir2_block_tail_offset( ASSERT(idx == 0); block = obj; ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); return bitize((int)((char *)btp - (char *)block)); } @@ -287,7 +287,7 @@ dir2_block_u_count( block = obj; if (be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC) return 0; - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); ptr = (char *)block->u; endptr = (char *)xfs_dir2_block_leaf_p(btp); for (i = 0; ptr < endptr; i++) { @@ -320,7 +320,7 @@ dir2_block_u_offset( ASSERT(startoff == 0); block = obj; ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); ptr = (char *)block->u; endptr = (char *)xfs_dir2_block_leaf_p(btp); for (i = 0; i < idx; i++) { diff --git a/db/dir2.h b/db/dir2.h index 0a8467aa9..a5f0bec97 100644 --- a/db/dir2.h +++ b/db/dir2.h @@ -31,5 +31,35 @@ extern const field_t da_blkinfo_flds[]; extern const field_t da_node_entry_flds[]; extern const field_t da_node_hdr_flds[]; +/* + * generic dir2 structures used by xfs_db + */ +typedef union { + xfs_dir2_data_entry_t entry; + xfs_dir2_data_unused_t unused; +} xfs_dir2_data_union_t; + +typedef struct xfs_dir2_data { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ + xfs_dir2_data_union_t u[1]; +} xfs_dir2_data_t; + +typedef struct xfs_dir2_block { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_union_t u[1]; + xfs_dir2_leaf_entry_t leaf[1]; + xfs_dir2_block_tail_t tail; +} xfs_dir2_block_t; + +typedef struct xfs_dir2_sf { + xfs_dir2_sf_hdr_t hdr; /* shortform header */ + xfs_dir2_sf_entry_t list[1]; /* shortform entries */ +} xfs_dir2_sf_t; + +static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) +{ + return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; +} + extern int dir2_data_union_size(void 
*obj, int startoff, int idx); extern int dir2_size(void *obj, int startoff, int idx); diff --git a/db/dir2sf.c b/db/dir2sf.c index b2db08820..92f8a66cf 100644 --- a/db/dir2sf.c +++ b/db/dir2sf.c @@ -22,6 +22,7 @@ #include "fprint.h" #include "field.h" #include "bit.h" +#include "dir2.h" #include "dir2sf.h" static int dir2_inou_i4_count(void *obj, int startoff); @@ -149,10 +150,10 @@ dir2_sf_entry_size( ASSERT(bitoffs(startoff) == 0); sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); - e = xfs_dir2_sf_firstentry(sf); + e = xfs_dir2_sf_firstentry(&sf->hdr); for (i = 0; i < idx; i++) - e = xfs_dir2_sf_nextentry(sf, e); - return bitize((int)xfs_dir2_sf_entsize_byentry(sf, e)); + e = xfs_dir2_sf_nextentry(&sf->hdr, e); + return bitize((int)xfs_dir2_sf_entsize(&sf->hdr, e->namelen)); } /*ARGSUSED*/ @@ -194,9 +195,9 @@ dir2_sf_list_offset( ASSERT(bitoffs(startoff) == 0); sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); - e = xfs_dir2_sf_firstentry(sf); + e = xfs_dir2_sf_firstentry(&sf->hdr); for (i = 0; i < idx; i++) - e = xfs_dir2_sf_nextentry(sf, e); + e = xfs_dir2_sf_nextentry(&sf->hdr, e); return bitize((int)((char *)e - (char *)sf)); } @@ -214,8 +215,8 @@ dir2sf_size( ASSERT(bitoffs(startoff) == 0); ASSERT(idx == 0); sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); - e = xfs_dir2_sf_firstentry(sf); + e = xfs_dir2_sf_firstentry(&sf->hdr); for (i = 0; i < sf->hdr.count; i++) - e = xfs_dir2_sf_nextentry(sf, e); + e = xfs_dir2_sf_nextentry(&sf->hdr, e); return bitize((int)((char *)e - (char *)sf)); } diff --git a/db/metadump.c b/db/metadump.c index 9f15d9e57..5739f86bb 100644 --- a/db/metadump.c +++ b/db/metadump.c @@ -26,6 +26,10 @@ #include "init.h" #include "sig.h" #include "xfs_metadump.h" +#include "fprint.h" +#include "faddr.h" +#include "field.h" +#include "dir2.h" #define DEFAULT_MAX_EXT_SIZE 1000 @@ -916,7 +920,7 @@ obfuscate_sf_dir( (long long)cur_ino); } - sfep = xfs_dir2_sf_firstentry(sfp); + sfep = xfs_dir2_sf_firstentry(&sfp->hdr); 
for (i = 0; (i < sfp->hdr.count) && ((char *)sfep - (char *)sfp < ino_dir_size); i++) { @@ -935,7 +939,7 @@ obfuscate_sf_dir( namelen = ino_dir_size - ((char *)&sfep->name[0] - (char *)sfp); } else if ((char *)sfep - (char *)sfp + - xfs_dir2_sf_entsize_byentry(sfp, sfep) > + xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen) > ino_dir_size) { if (show_warnings) print_warning("entry length in dir inode %llu " @@ -946,12 +950,11 @@ obfuscate_sf_dir( (char *)sfp); } - generate_obfuscated_name(xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)), namelen, - &sfep->name[0]); + generate_obfuscated_name(xfs_dir2_sfe_get_ino(&sfp->hdr, sfep), + namelen, &sfep->name[0]); sfep = (xfs_dir2_sf_entry_t *)((char *)sfep + - xfs_dir2_sf_entsize_byname(sfp, namelen)); + xfs_dir2_sf_entsize(&sfp->hdr, namelen)); } } @@ -1107,9 +1110,10 @@ obfuscate_dir_data_blocks( if (is_block_format) { xfs_dir2_leaf_entry_t *blp; xfs_dir2_block_tail_t *btp; + xfs_dir2_block_t *blk; - btp = xfs_dir2_block_tail_p(mp, - (xfs_dir2_block_t *)block); + blk = (xfs_dir2_block_t *)block; + btp = xfs_dir2_block_tail_p(mp, &blk->hdr); blp = xfs_dir2_block_leaf_p(btp); if ((char *)blp > (char *)btp) blp = (xfs_dir2_leaf_entry_t *)btp; diff --git a/estimate/xfs_estimate.c b/estimate/xfs_estimate.c index 310c1f45a..c574a08a0 100644 --- a/estimate/xfs_estimate.c +++ b/estimate/xfs_estimate.c @@ -18,6 +18,8 @@ /* * Estimate space of an XFS filesystem + * + * XXX: assumes dirv1 format. 
*/ #include #include diff --git a/fsr/xfs_fsr.c b/fsr/xfs_fsr.c index d4ec9a3be..66a357095 100644 --- a/fsr/xfs_fsr.c +++ b/fsr/xfs_fsr.c @@ -16,8 +16,8 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include +#include #include #include #include diff --git a/include/Makefile b/include/Makefile index 79db532db..22e8726cf 100644 --- a/include/Makefile +++ b/include/Makefile @@ -24,15 +24,14 @@ QAHFILES = libxfs.h libxlog.h \ xfs_ag.h xfs_alloc.h xfs_alloc_btree.h xfs_arch.h xfs_attr_leaf.h \ xfs_attr_sf.h xfs_bit.h xfs_bmap.h xfs_bmap_btree.h xfs_btree.h \ xfs_btree_trace.h xfs_buf_item.h xfs_da_btree.h xfs_dinode.h \ - xfs_dir2.h xfs_dir2_block.h xfs_dir2_data.h xfs_dir2_leaf.h \ - xfs_dir2_node.h xfs_dir2_sf.h \ + xfs_dir2.h xfs_dir2_format.h \ xfs_extfree_item.h xfs_ialloc.h xfs_ialloc_btree.h \ xfs_inode.h xfs_inode_item.h xfs_inum.h \ xfs_log.h xfs_log_priv.h xfs_log_recover.h xfs_metadump.h \ xfs_mount.h xfs_quota.h xfs_rtalloc.h xfs_sb.h xfs_trace.h \ - xfs_trans.h xfs_trans_space.h xfs_types.h xfs_dfrag.h + xfs_trans.h xfs_trans_space.h xfs_dfrag.h -HFILES = handle.h jdm.h xqm.h xfs.h xfs_fs.h +HFILES = handle.h jdm.h xqm.h xfs.h xfs_fs.h xfs_types.h HFILES += $(PKG_PLATFORM).h PHFILES = darwin.h freebsd.h irix.h linux.h gnukfreebsd.h DKHFILES = volume.h fstyp.h dvh.h diff --git a/include/libxfs.h b/include/libxfs.h index f6c7abcad..0e2fc5fcd 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -33,8 +33,8 @@ #include #include -#include #include +#include #include #include #include @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -135,7 +134,6 @@ extern int libxfs_log_clear (dev_t, xfs_daddr_t, uint, uuid_t *, extern int libxfs_log_header (xfs_caddr_t, uuid_t *, int, int, int, libxfs_get_block_t *, void *); - /* * Define a user-level mount structure with all we need * in order to make use of the numerous XFS_* macros. 
@@ -199,6 +197,7 @@ typedef struct xfs_mount { xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ } xfs_mount_t; + #define LIBXFS_MOUNT_ROOTINOS 0x0001 #define LIBXFS_MOUNT_DEBUGGER 0x0002 #define LIBXFS_MOUNT_32BITINODES 0x0004 @@ -218,11 +217,32 @@ extern void libxfs_rtmount_destroy (xfs_mount_t *); /* * Simple I/O interface */ +typedef struct xfs_buftarg { + struct xfs_mount *bt_mount; + dev_t dev; +} xfs_buftarg_t; + +#define XB_PAGES 2 + +struct xfs_buf_map { + xfs_daddr_t bm_bn; /* block number for I/O */ + int bm_len; /* size of I/O */ +}; + +#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ + struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; + +struct xfs_buf_ops { + void (*verify_read)(struct xfs_buf *); + void (*verify_write)(struct xfs_buf *); +}; + typedef struct xfs_buf { struct cache_node b_node; unsigned int b_flags; xfs_daddr_t b_blkno; unsigned b_bcount; + unsigned int b_length; dev_t b_dev; pthread_mutex_t b_lock; pthread_t b_holder; @@ -230,8 +250,13 @@ typedef struct xfs_buf { void *b_fsprivate; void *b_fsprivate2; void *b_fsprivate3; - char *b_addr; + void *b_addr; int b_error; + const struct xfs_buf_ops *b_ops; + struct xfs_buftarg *b_target; + struct xfs_perag *b_pag; + struct xfs_buf_map *b_map; + int b_nmaps; #ifdef XFS_BUF_TRACING struct list_head b_lock_list; const char *b_func; @@ -244,10 +269,11 @@ enum xfs_buf_flags_t { /* b_flags bits */ LIBXFS_B_EXIT = 0x0001, /* ==LIBXFS_EXIT_ON_FAILURE */ LIBXFS_B_DIRTY = 0x0002, /* buffer has been modified */ LIBXFS_B_STALE = 0x0004, /* buffer marked as invalid */ - LIBXFS_B_UPTODATE = 0x0008 /* buffer is sync'd to disk */ + LIBXFS_B_UPTODATE = 0x0008, /* buffer is sync'd to disk */ + LIBXFS_B_DISCONTIG = 0x0010, /* discontiguous buffer */ }; -#define XFS_BUF_PTR(bp) ((bp)->b_addr) +#define XFS_BUF_PTR(bp) ((char *)(bp)->b_addr) #define xfs_buf_offset(bp, offset) (XFS_BUF_PTR(bp) + (offset)) #define XFS_BUF_ADDR(bp) ((bp)->b_blkno) #define XFS_BUF_SIZE(bp) 
((bp)->b_bcount) @@ -274,6 +300,8 @@ enum xfs_buf_flags_t { /* b_flags bits */ (pri)) #define XFS_BUF_PRIORITY(bp) (cache_node_get_priority( \ (struct cache_node *)(bp))) +#define xfs_buf_set_ref(bp,ref) ((void) 0) +#define xfs_buf_ioerror(bp,err) (bp)->b_error = (err); /* Buffer Cache Interfaces */ @@ -287,23 +315,34 @@ extern struct cache_operations libxfs_bcache_operations; #define libxfs_readbuf(dev, daddr, len, flags) \ libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, \ (dev), (daddr), (len), (flags)) +#define libxfs_readbuf_map(dev, map, nmaps, flags) \ + libxfs_trace_readbuf_map(__FUNCTION__, __FILE__, __LINE__, \ + (dev), (map), (nmaps), (flags)) #define libxfs_writebuf(buf, flags) \ libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, \ (buf), (flags)) #define libxfs_getbuf(dev, daddr, len) \ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \ (dev), (daddr), (len)) +#define libxfs_getbuf_map(dev, map, nmaps) \ + libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \ + (dev), (map), (nmaps)) #define libxfs_getbuf_flags(dev, daddr, len, flags) \ - libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \ + libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \ (dev), (daddr), (len), (flags)) #define libxfs_putbuf(buf) \ libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf)) extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int, int); +extern xfs_buf_t *libxfs_trace_readbuf_map(const char *, const char *, int, + dev_t, struct xfs_buf_map *, int, int); extern int libxfs_trace_writebuf(const char *, const char *, int, xfs_buf_t *, int); -extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int); +extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, + dev_t, xfs_daddr_t, int); +extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int, + dev_t, struct xfs_buf_map *, int); extern xfs_buf_t *libxfs_trace_getbuf_flags(const 
char *, const char *, int, dev_t, xfs_daddr_t, int, unsigned int); extern void libxfs_trace_putbuf (const char *, const char *, int, @@ -312,8 +351,10 @@ extern void libxfs_trace_putbuf (const char *, const char *, int, #else extern xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int); +extern xfs_buf_t *libxfs_readbuf_map(dev_t, struct xfs_buf_map *, int, int); extern int libxfs_writebuf(xfs_buf_t *, int); extern xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int); +extern xfs_buf_t *libxfs_getbuf_map(dev_t, struct xfs_buf_map *, int); extern xfs_buf_t *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int); extern void libxfs_putbuf (xfs_buf_t *); @@ -357,6 +398,7 @@ typedef struct xfs_inode_log_item { xfs_log_item_t ili_item; /* common portion */ struct xfs_inode *ili_inode; /* inode pointer */ unsigned short ili_flags; /* misc flags */ + unsigned int ili_fields; /* fields to be logged */ unsigned int ili_last_fields; /* fields when flushed*/ xfs_inode_log_format_t ili_format; /* logged structure */ int ili_lock_flags; @@ -408,11 +450,50 @@ extern void libxfs_trans_bjoin (xfs_trans_t *, struct xfs_buf *); extern void libxfs_trans_bhold (xfs_trans_t *, struct xfs_buf *); extern void libxfs_trans_log_buf (xfs_trans_t *, struct xfs_buf *, uint, uint); +/* extern xfs_buf_t *libxfs_trans_get_buf (xfs_trans_t *, dev_t, xfs_daddr_t, int, uint); extern int libxfs_trans_read_buf (xfs_mount_t *, xfs_trans_t *, dev_t, xfs_daddr_t, int, uint, struct xfs_buf **); - +*/ + +struct xfs_buf *libxfs_trans_get_buf_map(struct xfs_trans *tp, dev_t dev, + struct xfs_buf_map *map, int nmaps, + uint flags); + +static inline struct xfs_buf * +libxfs_trans_get_buf( + struct xfs_trans *tp, + dev_t dev, + xfs_daddr_t blkno, + int numblks, + uint flags) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return libxfs_trans_get_buf_map(tp, dev, &map, 1, flags); +} + +int libxfs_trans_read_buf_map(struct xfs_mount *mp, + struct xfs_trans *tp, dev_t dev, + struct xfs_buf_map *map, int 
nmaps, + uint flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); + +static inline int +libxfs_trans_read_buf( + struct xfs_mount *mp, + struct xfs_trans *tp, + dev_t dev, + xfs_daddr_t blkno, + int numblks, + uint flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return libxfs_trans_read_buf_map(mp, tp, dev, &map, 1, + flags, bpp, ops); +} /* * Inode interface @@ -437,6 +518,27 @@ typedef struct xfs_inode { #define LIBXFS_ATTR_CREATE 0x0010 /* create, but fail if attr exists */ #define LIBXFS_ATTR_REPLACE 0x0020 /* set, but fail if attr not exists */ +/* + * Project quota id helpers (previously projid was 16bit only and using two + * 16bit values to hold new 32bit projid was chosen to retain compatibility with + * "old" filesystems). + * + * Copied here from xfs_inode.h because it has to be defined after the struct + * xfs_inode... + */ +static inline prid_t +xfs_get_projid(struct xfs_icdinode *id) +{ + return (prid_t)id->di_projid_hi << 16 | id->di_projid_lo; +} + +static inline void +xfs_set_projid(struct xfs_icdinode *id, prid_t projid) +{ + id->di_projid_hi = (__uint16_t) (projid >> 16); + id->di_projid_lo = (__uint16_t) (projid & 0xffff); +} + typedef struct cred { uid_t cr_uid; gid_t cr_gid; @@ -450,8 +552,6 @@ extern void libxfs_trans_inode_alloc_buf (xfs_trans_t *, xfs_buf_t *); extern void libxfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); extern int libxfs_iflush_int (xfs_inode_t *, xfs_buf_t *); -extern int libxfs_iread (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, - xfs_inode_t *, xfs_daddr_t); /* Inode Cache Interfaces */ extern struct cache *libxfs_icache; @@ -461,13 +561,7 @@ extern int libxfs_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, uint, xfs_inode_t **, xfs_daddr_t); extern void libxfs_iput (xfs_inode_t *, uint); -extern int xfs_imap_to_bp(xfs_mount_t *mp, xfs_trans_t *tp, struct xfs_imap *imap, - xfs_buf_t **bpp, uint buf_flags, uint iget_flags); - 
-#include -#include -#include -#include +#include /* Shared utility routines */ extern unsigned int libxfs_log2_roundup(unsigned int i); @@ -476,11 +570,6 @@ extern int libxfs_alloc_file_space (xfs_inode_t *, xfs_off_t, xfs_off_t, int, int); extern int libxfs_bmap_finish(xfs_trans_t **, xfs_bmap_free_t *, int *); -extern void libxfs_da_bjoin (xfs_trans_t *, xfs_dabuf_t *); -extern void libxfs_da_bhold (xfs_trans_t *, xfs_dabuf_t *); -extern int libxfs_da_read_bufr(xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, - xfs_daddr_t, xfs_dabuf_t **, int); - extern void libxfs_fs_repair_cmn_err(int, struct xfs_mount *, char *, ...); extern void libxfs_fs_cmn_err(int, struct xfs_mount *, char *, ...); @@ -534,7 +623,8 @@ void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); /* xfs_bmap.h */ #define libxfs_bmap_cancel xfs_bmap_cancel #define libxfs_bmap_last_offset xfs_bmap_last_offset -#define libxfs_bmapi xfs_bmapi +#define libxfs_bmapi_write xfs_bmapi_write +#define libxfs_bmapi_read xfs_bmapi_read #define libxfs_bunmapi xfs_bunmapi /* xfs_bmap_btree.h */ @@ -544,6 +634,7 @@ void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); #define libxfs_da_brelse xfs_da_brelse #define libxfs_da_hashname xfs_da_hashname #define libxfs_da_shrink_inode xfs_da_shrink_inode +#define libxfs_da_read_buf xfs_da_read_buf /* xfs_dir2.h */ #define libxfs_dir_createname xfs_dir_createname @@ -564,6 +655,8 @@ void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); /* xfs_inode.h */ #define libxfs_dinode_from_disk xfs_dinode_from_disk #define libxfs_dinode_to_disk xfs_dinode_to_disk +void xfs_dinode_from_disk(struct xfs_icdinode *, + struct xfs_dinode *); #define libxfs_idata_realloc xfs_idata_realloc #define libxfs_idestroy_fork xfs_idestroy_fork diff --git a/include/linux.h b/include/linux.h index e0bcba28d..5bb91cda0 100644 --- a/include/linux.h +++ b/include/linux.h @@ -27,6 +27,7 @@ #include #include #include +#include static __inline__ int xfsctl(const char 
*path, int fd, int cmd, void *p) { diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in index 4e1e0c403..217d6c0d9 100644 --- a/include/platform_defs.h.in +++ b/include/platform_defs.h.in @@ -34,6 +34,7 @@ #include #include #include +#include #undef HAVE___U32 #ifdef HAVE___U32 @@ -57,6 +58,10 @@ typedef signed long long int __s64; #define __force #endif +typedef __u16 __bitwise __le16; +typedef __u32 __bitwise __le32; +typedef __u64 __bitwise __le64; + typedef __u16 __bitwise __be16; typedef __u32 __bitwise __be32; typedef __u64 __bitwise __be64; diff --git a/include/project.h b/include/project.h index ea1274aca..328e013bc 100644 --- a/include/project.h +++ b/include/project.h @@ -20,10 +20,6 @@ #include -#if !defined(__sgi__) -typedef __uint32_t prid_t; -#endif - extern int setprojid(const char *__name, int __fd, prid_t __id); extern int getprojid(const char *__name, int __fd, prid_t *__id); diff --git a/include/swab.h b/include/swab.h index 2684aa70e..3de44d8ca 100644 --- a/include/swab.h +++ b/include/swab.h @@ -153,4 +153,42 @@ static __inline__ void __swab64s(__u64 *addr) (__extension__ ({__arch__swab64s(addr);})); } +static inline __uint16_t get_unaligned_be16(void *p) +{ + __uint8_t *__p = p; + return __p[0] << 8 | __p[1]; +} + +static inline __uint32_t get_unaligned_be32(void *p) +{ + __uint8_t *__p = p; + return __p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3]; +} + +static inline __uint64_t get_unaligned_be64(void *p) +{ + return (__uint64_t)get_unaligned_be32(p) << 32 | + get_unaligned_be32(p + 4); +} + +static inline void put_unaligned_be16(__uint16_t val, void *p) +{ + __uint8_t *__p = p; + *__p++ = val >> 8; + *__p++ = val; +} + +static inline void put_unaligned_be32(__uint32_t val, void *p) +{ + __uint8_t *__p = p; + put_unaligned_be16(val >> 16, __p); + put_unaligned_be16(val, __p + 2); +} + +static inline void put_unaligned_be64(__uint64_t val, void *p) +{ + put_unaligned_be32(val >> 32, p); + put_unaligned_be32(val, p + 4); +} + 
#endif /* SWAB_H */ diff --git a/include/xfs.h b/include/xfs.h index 147a5a0d0..e32c4a8db 100644 --- a/include/xfs.h +++ b/include/xfs.h @@ -34,6 +34,7 @@ #define __XFS_H__ #include +#include #include #endif /* __XFS_H__ */ diff --git a/include/xfs_ag.h b/include/xfs_ag.h index 5adce91b6..f2aeedb6a 100644 --- a/include/xfs_ag.h +++ b/include/xfs_ag.h @@ -103,11 +103,13 @@ typedef struct xfs_agf { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) -#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); +extern const struct xfs_buf_ops xfs_agf_buf_ops; + /* * Size of the unlinked inode hash table in the agi. */ @@ -156,11 +158,13 @@ typedef struct xfs_agi { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) -#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); +extern const struct xfs_buf_ops xfs_agi_buf_ops; + /* * The third a.g. block contains the a.g. freelist, an array * of block pointers to blocks owned by the allocation btree code. 
@@ -168,30 +172,12 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) -#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) typedef struct xfs_agfl { __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ } xfs_agfl_t; -/* - * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that - * have been freed but whose transactions aren't committed to disk yet. - * - * Note that we use the transaction ID to record the transaction, not the - * transaction structure itself. See xfs_alloc_busy_insert() for details. - */ -struct xfs_busy_extent { -#ifdef __KERNEL__ - struct rb_node rb_node; /* ag by-bno indexed search tree */ -#endif - struct list_head list; /* transaction busy extent list */ - xfs_agnumber_t agno; - xfs_agblock_t bno; - xfs_extlen_t length; - xlog_tid_t tid; /* transaction that created this */ -}; - /* * Per-ag incore structure, copies of information in agf and agi, * to improve the performance of allocation group selection. 
@@ -251,6 +237,7 @@ typedef struct xfs_perag { #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ +#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ diff --git a/include/xfs_alloc.h b/include/xfs_alloc.h index 895009a97..99d0a6101 100644 --- a/include/xfs_alloc.h +++ b/include/xfs_alloc.h @@ -19,10 +19,12 @@ #define __XFS_ALLOC_H__ struct xfs_buf; +struct xfs_btree_cur; struct xfs_mount; struct xfs_perag; struct xfs_trans; -struct xfs_busy_extent; + +extern struct workqueue_struct *xfs_alloc_wq; /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. @@ -73,6 +75,22 @@ typedef unsigned int xfs_alloctype_t; */ #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) +/* + * When deciding how much space to allocate out of an AG, we limit the + * allocation maximum size to the size of the AG. However, we cannot use all the + * blocks in the AG - some are permanently used by metadata. These + * blocks are generally: + * - the AG superblock, AGF, AGI and AGFL + * - the AGF (bno and cnt) and AGI btree root blocks + * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits + * + * The AG headers are sector sized, so the amount of space they take up is + * dependent on filesystem geometry. The others are all single blocks. + */ +#define XFS_ALLOC_AG_MAX_USABLE(mp) \ + ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) + + /* * Argument structure for xfs_alloc routines. 
* This is turned into a structure to avoid having 20 arguments passed @@ -117,19 +135,6 @@ xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, struct xfs_perag *pag); -#ifdef __KERNEL__ - -void -xfs_alloc_busy_insert(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len); - -void -xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); - -#endif /* __KERNEL__ */ - /* * Compute and fill in value of m_ag_maxlevels. */ @@ -205,4 +210,28 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len); /* length of extent */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat); /* output: success/failure */ + +extern const struct xfs_buf_ops xfs_agf_buf_ops; +extern const struct xfs_buf_ops xfs_agfl_buf_ops; + #endif /* __XFS_ALLOC_H__ */ diff --git a/include/xfs_alloc_btree.h b/include/xfs_alloc_btree.h index a6caa0022..7e89a2b42 100644 --- a/include/xfs_alloc_btree.h +++ b/include/xfs_alloc_btree.h @@ -50,20 +50,6 @@ typedef struct xfs_alloc_rec_incore { /* btree pointer type */ typedef __be32 xfs_alloc_ptr_t; -/* - * Minimum and maximum blocksize and sectorsize. - * The blocksize upper limit is pretty much arbitrary. - * The sectorsize upper limit is due to sizeof(sb_sectsize). - */ -#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ -#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 
65536 bytes */ -#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) -#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) -#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ -#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ -#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) -#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) - /* * Block numbers in the AG: * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. @@ -107,4 +93,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, xfs_agnumber_t, xfs_btnum_t); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); +extern const struct xfs_buf_ops xfs_allocbt_buf_ops; + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/include/xfs_attr_leaf.h b/include/xfs_attr_leaf.h index 9c7d22fdc..77de139a5 100644 --- a/include/xfs_attr_leaf.h +++ b/include/xfs_attr_leaf.h @@ -31,7 +31,6 @@ struct attrlist; struct attrlist_cursor_kern; struct xfs_attr_list_context; -struct xfs_dabuf; struct xfs_da_args; struct xfs_da_state; struct xfs_da_state_blk; @@ -215,7 +214,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_list(struct xfs_attr_list_context *context); -int xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp); +int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); @@ -223,7 +222,7 @@ int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); * Internal routines when attribute fork size == XFS_LBSIZE(mp). 
*/ int xfs_attr_leaf_to_node(struct xfs_da_args *args); -int xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp, +int xfs_attr_leaf_to_shortform(struct xfs_buf *bp, struct xfs_da_args *args, int forkoff); int xfs_attr_leaf_clearflag(struct xfs_da_args *args); int xfs_attr_leaf_setflag(struct xfs_da_args *args); @@ -235,14 +234,14 @@ int xfs_attr_leaf_flipflags(xfs_da_args_t *args); int xfs_attr_leaf_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); -int xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf, +int xfs_attr_leaf_lookup_int(struct xfs_buf *leaf, struct xfs_da_args *args); -int xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args); -int xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer, +int xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); +int xfs_attr_leaf_add(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); -int xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer, +int xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); -int xfs_attr_leaf_list_int(struct xfs_dabuf *bp, +int xfs_attr_leaf_list_int(struct xfs_buf *bp, struct xfs_attr_list_context *context); /* @@ -257,9 +256,15 @@ int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); /* * Utility routines. 
*/ -xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count); -int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); +xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count); +int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp); int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local); +int xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mappedbno, + struct xfs_buf **bpp); + +extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops; + #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/include/xfs_bmap.h b/include/xfs_bmap.h index 3651191da..de451a273 100644 --- a/include/xfs_bmap.h +++ b/include/xfs_bmap.h @@ -62,36 +62,32 @@ typedef struct xfs_bmap_free #define XFS_BMAP_MAX_NMAP 4 /* - * Flags for xfs_bmapi + * Flags for xfs_bmapi_* */ -#define XFS_BMAPI_WRITE 0x001 /* write operation: allocate space */ -#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ -#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ -#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ -#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ -#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */ -#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ -#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ +#define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */ +#define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */ +#define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */ +#define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */ +#define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */ /* combine contig. 
space */ -#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ +#define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */ /* * unwritten extent conversion - this needs write cache flushing and no additional * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts * from written to unwritten, otherwise convert from unwritten to written. */ -#define XFS_BMAPI_CONVERT 0x200 +#define XFS_BMAPI_CONVERT 0x040 +#define XFS_BMAPI_STACK_SWITCH 0x080 #define XFS_BMAPI_FLAGS \ - { XFS_BMAPI_WRITE, "WRITE" }, \ - { XFS_BMAPI_DELAY, "DELAY" }, \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ - { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" } + { XFS_BMAPI_CONVERT, "CONVERT" }, \ + { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } static inline int xfs_bmapi_aflag(int w) @@ -115,23 +111,32 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) * Argument structure for xfs_bmap_alloc. 
*/ typedef struct xfs_bmalloca { - xfs_fsblock_t firstblock; /* i/o first block allocated */ - xfs_fsblock_t rval; /* starting block of new extent */ - xfs_fileoff_t off; /* offset in file filling in */ + xfs_fsblock_t *firstblock; /* i/o first block allocated */ + struct xfs_bmap_free *flist; /* bmap freelist */ struct xfs_trans *tp; /* transaction pointer */ struct xfs_inode *ip; /* incore inode pointer */ - struct xfs_bmbt_irec *prevp; /* extent before the new one */ - struct xfs_bmbt_irec *gotp; /* extent after, or delayed */ - xfs_extlen_t alen; /* i/o length asked/allocated */ + struct xfs_bmbt_irec prev; /* extent before the new one */ + struct xfs_bmbt_irec got; /* extent after, or delayed */ + + xfs_fileoff_t offset; /* offset in file filling in */ + xfs_extlen_t length; /* i/o length asked/allocated */ + xfs_fsblock_t blkno; /* starting block of new extent */ + + struct xfs_btree_cur *cur; /* btree cursor */ + xfs_extnum_t idx; /* current extent index */ + int nallocs;/* number of extents alloc'd */ + int logflags;/* flags for transaction logging */ + xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ char eof; /* set if allocating past last extent */ char wasdel; /* replacing a delayed allocation */ char userdata;/* set if is user data */ - char low; /* low on space, using seq'l ags */ char aeof; /* allocated space at eof */ char conv; /* overwriting unwritten extents */ + int flags; + int stack_switch; } xfs_bmalloca_t; /* @@ -154,251 +159,65 @@ typedef struct xfs_bmalloca { { BMAP_RIGHT_FILLING, "RF" }, \ { BMAP_ATTRFORK, "ATTR" } -/* - * Add bmap trace insert entries for all the contents of the extent list. - * - * Quite excessive tracing. Only do this for debug builds. 
- */ #if defined(__KERNEL) && defined(DEBUG) -void -xfs_bmap_trace_exlist( - struct xfs_inode *ip, /* incore inode pointer */ - xfs_extnum_t cnt, /* count of entries in list */ - int whichfork, - unsigned long caller_ip); /* data or attr fork */ +void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, + int whichfork, unsigned long caller_ip); #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_) #else #define XFS_BMAP_TRACE_EXLIST(ip,c,w) #endif -/* - * Convert inode from non-attributed to attributed. - * Must not be in a transaction, ip must not be locked. - */ -int /* error code */ -xfs_bmap_add_attrfork( - struct xfs_inode *ip, /* incore inode pointer */ - int size, /* space needed for new attribute */ - int rsvd); /* flag for reserved block allocation */ - -/* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). - */ -void -xfs_bmap_add_free( - xfs_fsblock_t bno, /* fs block number of extent */ - xfs_filblks_t len, /* length of extent */ - xfs_bmap_free_t *flist, /* list of extents */ - struct xfs_mount *mp); /* mount point structure */ - -/* - * Routine to clean up the free list data structure when - * an error occurs during a transaction. - */ -void -xfs_bmap_cancel( - xfs_bmap_free_t *flist); /* free list to clean up */ - -/* - * Compute and fill in the value of the maximum depth of a bmap btree - * in this filesystem. Done once, during mount. - */ -void -xfs_bmap_compute_maxlevels( - struct xfs_mount *mp, /* file system mount structure */ - int whichfork); /* data or attr fork */ - -/* - * Returns the file-relative block number of the first unused block in the file. - * This is the lowest-address hole if the file has holes, else the first block - * past the end of file. 
- */ -int /* error */ -xfs_bmap_first_unused( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_extlen_t len, /* size of hole to find */ - xfs_fileoff_t *unused, /* unused block num */ - int whichfork); /* data or attr fork */ - -/* - * Returns the file-relative block number of the last block + 1 before - * last_block (input value) in the file. - * This is not based on i_size, it is based on the extent list. - * Returns 0 for local files, as they do not have an extent list. - */ -int /* error */ -xfs_bmap_last_before( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t *last_block, /* last block */ - int whichfork); /* data or attr fork */ - -/* - * Returns the file-relative block number of the first block past eof in - * the file. This is not based on i_size, it is based on the extent list. - * Returns 0 for local files, as they do not have an extent list. - */ -int /* error */ -xfs_bmap_last_offset( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t *unused, /* last block num */ - int whichfork); /* data or attr fork */ - -/* - * Returns whether the selected fork of the inode has exactly one - * block or not. For the data fork we check this matches di_size, - * implying the file's range is 0..bsize-1. - */ -int -xfs_bmap_one_block( - struct xfs_inode *ip, /* incore inode */ - int whichfork); /* data or attr fork */ - -/* - * Read in the extents to iu_extents. - * All inode fields are set up by caller, we just traverse the btree - * and copy the records in. - */ -int /* error */ -xfs_bmap_read_extents( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - int whichfork); /* data or attr fork */ - -/* - * Map file blocks to filesystem blocks. - * File range is given by the bno/len pair. 
- * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set) - * into a hole or past eof. - * Only allocates blocks from a single allocation group, - * to avoid locking problems. - * The returned value in "firstblock" from the first call in a transaction - * must be remembered and presented to subsequent calls in "firstblock". - * An upper bound for the number of blocks to be allocated is supplied to - * the first call in "total"; if no allocation group has that many free - * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). - */ -int /* error */ -xfs_bmapi( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting file offs. mapped */ - xfs_filblks_t len, /* length to map in file */ - int flags, /* XFS_BMAPI_... */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ - xfs_extlen_t total, /* total blocks needed */ - struct xfs_bmbt_irec *mval, /* output: map values */ - int *nmap, /* i/o: mval size/count */ - xfs_bmap_free_t *flist); /* i/o: list extents to free */ - -/* - * Map file blocks to filesystem blocks, simple version. - * One block only, read-only. - * For flags, only the XFS_BMAPI_ATTRFORK flag is examined. - * For the other flag values, the effect is as if XFS_BMAPI_METADATA - * was set and all the others were clear. - */ -int /* error */ -xfs_bmapi_single( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - int whichfork, /* data or attr fork */ - xfs_fsblock_t *fsb, /* output: mapped block */ - xfs_fileoff_t bno); /* starting file offs. mapped */ - -/* - * Unmap (remove) blocks from a file. - * If nexts is nonzero then the number of extents to remove is limited to - * that value. If not all extents in the block range can be removed then - * *done is set. 
- */ -int /* error */ -xfs_bunmapi( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting offset to unmap */ - xfs_filblks_t len, /* length to unmap in file */ - int flags, /* XFS_BMAPI_... */ - xfs_extnum_t nexts, /* number of extents max */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *done); /* set if not done yet */ - -/* - * Check an extent list, which has just been read, for - * any bit in the extent flag field. - */ -int -xfs_check_nostate_extents( - struct xfs_ifork *ifp, - xfs_extnum_t idx, - xfs_extnum_t num); - -uint -xfs_default_attroffset( - struct xfs_inode *ip); +int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); +void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, + struct xfs_bmap_free *flist, struct xfs_mount *mp); +void xfs_bmap_cancel(struct xfs_bmap_free *flist); +void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); +int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); +int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t *last_block, int whichfork); +int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t *unused, int whichfork); +int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); +int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork); +int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, + xfs_filblks_t len, struct xfs_bmbt_irec *mval, + int *nmap, int flags); +int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno, + xfs_filblks_t len, struct xfs_bmbt_irec *mval, + int *nmap, int flags); +int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t bno, xfs_filblks_t len, int flags, + xfs_fsblock_t *firstblock, 
xfs_extlen_t total, + struct xfs_bmbt_irec *mval, int *nmap, + struct xfs_bmap_free *flist); +int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t bno, xfs_filblks_t len, int flags, + xfs_extnum_t nexts, xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, int *done); +int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, + xfs_extnum_t num); +uint xfs_default_attroffset(struct xfs_inode *ip); #ifdef __KERNEL__ - -/* - * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi - * caller. Frees all the extents that need freeing, which must be done - * last due to locking considerations. - * - * Return 1 if the given transaction was committed and a new one allocated, - * and 0 otherwise. - */ -int /* error */ -xfs_bmap_finish( - struct xfs_trans **tp, /* transaction pointer addr */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *committed); /* xact committed or not */ - /* bmap to userspace formatter - copy to user & advance pointer */ typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); -/* - * Get inode's extents as described in bmv, and format for output. - */ -int /* error code */ -xfs_getbmap( - xfs_inode_t *ip, - struct getbmapx *bmv, /* user bmap structure */ - xfs_bmap_format_t formatter, /* format to user */ - void *arg); /* formatter arg */ +int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, + int *committed); +int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, + xfs_bmap_format_t formatter, void *arg); +int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, + int whichfork, int *eof); +int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, int *count); +int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, + xfs_fileoff_t start_fsb, xfs_fileoff_t length); -/* - * Check if the endoff is outside the last extent. 
If so the caller will grow - * the allocation to a stripe unit boundary - */ -int -xfs_bmap_eof( - struct xfs_inode *ip, - xfs_fileoff_t endoff, - int whichfork, - int *eof); - -/* - * Count fsblocks of the given fork. - */ -int -xfs_bmap_count_blocks( - xfs_trans_t *tp, - struct xfs_inode *ip, - int whichfork, - int *count); +xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb); -int -xfs_bmap_punch_delalloc_range( - struct xfs_inode *ip, - xfs_fileoff_t start_fsb, - xfs_fileoff_t length); #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_H__ */ diff --git a/include/xfs_bmap_btree.h b/include/xfs_bmap_btree.h index 0e66c4ea0..88469ca08 100644 --- a/include/xfs_bmap_btree.h +++ b/include/xfs_bmap_btree.h @@ -236,5 +236,6 @@ extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); +extern const struct xfs_buf_ops xfs_bmbt_buf_ops; #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/include/xfs_btree.h b/include/xfs_btree.h index 82fafc66b..be1eb23a1 100644 --- a/include/xfs_btree.h +++ b/include/xfs_btree.h @@ -188,6 +188,8 @@ struct xfs_btree_ops { __int64_t (*key_diff)(struct xfs_btree_cur *cur, union xfs_btree_key *key); + const struct xfs_buf_ops *buf_ops; + #ifdef DEBUG /* check that k1 is lower than k2 */ int (*keys_inorder)(struct xfs_btree_cur *cur, @@ -281,7 +283,7 @@ typedef struct xfs_btree_cur /* * Convert from buffer to btree block header. 
*/ -#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) /* @@ -374,7 +376,8 @@ xfs_btree_read_bufl( xfs_fsblock_t fsbno, /* file system block number */ uint lock, /* lock flags for read_buf */ struct xfs_buf **bpp, /* buffer for fsbno */ - int refval);/* ref count value for buffer */ + int refval, /* ref count value for buffer */ + const struct xfs_buf_ops *ops); /* * Read-ahead the block, don't wait for it, don't return a buffer. @@ -384,7 +387,8 @@ void /* error */ xfs_btree_reada_bufl( struct xfs_mount *mp, /* file system mount point */ xfs_fsblock_t fsbno, /* file system block number */ - xfs_extlen_t count); /* count of filesystem blocks */ + xfs_extlen_t count, /* count of filesystem blocks */ + const struct xfs_buf_ops *ops); /* * Read-ahead the block, don't wait for it, don't return a buffer. @@ -395,8 +399,20 @@ xfs_btree_reada_bufs( struct xfs_mount *mp, /* file system mount point */ xfs_agnumber_t agno, /* allocation group number */ xfs_agblock_t agbno, /* allocation group block number */ - xfs_extlen_t count); /* count of filesystem blocks */ + xfs_extlen_t count, /* count of filesystem blocks */ + const struct xfs_buf_ops *ops); +/* + * Initialise a new btree block header + */ +void +xfs_btree_init_block( + struct xfs_mount *mp, + struct xfs_buf *bp, + __u32 magic, + __u16 level, + __u16 numrecs, + unsigned int flags); /* * Common btree core entry points. diff --git a/include/xfs_buf_item.h b/include/xfs_buf_item.h index a5efba911..ee36c88ec 100644 --- a/include/xfs_buf_item.h +++ b/include/xfs_buf_item.h @@ -20,23 +20,6 @@ extern kmem_zone_t *xfs_buf_item_zone; -/* - * This is the structure used to lay out a buf log item in the - * log. The data map describes which 128 byte chunks of the buffer - * have been logged. - * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. 
- */ -typedef struct xfs_buf_log_format { - unsigned short blf_type; /* buf log item type indicator */ - unsigned short blf_size; /* size of this item */ - ushort blf_flags; /* misc state */ - ushort blf_len; /* number of blocks in this buf */ - __int64_t blf_blkno; /* starting blkno of this buf */ - unsigned int blf_map_size; /* size of data bitmap in words */ - unsigned int blf_data_map[1];/* variable size bitmap of */ - /* regions of buffer in this item */ -} xfs_buf_log_format_t; - /* * This flag indicates that the buffer contains on disk inodes * and requires special recovery handling. @@ -60,6 +43,23 @@ typedef struct xfs_buf_log_format { #define BIT_TO_WORD_SHIFT 5 #define NBWORD (NBBY * sizeof(unsigned int)) +/* + * This is the structure used to lay out a buf log item in the + * log. The data map describes which 128 byte chunks of the buffer + * have been logged. + */ +#define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) + +typedef struct xfs_buf_log_format { + unsigned short blf_type; /* buf log item type indicator */ + unsigned short blf_size; /* size of this item */ + ushort blf_flags; /* misc state */ + ushort blf_len; /* number of blocks in this buf */ + __int64_t blf_blkno; /* starting blkno of this buf */ + unsigned int blf_map_size; /* used size of data bitmap in words */ + unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */ +} xfs_buf_log_format_t; + /* * buf log item flags */ @@ -69,7 +69,7 @@ typedef struct xfs_buf_log_format { #define XFS_BLI_LOGGED 0x08 #define XFS_BLI_INODE_ALLOC_BUF 0x10 #define XFS_BLI_STALE_INODE 0x20 -#define XFS_BLI_INODE_BUF 0x40 +#define XFS_BLI_INODE_BUF 0x40 #define XFS_BLI_FLAGS \ { XFS_BLI_HOLD, "HOLD" }, \ @@ -80,6 +80,7 @@ typedef struct xfs_buf_log_format { { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ { XFS_BLI_INODE_BUF, "INODE_BUF" } + #ifdef __KERNEL__ struct xfs_buf; @@ -97,11 +98,9 @@ typedef struct xfs_buf_log_item { unsigned int bli_flags; /* misc flags */ unsigned int 
bli_recur; /* lock recursion count */ atomic_t bli_refcount; /* cnt of tp refs */ -#ifdef XFS_TRANS_DEBUG - char *bli_orig; /* original buffer copy */ - char *bli_logged; /* bytes logged (bitmap) */ -#endif - xfs_buf_log_format_t bli_format; /* in-log header */ + int bli_format_count; /* count of headers */ + struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ + struct xfs_buf_log_format __bli_format; /* embedded in-log header */ } xfs_buf_log_item_t; void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); @@ -114,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); -#ifdef XFS_TRANS_DEBUG -void -xfs_buf_item_flush_log_debug( - struct xfs_buf *bp, - uint first, - uint last); -#else -#define xfs_buf_item_flush_log_debug(bp, first, last) -#endif - #endif /* __KERNEL__ */ #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/include/xfs_da_btree.h b/include/xfs_da_btree.h index fe9f5a8c1..ee5170c46 100644 --- a/include/xfs_da_btree.h +++ b/include/xfs_da_btree.h @@ -18,7 +18,6 @@ #ifndef __XFS_DA_BTREE_H__ #define __XFS_DA_BTREE_H__ -struct xfs_buf; struct xfs_bmap_free; struct xfs_inode; struct xfs_mount; @@ -32,7 +31,7 @@ struct zone; /* * This structure is common to both leaf nodes and non-leaf nodes in the Btree. * - * Is is used to manage a doubly linked list of all blocks at the same + * It is used to manage a doubly linked list of all blocks at the same * level in the Btree, and to identify which type of block this is. */ #define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ @@ -132,35 +131,6 @@ typedef struct xfs_da_args { { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ { XFS_DA_OP_CILOOKUP, "CILOOKUP" } -/* - * Structure to describe buffer(s) for a block. - * This is needed in the directory version 2 format case, when - * multiple non-contiguous fsblocks might be needed to cover one - * logical directory block. 
- * If the buffer count is 1 then the data pointer points to the - * same place as the b_addr field for the buffer, else to kmem_alloced memory. - */ -typedef struct xfs_dabuf { - int nbuf; /* number of buffer pointers present */ - short dirty; /* data needs to be copied back */ - short bbcount; /* how large is data in bbs */ - void *data; /* pointer for buffers' data */ -#ifdef XFS_DABUF_DEBUG - inst_t *ra; /* return address of caller to make */ - struct xfs_dabuf *next; /* next in global chain */ - struct xfs_dabuf *prev; /* previous in global chain */ - struct xfs_buftarg *target; /* device for buffer */ - xfs_daddr_t blkno; /* daddr first in bps[0] */ -#endif - struct xfs_buf *bps[1]; /* actually nbuf of these */ -} xfs_dabuf_t; -#define XFS_DA_BUF_SIZE(n) \ - (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) - -#ifdef XFS_DABUF_DEBUG -extern xfs_dabuf_t *xfs_dabuf_global_list; -#endif - /* * Storage for holding state during Btree searches and split/join ops. * @@ -169,7 +139,7 @@ extern xfs_dabuf_t *xfs_dabuf_global_list; * which is slightly more than enough. */ typedef struct xfs_da_state_blk { - xfs_dabuf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ xfs_dablk_t blkno; /* filesystem blkno of buffer */ xfs_daddr_t disk_blkno; /* on-disk blkno (in BBs) of buffer */ int index; /* relevant index into block */ @@ -222,7 +192,7 @@ struct xfs_nameops { * Routines used for growing the Btree. 
*/ int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, - xfs_dabuf_t **bpp, int whichfork); + struct xfs_buf **bpp, int whichfork); int xfs_da_split(xfs_da_state_t *state); /* @@ -243,21 +213,28 @@ int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, */ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, xfs_da_state_blk_t *new_blk); +int xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mappedbno, + struct xfs_buf **bpp, int which_fork); /* * Utility routines. */ int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); +int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, + int count); int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, - xfs_dabuf_t **bp, int whichfork); + struct xfs_buf **bp, int whichfork); int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, int whichfork); + struct xfs_buf **bpp, int whichfork, + const struct xfs_buf_ops *ops); xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, - xfs_dablk_t bno, int whichfork); + xfs_dablk_t bno, xfs_daddr_t mapped_bno, + int whichfork, const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, - xfs_dabuf_t *dead_buf); + struct xfs_buf *dead_buf); uint xfs_da_hashname(const __uint8_t *name_string, int name_length); enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, @@ -267,15 +244,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, xfs_da_state_t *xfs_da_state_alloc(void); void xfs_da_state_free(xfs_da_state_t *state); -void xfs_da_buf_done(xfs_dabuf_t *dabuf); -void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first, - uint last); -void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf); -void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t 
*dabuf); -xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); - extern struct kmem_zone *xfs_da_state_zone; -extern struct kmem_zone *xfs_dabuf_zone; extern const struct xfs_nameops xfs_default_nameops; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h index dffba9ba0..1d9643b3d 100644 --- a/include/xfs_dinode.h +++ b/include/xfs_dinode.h @@ -33,7 +33,7 @@ typedef struct xfs_timestamp { * variable size the leftover area split into a data and an attribute fork. * The format of the data and attribute fork depends on the format of the * inode as indicated by di_format and di_aformat. To access the data and - * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros + * attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros * below. * * There is a very similar struct icdinode in xfs_inode which matches the @@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt { be32_to_cpu((dip)->di_nextents) : \ be16_to_cpu((dip)->di_anextents)) -#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr)) /* * For block and character special files the 32bit dev_t is stored at the diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h index 74a3b1057..8ab59b5c8 100644 --- a/include/xfs_dir2.h +++ b/include/xfs_dir2.h @@ -16,37 +16,20 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __XFS_DIR2_H__ -#define __XFS_DIR2_H__ +#define __XFS_DIR2_H__ -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_dir2_put_args; struct xfs_bmap_free; +struct xfs_da_args; struct xfs_inode; struct xfs_mount; struct xfs_trans; +struct xfs_dir2_sf_hdr; +struct xfs_dir2_sf_entry; +struct xfs_dir2_data_hdr; +struct xfs_dir2_data_entry; +struct xfs_dir2_data_unused; +struct xfs_dir2_data_free; -/* - * Directory version 2. 
- * There are 4 possible formats: - * shortform - * single block - data with embedded leaf at the end - * multiple data blocks, single leaf+freeindex block - * data blocks, node&leaf blocks (btree), freeindex blocks - * - * The shortform format is in xfs_dir2_sf.h. - * The single block format is in xfs_dir2_block.h. - * The data block format is in xfs_dir2_data.h. - * The leaf and freeindex block formats are in xfs_dir2_leaf.h. - * Node blocks are the same as the other version, in xfs_da_btree.h. - */ - -/* - * Byte offset in data block and shortform entry. - */ -typedef __uint16_t xfs_dir2_data_off_t; -#define NULLDATAOFF 0xffffU typedef uint xfs_dir2_data_aoff_t; /* argument form */ /* @@ -54,11 +37,6 @@ typedef uint xfs_dir2_data_aoff_t; /* argument form */ */ typedef __uint32_t xfs_dir2_db_t; -/* - * Byte offset in a directory. - */ -typedef xfs_off_t xfs_dir2_off_t; - extern struct xfs_name xfs_name_dotdot; /* @@ -86,21 +64,44 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_bmap_free *flist, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, uint resblks); -extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); /* - * Utility routines for v2 directories. + * Direct call from the bmap code, bypassing the generic directory layer. 
+ */ +extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); + +/* + * used by db */ -extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, - xfs_dir2_db_t *dbp); -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, - int *vp); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, - int *vp); +extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); +extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp, + xfs_ino_t ino); +extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp, + struct xfs_dir2_sf_entry *sfep); +extern void xfs_dir2_sfe_put_ino( struct xfs_dir2_sf_hdr *, + struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino); + +extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, - struct xfs_dabuf *bp); + struct xfs_buf *bp); + +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr, int *loghead); +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_entry *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, + struct xfs_buf *bp); +extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_unused *dup); +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp, + xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, + int *needlogp, int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, - const unsigned char *name, int len); +extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( + struct xfs_dir2_data_hdr 
*hdr, struct xfs_dir2_data_unused *dup); #endif /* __XFS_DIR2_H__ */ diff --git a/include/xfs_dir2_block.h b/include/xfs_dir2_block.h deleted file mode 100644 index 10e689676..000000000 --- a/include/xfs_dir2_block.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_BLOCK_H__ -#define __XFS_DIR2_BLOCK_H__ - -/* - * xfs_dir2_block.h - * Directory version 2, single block format structures - */ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_dir2_data_hdr; -struct xfs_dir2_leaf_entry; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * The single block format is as follows: - * xfs_dir2_data_hdr_t structure - * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures - * xfs_dir2_leaf_entry_t structures - * xfs_dir2_block_tail_t structure - */ - -#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */ - -typedef struct xfs_dir2_block_tail { - __be32 count; /* count of leaf entries */ - __be32 stale; /* count of stale lf entries */ -} xfs_dir2_block_tail_t; - -/* - * Generic single-block structure, for xfs_db. 
- */ -typedef struct xfs_dir2_block { - xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ - xfs_dir2_data_union_t u[1]; - xfs_dir2_leaf_entry_t leaf[1]; - xfs_dir2_block_tail_t tail; -} xfs_dir2_block_t; - -/* - * Pointer to the leaf header embedded in a data block (1-block format) - */ -static inline xfs_dir2_block_tail_t * -xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) -{ - return (((xfs_dir2_block_tail_t *) - ((char *)(block) + (mp)->m_dirblksize)) - 1); -} - -/* - * Pointer to the leaf entries embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) -{ - return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); -} - -/* - * Function declarations. - */ -extern int xfs_dir2_block_addname(struct xfs_da_args *args); -extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_block_lookup(struct xfs_da_args *args); -extern int xfs_dir2_block_removename(struct xfs_da_args *args); -extern int xfs_dir2_block_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, - struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); -extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); - -#endif /* __XFS_DIR2_BLOCK_H__ */ diff --git a/include/xfs_dir2_data.h b/include/xfs_dir2_data.h deleted file mode 100644 index efbc290c7..000000000 --- a/include/xfs_dir2_data.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_DATA_H__ -#define __XFS_DIR2_DATA_H__ - -/* - * Directory format 2, data block structures. - */ - -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_inode; -struct xfs_trans; - -/* - * Constants. - */ -#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */ -#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ -#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) -#define XFS_DIR2_DATA_FREE_TAG 0xffff -#define XFS_DIR2_DATA_FD_COUNT 3 - -/* - * Directory address space divided into sections, - * spaces separated by 32GB. - */ -#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) -#define XFS_DIR2_DATA_SPACE 0 -#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_DATA_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) - -/* - * Offsets of . and .. in data space (always block 0) - */ -#define XFS_DIR2_DATA_DOT_OFFSET \ - ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) -#define XFS_DIR2_DATA_DOTDOT_OFFSET \ - (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) -#define XFS_DIR2_DATA_FIRST_OFFSET \ - (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) - -/* - * Structures. - */ - -/* - * Describe a free area in the data block. - * The freespace will be formatted as a xfs_dir2_data_unused_t. - */ -typedef struct xfs_dir2_data_free { - __be16 offset; /* start of freespace */ - __be16 length; /* length of freespace */ -} xfs_dir2_data_free_t; - -/* - * Header for the data blocks. - * Always at the beginning of a directory-sized block. - * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. 
- */ -typedef struct xfs_dir2_data_hdr { - __be32 magic; /* XFS_DIR2_DATA_MAGIC */ - /* or XFS_DIR2_BLOCK_MAGIC */ - xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; -} xfs_dir2_data_hdr_t; - -/* - * Active entry in a data block. Aligned to 8 bytes. - * Tag appears as the last 2 bytes. - */ -typedef struct xfs_dir2_data_entry { - __be64 inumber; /* inode number */ - __u8 namelen; /* name length */ - __u8 name[1]; /* name bytes, no null */ - /* variable offset */ - __be16 tag; /* starting offset of us */ -} xfs_dir2_data_entry_t; - -/* - * Unused entry in a data block. Aligned to 8 bytes. - * Tag appears as the last 2 bytes. - */ -typedef struct xfs_dir2_data_unused { - __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ - __be16 length; /* total free length */ - /* variable offset */ - __be16 tag; /* starting offset of us */ -} xfs_dir2_data_unused_t; - -typedef union { - xfs_dir2_data_entry_t entry; - xfs_dir2_data_unused_t unused; -} xfs_dir2_data_union_t; - -/* - * Generic data block structure, for xfs_db. - */ -typedef struct xfs_dir2_data { - xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ - xfs_dir2_data_union_t u[1]; -} xfs_dir2_data_t; - -/* - * Macros. - */ - -/* - * Size of a data entry. - */ -static inline int xfs_dir2_data_entsize(int n) -{ - return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ - (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); -} - -/* - * Pointer to an entry's tag word. - */ -static inline __be16 * -xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * Pointer to a freespace's tag word. - */ -static inline __be16 * -xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) -{ - return (__be16 *)((char *)dup + - be16_to_cpu(dup->length) - sizeof(__be16)); -} - -/* - * Function declarations. 
- */ -#ifdef DEBUG -extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); -#else -#define xfs_dir2_data_check(dp,bp) -#endif -extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d, - xfs_dir2_data_unused_t *dup); -extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, - xfs_dir2_data_unused_t *dup, int *loghead); -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, - int *loghead); -extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, - struct xfs_dabuf **bpp); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_entry_t *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_unused_t *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, - int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_unused_t *dup, - xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, - int *needscanp); - -#endif /* __XFS_DIR2_DATA_H__ */ diff --git a/include/xfs_dir2_format.h b/include/xfs_dir2_format.h new file mode 100644 index 000000000..f5c264aed --- /dev/null +++ b/include/xfs_dir2_format.h @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_FORMAT_H__ +#define __XFS_DIR2_FORMAT_H__ + +/* + * Directory version 2. + * + * There are 4 possible formats: + * - shortform - embedded into the inode + * - single block - data with embedded leaf at the end + * - multiple data blocks, single leaf+freeindex block + * - data blocks, node and leaf blocks (btree), freeindex blocks + * + * Note: many node blocks structures and constants are shared with the attr + * code and defined in xfs_da_btree.h. + */ + +#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */ +#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */ +#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ + +/* + * Byte offset in data block and shortform entry. + */ +typedef __uint16_t xfs_dir2_data_off_t; +#define NULLDATAOFF 0xffffU + +/* + * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. + * Only need 16 bits, this is the byte offset into the single block form. + */ +typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; + +/* + * Offset in data space of a data entry. + */ +typedef __uint32_t xfs_dir2_dataptr_t; +#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) +#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) + +/* + * Byte offset in a directory. + */ +typedef xfs_off_t xfs_dir2_off_t; + +/* + * Inode number stored as 8 8-bit values. + */ +typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; + +/* + * Inode number stored as 4 8-bit values. + * Works a lot of the time, when all the inode numbers in a directory + * fit in 32 bits. 
+ */ +typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; + +typedef union { + xfs_dir2_ino8_t i8; + xfs_dir2_ino4_t i4; +} xfs_dir2_inou_t; +#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) + +/* + * Directory layout when stored internal to an inode. + * + * Small directories are packed as tightly as possible so as to fit into the + * literal area of the inode. These "shortform" directories consist of a + * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry + * structures. Due to the different inode number storage size and the variable + * length name field in the xfs_dir2_sf_entry all these structures are + * variable length, and the accessors in this file should be used to iterate + * over them. + */ +typedef struct xfs_dir2_sf_hdr { + __uint8_t count; /* count of entries */ + __uint8_t i8count; /* count of 8-byte inode #s */ + xfs_dir2_inou_t parent; /* parent dir inode number */ +} __arch_pack xfs_dir2_sf_hdr_t; + +typedef struct xfs_dir2_sf_entry { + __u8 namelen; /* actual name length */ + xfs_dir2_sf_off_t offset; /* saved offset */ + __u8 name[]; /* name, variable size */ + /* + * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a + * variable offset after the name. + */ +} __arch_pack xfs_dir2_sf_entry_t; + +static inline int xfs_dir2_sf_hdr_size(int i8count) +{ + return sizeof(struct xfs_dir2_sf_hdr) - + (i8count == 0) * + (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t)); +} + +static inline xfs_dir2_data_aoff_t +xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) +{ + return get_unaligned_be16(&sfep->offset.i); +} + +static inline void +xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) +{ + put_unaligned_be16(off, &sfep->offset.i); +} + +static inline int +xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len) +{ + return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */ + len + /* name */ + (hdr->i8count ? 
/* ino */ + sizeof(xfs_dir2_ino8_t) : + sizeof(xfs_dir2_ino4_t)); +} + +static inline struct xfs_dir2_sf_entry * +xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); +} + +static inline struct xfs_dir2_sf_entry * +xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); +} + + +/* + * Data block structures. + * + * A pure data block looks like the following drawing on disk: + * + * +-------------------------------------------------+ + * | xfs_dir2_data_hdr_t | + * +-------------------------------------------------+ + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | ... | + * +-------------------------------------------------+ + * | unused space | + * +-------------------------------------------------+ + * + * As all the entries are variable size structures the accessors below should + * be used to iterate over them. + * + * In addition to the pure data blocks for the data and node formats, + * most structures are also used for the combined data/freespace "block" + * format below. + */ + +#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ +#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) +#define XFS_DIR2_DATA_FREE_TAG 0xffff +#define XFS_DIR2_DATA_FD_COUNT 3 + +/* + * Directory address space divided into sections, + * spaces separated by 32GB. + */ +#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) +#define XFS_DIR2_DATA_SPACE 0 +#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_DATA_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) + +/* + * Offsets of . and .. 
in data space (always block 0) + */ +#define XFS_DIR2_DATA_DOT_OFFSET \ + ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr)) +#define XFS_DIR2_DATA_DOTDOT_OFFSET \ + (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) +#define XFS_DIR2_DATA_FIRST_OFFSET \ + (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) + +/* + * Describe a free area in the data block. + * + * The freespace will be formatted as a xfs_dir2_data_unused_t. + */ +typedef struct xfs_dir2_data_free { + __be16 offset; /* start of freespace */ + __be16 length; /* length of freespace */ +} xfs_dir2_data_free_t; + +/* + * Header for the data blocks. + * + * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. + */ +typedef struct xfs_dir2_data_hdr { + __be32 magic; /* XFS_DIR2_DATA_MAGIC or */ + /* XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; +} xfs_dir2_data_hdr_t; + +/* + * Active entry in a data block. + * + * Aligned to 8 bytes. After the variable length name field there is a + * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p. + */ +typedef struct xfs_dir2_data_entry { + __be64 inumber; /* inode number */ + __u8 namelen; /* name length */ + __u8 name[]; /* name bytes, no null */ + /* __be16 tag; */ /* starting offset of us */ +} xfs_dir2_data_entry_t; + +/* + * Unused entry in a data block. + * + * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed + * using xfs_dir2_data_unused_tag_p. + */ +typedef struct xfs_dir2_data_unused { + __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ + __be16 length; /* total free length */ + /* variable offset */ + __be16 tag; /* starting offset of us */ +} xfs_dir2_data_unused_t; + +/* + * Size of a data entry. + */ +static inline int xfs_dir2_data_entsize(int n) +{ + return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n + + (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); +} + +/* + * Pointer to an entry's tag word. 
+ */ +static inline __be16 * +xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * Pointer to a freespace's tag word. + */ +static inline __be16 * +xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) +{ + return (__be16 *)((char *)dup + + be16_to_cpu(dup->length) - sizeof(__be16)); +} + +/* + * Leaf block structures. + * + * A pure leaf block looks like the following drawing on disk: + * + * +---------------------------+ + * | xfs_dir2_leaf_hdr_t | + * +---------------------------+ + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * | ... | + * +---------------------------+ + * | xfs_dir2_data_off_t | + * | xfs_dir2_data_off_t | + * | xfs_dir2_data_off_t | + * | ... | + * +---------------------------+ + * | xfs_dir2_leaf_tail_t | + * +---------------------------+ + * + * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block + * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present + * for directories with separate leaf nodes and free space blocks + * (magic = XFS_DIR2_LEAFN_MAGIC). + * + * As all the entries are variable size structures the accessors below should + * be used to iterate over them. + */ + +/* + * Offset of the leaf/node space. First block in this space + * is the btree root. + */ +#define XFS_DIR2_LEAF_SPACE 1 +#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_LEAF_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) + +/* + * Leaf block header. + */ +typedef struct xfs_dir2_leaf_hdr { + xfs_da_blkinfo_t info; /* header for da routines */ + __be16 count; /* count of entries */ + __be16 stale; /* count of stale entries */ +} xfs_dir2_leaf_hdr_t; + +/* + * Leaf block entry. 
+ */ +typedef struct xfs_dir2_leaf_entry { + __be32 hashval; /* hash value of name */ + __be32 address; /* address of data entry */ +} xfs_dir2_leaf_entry_t; + +/* + * Leaf block tail. + */ +typedef struct xfs_dir2_leaf_tail { + __be32 bestcount; +} xfs_dir2_leaf_tail_t; + +/* + * Leaf block. + */ +typedef struct xfs_dir2_leaf { + xfs_dir2_leaf_hdr_t hdr; /* leaf header */ + xfs_dir2_leaf_entry_t ents[]; /* entries */ +} xfs_dir2_leaf_t; + +/* + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +/* + * Get address of the bestcount field in the single-leaf block. + */ +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) +{ + return (struct xfs_dir2_leaf_tail *) + ((char *)lp + mp->m_dirblksize - + sizeof(struct xfs_dir2_leaf_tail)); +} + +/* + * Get address of the bests array in the single-leaf block. + */ +static inline __be16 * +xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) +{ + return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); +} + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. 
+ */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t) + (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)(by & + ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)db << + (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return 
xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); +} + +/* + * Free space block definitions for the node format. + */ + +/* + * Offset of the freespace index. + */ +#define XFS_DIR2_FREE_SPACE 2 +#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_FREE_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) + +typedef struct xfs_dir2_free_hdr { + __be32 magic; /* XFS_DIR2_FREE_MAGIC */ + __be32 firstdb; /* db of first entry */ + __be32 nvalid; /* count of valid entries */ + __be32 nused; /* count of used entries */ +} xfs_dir2_free_hdr_t; + +typedef struct xfs_dir2_free { + xfs_dir2_free_hdr_t hdr; /* block header */ + __be16 bests[]; /* best free counts */ + /* unused entries are -1 */ +} xfs_dir2_free_t; + +static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / + sizeof(xfs_dir2_data_off_t); +} + +/* + * Convert data space db to the corresponding free db. + */ +static inline xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static inline int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return db % xfs_dir2_free_max_bests(mp); +} + +/* + * Single block format. 
+ * + * The single block format looks like the following drawing on disk: + * + * +-------------------------------------------------+ + * | xfs_dir2_data_hdr_t | + * +-------------------------------------------------+ + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | ... | + * +-------------------------------------------------+ + * | unused space | + * +-------------------------------------------------+ + * | ... | + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * +-------------------------------------------------+ + * | xfs_dir2_block_tail_t | + * +-------------------------------------------------+ + * + * As all the entries are variable size structures the accessors below should + * be used to iterate over them. + */ + +typedef struct xfs_dir2_block_tail { + __be32 count; /* count of leaf entries */ + __be32 stale; /* count of stale lf entries */ +} xfs_dir2_block_tail_t; + +/* + * Pointer to the leaf header embedded in a data block (1-block format) + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir2_block_tail *) + ((char *)hdr + mp->m_dirblksize)) - 1; +} + +/* + * Pointer to the leaf entries embedded in a data block (1-block format) + */ +static inline struct xfs_dir2_leaf_entry * +xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) +{ + return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); +} + +#endif /* __XFS_DIR2_FORMAT_H__ */ diff --git a/include/xfs_dir2_leaf.h b/include/xfs_dir2_leaf.h deleted file mode 100644 index 6c9539f06..000000000 --- a/include/xfs_dir2_leaf.h +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_LEAF_H__ -#define __XFS_DIR2_LEAF_H__ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * Offset of the leaf/node space. First block in this space - * is the btree root. - */ -#define XFS_DIR2_LEAF_SPACE 1 -#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_LEAF_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) - -/* - * Offset in data space of a data entry. - */ -typedef __uint32_t xfs_dir2_dataptr_t; -#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) -#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) - -/* - * Leaf block header. - */ -typedef struct xfs_dir2_leaf_hdr { - xfs_da_blkinfo_t info; /* header for da routines */ - __be16 count; /* count of entries */ - __be16 stale; /* count of stale entries */ -} xfs_dir2_leaf_hdr_t; - -/* - * Leaf block entry. - */ -typedef struct xfs_dir2_leaf_entry { - __be32 hashval; /* hash value of name */ - __be32 address; /* address of data entry */ -} xfs_dir2_leaf_entry_t; - -/* - * Leaf block tail. - */ -typedef struct xfs_dir2_leaf_tail { - __be32 bestcount; -} xfs_dir2_leaf_tail_t; - -/* - * Leaf block. 
- * bests and tail are at the end of the block for single-leaf only - * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC). - */ -typedef struct xfs_dir2_leaf { - xfs_dir2_leaf_hdr_t hdr; /* leaf header */ - xfs_dir2_leaf_entry_t ents[1]; /* entries */ - /* ... */ - xfs_dir2_data_off_t bests[1]; /* best free counts */ - xfs_dir2_leaf_tail_t tail; /* leaf tail */ -} xfs_dir2_leaf_t; - -/* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) -{ - return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / - (uint)sizeof(xfs_dir2_leaf_entry_t)); -} - -/* - * Get address of the bestcount field in the single-leaf block. - */ -static inline xfs_dir2_leaf_tail_t * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) -{ - return (xfs_dir2_leaf_tail_t *) - ((char *)(lp) + (mp)->m_dirblksize - - (uint)sizeof(xfs_dir2_leaf_tail_t)); -} - -/* - * Get address of the bests array in the single-leaf block. - */ -static inline __be16 * -xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) -{ - return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); -} - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. 
- */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t)((by) >> \ - ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)((by) & \ - ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)(db) << \ - ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o); -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, - 
xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/* - * Function declarations. - */ -extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, - struct xfs_dabuf *dbp); -extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); -extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, - struct xfs_dabuf *bp); -extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, - int *lowstalep, int *highstalep, - int *lowlogp, int *highlogp); -extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, - size_t bufsize, xfs_off_t *offset, - filldir_t filldir); -extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_dabuf **bpp, int magic); -extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); -extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); -extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, - struct xfs_dabuf *lbp, xfs_dir2_db_t db); -extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); - -#endif /* __XFS_DIR2_LEAF_H__ */ diff --git a/include/xfs_dir2_node.h b/include/xfs_dir2_node.h deleted file mode 100644 index 82dfe7147..000000000 --- 
a/include/xfs_dir2_node.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_NODE_H__ -#define __XFS_DIR2_NODE_H__ - -/* - * Directory version 2, btree node format structures - */ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_da_state; -struct xfs_da_state_blk; -struct xfs_inode; -struct xfs_trans; - -/* - * Offset of the freespace index. - */ -#define XFS_DIR2_FREE_SPACE 2 -#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_FREE_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) - -#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ - -typedef struct xfs_dir2_free_hdr { - __be32 magic; /* XFS_DIR2_FREE_MAGIC */ - __be32 firstdb; /* db of first entry */ - __be32 nvalid; /* count of valid entries */ - __be32 nused; /* count of used entries */ -} xfs_dir2_free_hdr_t; - -typedef struct xfs_dir2_free { - xfs_dir2_free_hdr_t hdr; /* block header */ - __be16 bests[1]; /* best free counts */ - /* unused entries are -1 */ -} xfs_dir2_free_t; - -#define XFS_DIR2_MAX_FREE_BESTS(mp) \ - (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \ - (uint)sizeof(xfs_dir2_data_off_t)) - -/* - * Convert data space db to the corresponding free db. 
- */ -static inline xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp)); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static inline int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); -} - -extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); -extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, - struct xfs_da_args *args, int *indexp, - struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); -extern int xfs_dir2_leafn_split(struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, - struct xfs_da_state_blk *newblk); -extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); -extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk); -extern int xfs_dir2_node_addname(struct xfs_da_args *args); -extern int xfs_dir2_node_lookup(struct xfs_da_args *args); -extern int xfs_dir2_node_removename(struct xfs_da_args *args); -extern int xfs_dir2_node_replace(struct xfs_da_args *args); -extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, - int *rvalp); - -#endif /* __XFS_DIR2_NODE_H__ */ diff --git a/include/xfs_dir2_sf.h b/include/xfs_dir2_sf.h deleted file mode 100644 index 6ac44b550..000000000 --- a/include/xfs_dir2_sf.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_SF_H__ -#define __XFS_DIR2_SF_H__ - -/* - * Directory layout when stored internal to an inode. - * - * Small directories are packed as tightly as possible so as to - * fit into the literal area of the inode. - */ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_dir2_block; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * Inode number stored as 8 8-bit values. - */ -typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; - -/* - * Inode number stored as 4 8-bit values. - * Works a lot of the time, when all the inode numbers in a directory - * fit in 32 bits. - */ -typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; - -typedef union { - xfs_dir2_ino8_t i8; - xfs_dir2_ino4_t i4; -} xfs_dir2_inou_t; -#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) - -/* - * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. - * Only need 16 bits, this is the byte offset into the single block form. - */ -typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; - -/* - * The parent directory has a dedicated field, and the self-pointer must - * be calculated on the fly. - * - * Entries are packed toward the top as tightly as possible. The header - * and the elements must be memcpy'd out into a work area to get correct - * alignment for the inode number fields. 
- */ -typedef struct xfs_dir2_sf_hdr { - __uint8_t count; /* count of entries */ - __uint8_t i8count; /* count of 8-byte inode #s */ - xfs_dir2_inou_t parent; /* parent dir inode number */ -} __arch_pack xfs_dir2_sf_hdr_t; - -typedef struct xfs_dir2_sf_entry { - __uint8_t namelen; /* actual name length */ - xfs_dir2_sf_off_t offset; /* saved offset */ - __uint8_t name[1]; /* name, variable size */ - xfs_dir2_inou_t inumber; /* inode number, var. offset */ -} __arch_pack xfs_dir2_sf_entry_t; - -typedef struct xfs_dir2_sf { - xfs_dir2_sf_hdr_t hdr; /* shortform header */ - xfs_dir2_sf_entry_t list[1]; /* shortform entries */ -} xfs_dir2_sf_t; - -static inline int xfs_dir2_sf_hdr_size(int i8count) -{ - return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ - ((i8count) == 0) * \ - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); -} - -static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) -{ - return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; -} - -static inline xfs_intino_t -xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) -{ - return ((sfp)->hdr.i8count == 0 ? 
\ - (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \ - (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); -} - -static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, - xfs_dir2_inou_t *to) -{ - if ((sfp)->hdr.i8count == 0) - XFS_PUT_DIR_INO4(*(from), (to)->i4); - else - XFS_PUT_DIR_INO8(*(from), (to)->i8); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) -{ - return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); -} - -static inline void -xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) -{ - INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); -} - -static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) -{ - return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ - ((sfp)->hdr.i8count == 0) * \ - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); -} - -static inline int -xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) -{ - return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \ - ((sfp)->hdr.i8count == 0) * \ - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); -} - -static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) -{ - return ((xfs_dir2_sf_entry_t *) \ - ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count))); -} - -static inline xfs_dir2_sf_entry_t * -xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) -{ - return ((xfs_dir2_sf_entry_t *) \ - ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep))); -} - -/* - * Functions. 
- */ -extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, - struct xfs_dir2_block *block, - xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, - int size, xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_sf_addname(struct xfs_da_args *args); -extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); -extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_sf_removename(struct xfs_da_args *args); -extern int xfs_dir2_sf_replace(struct xfs_da_args *args); - -#endif /* __XFS_DIR2_SF_H__ */ diff --git a/include/xfs_fs.h b/include/xfs_fs.h index faac5afd1..1cc1aa048 100644 --- a/include/xfs_fs.h +++ b/include/xfs_fs.h @@ -233,7 +233,8 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ -#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ +#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ +#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ @@ -249,6 +250,11 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) +/* Used for sanity checks on superblock */ +#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) +#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ + (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) + /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT */ @@ -333,6 +339,35 @@ typedef struct xfs_error_injection { } xfs_error_injection_t; +/* + * Speculative preallocation trimming. 
+ */ +#define XFS_EOFBLOCKS_VERSION 1 +struct xfs_eofblocks { + __u32 eof_version; + __u32 eof_flags; + uid_t eof_uid; + gid_t eof_gid; + prid_t eof_prid; + __u32 pad32; + __u64 eof_min_file_size; + __u64 pad64[12]; +}; + +/* eof_flags values */ +#define XFS_EOF_FLAGS_SYNC (1 << 0) /* sync/wait mode scan */ +#define XFS_EOF_FLAGS_UID (1 << 1) /* filter by uid */ +#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ +#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ +#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ +#define XFS_EOF_FLAGS_VALID \ + (XFS_EOF_FLAGS_SYNC | \ + XFS_EOF_FLAGS_UID | \ + XFS_EOF_FLAGS_GID | \ + XFS_EOF_FLAGS_PRID | \ + XFS_EOF_FLAGS_MINFILESIZE) + + /* * The user-level Handle Request interface structure. */ @@ -451,6 +486,7 @@ typedef struct xfs_handle { /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) +#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/include/xfs_ialloc.h b/include/xfs_ialloc.h index bb5385475..c8da3df27 100644 --- a/include/xfs_ialloc.h +++ b/include/xfs_ialloc.h @@ -46,15 +46,6 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog)); } -/* - * Find a free (set) bit in the inode bitmask. - */ -static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) -{ - return xfs_lowbit64(*fp); -} - - /* * Allocate an inode on disk. * Mode is used to tell whether the new inode will need space, and whether @@ -81,11 +72,9 @@ int /* error */ xfs_dialloc( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t parent, /* parent inode (directory) */ - mode_t mode, /* mode bits for new inode */ + umode_t mode, /* mode bits for new inode */ int okalloc, /* ok to allocate more space */ struct xfs_buf **agbp, /* buf for a.g. 
inode header */ - boolean_t *alloc_done, /* an allocation was done to replenish - the free inodes */ xfs_ino_t *inop); /* inode number allocated */ /* @@ -158,7 +147,9 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, /* * Get the data from the pointed-to record. */ -extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, +int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_inobt_rec_incore_t *rec, int *stat); +extern const struct xfs_buf_ops xfs_agi_buf_ops; + #endif /* __XFS_IALLOC_H__ */ diff --git a/include/xfs_ialloc_btree.h b/include/xfs_ialloc_btree.h index f782ad0c4..25c0239a8 100644 --- a/include/xfs_ialloc_btree.h +++ b/include/xfs_ialloc_btree.h @@ -109,4 +109,6 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); +extern const struct xfs_buf_ops xfs_inobt_buf_ops; + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/include/xfs_inode.h b/include/xfs_inode.h index ca5654424..437b3af99 100644 --- a/include/xfs_inode.h +++ b/include/xfs_inode.h @@ -66,8 +66,6 @@ typedef struct xfs_ifork { struct xfs_btree_block *if_broot; /* file's incore btree root */ short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ - unsigned char if_ext_max; /* max # of extent records */ - xfs_extnum_t if_lastex; /* last if_extents used */ union { xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ @@ -111,7 +109,7 @@ struct xfs_imap { * Generally, we do not want to hold the i_rlock while holding the * i_ilock. Hierarchy is i_iolock followed by i_rlock. * - * xfs_iptr_t contains all the inode fields upto and including the + * xfs_iptr_t contains all the inode fields up to and including the * i_mnext and i_mprev fields, it is used as a marker in the inode * chain off the mount structure by xfs_sync calls. 
*/ @@ -207,29 +205,12 @@ typedef struct xfs_icdinode { ((w) == XFS_DATA_FORK ? \ ((ip)->i_d.di_nextents = (n)) : \ ((ip)->i_d.di_anextents = (n))) +#define XFS_IFORK_MAXEXT(ip, w) \ + (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) -/* - * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was choosen - * to retain compatibility with "old" filesystems). - */ -static inline __uint32_t -xfs_get_projid(struct xfs_icdinode i_d) -{ - return (__uint32_t)i_d.di_projid_hi << 16 | i_d.di_projid_lo; -} - -static inline void -xfs_set_projid(struct xfs_icdinode *i_d, - __uint32_t projid) -{ - i_d->di_projid_hi = (__uint16_t) (projid >> 16); - i_d->di_projid_lo = (__uint16_t) (projid & 0xffff); -} #ifdef __KERNEL__ -struct bhv_desc; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; @@ -238,12 +219,6 @@ struct xfs_mount; struct xfs_trans; struct xfs_dquot; -typedef struct dm_attrs_s { - __uint32_t da_dmevmask; /* DMIG event mask */ - __uint16_t da_dmstate; /* DMIG state info */ - __uint16_t da_pad; /* DMIG extra padding */ -} dm_attrs_t; - typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ @@ -259,32 +234,21 @@ typedef struct xfs_inode { xfs_ifork_t i_df; /* data fork */ /* Transaction and locking information. */ - struct xfs_trans *i_transp; /* ptr to owning transaction*/ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ - struct completion i_flush; /* inode flush completion q */ atomic_t i_pincount; /* inode pin count */ - wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ spinlock_t i_flags_lock; /* inode i_flags lock */ /* Miscellaneous state. 
*/ - unsigned short i_flags; /* see defined flags below */ - unsigned char i_update_core; /* timestamps/size is dirty */ + unsigned long i_flags; /* see defined flags below */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ - xfs_fsize_t i_size; /* in-memory size */ - xfs_fsize_t i_new_size; /* size when write completes */ - atomic_t i_iocount; /* outstanding I/O count */ - /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ } xfs_inode_t; -#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ - (ip)->i_size : (ip)->i_d.di_size; - /* Convert from vfs inode to xfs inode */ static inline struct xfs_inode *XFS_I(struct inode *inode) { @@ -297,6 +261,32 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) return &ip->i_vnode; } +/* + * For regular files we only update the on-disk filesize when actually + * writing data back to disk. Until then only the copy in the VFS inode + * is uptodate. + */ +static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip) +{ + if (S_ISREG(ip->i_d.di_mode)) + return i_size_read(VFS_I(ip)); + return ip->i_d.di_size; +} + +/* + * If this I/O goes past the on-disk inode size update it unless it would + * be past the current in-core inode size. + */ +static inline xfs_fsize_t +xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size) +{ + xfs_fsize_t i_size = i_size_read(VFS_I(ip)); + + if (new_size > i_size) + new_size = i_size; + return new_size > ip->i_d.di_size ? 
new_size : 0; +} + /* * i_flags helper functions */ @@ -351,9 +341,22 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) return ret; } +static inline int +xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags) +{ + int ret; + + spin_lock(&ip->i_flags_lock); + ret = ip->i_flags & flags; + if (!ret) + ip->i_flags |= flags; + spin_unlock(&ip->i_flags_lock); + return ret; +} + /* * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was choosen + * and using two 16bit values to hold new 32bit projid was chosen * to retain compatibility with "old" filesystems). */ static inline prid_t @@ -371,35 +374,58 @@ xfs_set_projid(struct xfs_inode *ip, } /* - * Manage the i_flush queue embedded in the inode. This completion - * queue synchronizes processes attempting to flush the in-core - * inode back to disk. + * In-core inode flags. + */ +#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ +#define XFS_ISTALE (1 << 1) /* inode has been staled */ +#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ +#define XFS_INEW (1 << 3) /* inode has just been allocated */ +#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ +#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ +#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ +#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ +#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) +#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ +#define XFS_IPINNED (1 << __XFS_IPINNED_BIT) +#define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */ + +/* + * Per-lifetime flags need to be reset when re-using a reclaimable inode during + * inode lookup. This prevents unintended behaviour on the new inode from + * occurring.
*/ -static inline void xfs_iflock(xfs_inode_t *ip) +#define XFS_IRECLAIM_RESET_FLAGS \ + (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ + XFS_IFILESTREAM); + +/* + * Synchronize processes attempting to flush the in-core inode back to disk. + */ + +extern void __xfs_iflock(struct xfs_inode *ip); + +static inline int xfs_iflock_nowait(struct xfs_inode *ip) { - wait_for_completion(&ip->i_flush); + return !xfs_iflags_test_and_set(ip, XFS_IFLOCK); } -static inline int xfs_iflock_nowait(xfs_inode_t *ip) +static inline void xfs_iflock(struct xfs_inode *ip) { - return try_wait_for_completion(&ip->i_flush); + if (!xfs_iflock_nowait(ip)) + __xfs_iflock(ip); } -static inline void xfs_ifunlock(xfs_inode_t *ip) +static inline void xfs_ifunlock(struct xfs_inode *ip) { - complete(&ip->i_flush); + xfs_iflags_clear(ip, XFS_IFLOCK); + wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); } -/* - * In-core inode flags. - */ -#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ -#define XFS_ISTALE 0x0002 /* inode has been staled */ -#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ -#define XFS_INEW 0x0008 /* inode has just been allocated */ -#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ -#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ -#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ +static inline int xfs_isiflocked(struct xfs_inode *ip) +{ + return xfs_iflags_test(ip, XFS_IFLOCK); +} /* * Flags for inode locking. 
@@ -410,7 +436,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_IOLOCK_SHARED (1<<1) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) -#define XFS_IUNLOCK_NONOTIFY (1<<4) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) @@ -419,35 +444,41 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ - { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ - { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" } + { XFS_ILOCK_SHARED, "ILOCK_SHARED" } /* * Flags for lockdep annotations. * - * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes - * (ie directory operations that require locking a directory inode and - * an entry inode). The first inode gets locked with this flag so it - * gets a lockdep subclass of 1 and the second lock will have a lockdep - * subclass of 0. + * XFS_LOCK_PARENT - for directory operations that require locking a + * parent directory inode and a child entry inode. The parent gets locked + * with this flag so it gets a lockdep subclass of 1 and the child entry + * lock will have a lockdep subclass of 0. + * + * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary + * inodes do not participate in the normal lock order, and thus have their + * own subclasses. * * XFS_LOCK_INUMORDER - for locking several inodes at the some time * with xfs_lock_inodes(). This flag is used as the starting subclass * and each subsequent lock acquired will increment the subclass by one. - * So the first lock acquired will have a lockdep subclass of 2, the - * second lock will have a lockdep subclass of 3, and so on. It is + * So the first lock acquired will have a lockdep subclass of 4, the + * second lock will have a lockdep subclass of 5, and so on. It is * the responsibility of the class builder to shift this to the correct * portion of the lock_mode lockdep mask. 
*/ #define XFS_LOCK_PARENT 1 -#define XFS_LOCK_INUMORDER 2 +#define XFS_LOCK_RTBITMAP 2 +#define XFS_LOCK_RTSUM 3 +#define XFS_LOCK_INUMORDER 4 #define XFS_IOLOCK_SHIFT 16 #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) #define XFS_ILOCK_SHIFT 24 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) #define XFS_IOLOCK_DEP_MASK 0x00ff0000 #define XFS_ILOCK_DEP_MASK 0xff000000 @@ -456,18 +487,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) -extern struct lock_class_key xfs_iolock_reclaimable; - -/* - * Flags for xfs_itruncate_start(). - */ -#define XFS_ITRUNC_DEFINITE 0x1 -#define XFS_ITRUNC_MAYBE 0x2 - -#define XFS_ITRUNC_FLAGS \ - { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \ - { XFS_ITRUNC_MAYBE, "MAYBE" } - /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and @@ -477,11 +496,10 @@ extern struct lock_class_key xfs_iolock_reclaimable; (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ ((pip)->i_d.di_mode & S_ISGID)) + /* - * xfs_iget.c prototypes. + * xfs_inode.c prototypes. */ -int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - uint, uint, xfs_inode_t **); void xfs_ilock(xfs_inode_t *, uint); int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); @@ -489,33 +507,25 @@ void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); -void xfs_inode_free(struct xfs_inode *ip); - -/* - * xfs_inode.c prototypes. 
- */ -int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, +int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, - struct xfs_buf **, boolean_t *, xfs_inode_t **); + struct xfs_buf **, xfs_inode_t **); uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); -int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); -int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, - xfs_fsize_t, int, int); +int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, + int, xfs_fsize_t); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_iunpin_wait(xfs_inode_t *); -int xfs_iflush(xfs_inode_t *, uint); +int xfs_iflush(struct xfs_inode *, struct xfs_buf **); void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); -void xfs_synchronize_times(xfs_inode_t *); -void xfs_mark_inode_dirty(xfs_inode_t *); -void xfs_mark_inode_dirty_sync(xfs_inode_t *); +xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); #define IHOLD(ip) \ do { \ @@ -537,19 +547,15 @@ do { \ */ #define XFS_IGET_CREATE 0x1 #define XFS_IGET_UNTRUSTED 0x2 +#define XFS_IGET_DONTCACHE 0x4 -int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, - xfs_ino_t, struct xfs_dinode **, - struct xfs_buf **, int *, uint); -int xfs_itobp(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, struct xfs_dinode **, - struct xfs_buf **, uint); +int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, + struct xfs_imap *, struct xfs_dinode **, + struct xfs_buf **, uint, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, uint); void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); -void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode *); void xfs_idestroy_fork(struct xfs_inode *, int); void 
xfs_idata_realloc(struct xfs_inode *, int, int); void xfs_iroot_realloc(struct xfs_inode *, int, int); @@ -579,16 +585,10 @@ void xfs_iext_irec_compact(xfs_ifork_t *); void xfs_iext_irec_compact_pages(xfs_ifork_t *); void xfs_iext_irec_compact_full(xfs_ifork_t *); void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); +bool xfs_can_free_eofblocks(struct xfs_inode *, bool); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) -#ifdef DEBUG -void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, - xfs_fsize_t); -#else /* DEBUG */ -#define xfs_isize_check(mp, ip, isize) -#endif /* DEBUG */ - #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else @@ -598,5 +598,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; +extern const struct xfs_buf_ops xfs_inode_buf_ops; #endif /* __XFS_INODE_H__ */ diff --git a/include/xfs_inode_item.h b/include/xfs_inode_item.h index d3dee61e6..779812fb3 100644 --- a/include/xfs_inode_item.h +++ b/include/xfs_inode_item.h @@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ + +/* + * The timestamps are dirty, but not necessarily anything else in the inode + * core. Unlike the other fields above this one must never make it to disk + * in the ilf_fields of the inode_log_format, but is purely stored in-memory in
+ */ +#define XFS_ILOG_TIMESTAMP 0x4000 + #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ XFS_ILOG_UUID | XFS_ILOG_ADATA | \ @@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 { XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ XFS_ILOG_DEV | XFS_ILOG_UUID | \ XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ - XFS_ILOG_ABROOT) + XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP) static inline int xfs_ilog_fbroot(int w) { @@ -134,30 +143,25 @@ typedef struct xfs_inode_log_item { unsigned short ili_lock_flags; /* lock flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ + unsigned int ili_fields; /* fields to be logged */ struct xfs_bmbt_rec *ili_extents_buf; /* array of logged data exts */ struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged attr exts */ -#ifdef XFS_TRANS_DEBUG - int ili_root_size; - char *ili_orig_root; -#endif xfs_inode_log_format_t ili_format; /* logged structure */ } xfs_inode_log_item_t; static inline int xfs_inode_clean(xfs_inode_t *ip) { - return (!ip->i_itemp || - !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && - !ip->i_update_core; + return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL); } extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); -extern void xfs_iflush_abort(struct xfs_inode *); +extern void xfs_iflush_abort(struct xfs_inode *, bool); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, xfs_inode_log_format_t *); diff --git a/include/xfs_inum.h b/include/xfs_inum.h index b8e4ee4e8..90efdaf17 100644 --- a/include/xfs_inum.h +++ b/include/xfs_inum.h @@ -26,22 +26,6 @@ * high agno_log-agblklog-inopblog bits - 0 */ -typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ - -/* - * Useful inode 
bits for this kernel. - * Used in some places where having 64-bits in the 32-bit kernels - * costs too much. - */ -#if XFS_BIG_INUMS -typedef xfs_ino_t xfs_intino_t; -#else -typedef __uint32_t xfs_intino_t; -#endif - -#define NULLFSINO ((xfs_ino_t)-1) -#define NULLAGINO ((xfs_agino_t)-1) - struct xfs_mount; #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) diff --git a/include/xfs_log.h b/include/xfs_log.h index 916eb7db1..5caee9605 100644 --- a/include/xfs_log.h +++ b/include/xfs_log.h @@ -52,15 +52,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) */ #define XFS_LOG_REL_PERM_RESERV 0x1 -/* - * Flags to xfs_log_reserve() - * - * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are - * performed against this type of reservation, the reservation - * is not decreased. Long running transactions should use this. - */ -#define XFS_LOG_PERM_RESERV 0x2 - /* * Flags to xfs_log_force() * @@ -137,7 +128,7 @@ struct xfs_trans; void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, int type, - struct xfs_item_ops *ops); + const struct xfs_item_ops *ops); xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, @@ -160,8 +151,9 @@ int xfs_log_mount(struct xfs_mount *mp, xfs_daddr_t start_block, int num_bblocks); int xfs_log_mount_finish(struct xfs_mount *mp); -void xfs_log_move_tail(struct xfs_mount *mp, - xfs_lsn_t tail_lsn); +xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); +xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); +void xfs_log_space_wake(struct xfs_mount *mp); int xfs_log_notify(struct xfs_mount *mp, struct xlog_in_core *iclog, xfs_log_callback_t *callback_entry); @@ -172,13 +164,9 @@ int xfs_log_reserve(struct xfs_mount *mp, int count, struct xlog_ticket **ticket, __uint8_t clientid, - uint flags, + bool permanent, uint t_type); -int xfs_log_write(struct xfs_mount *mp, - xfs_log_iovec_t region[], - int nentries, - struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn); +int 
xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); int xfs_log_unmount_write(struct xfs_mount *mp); void xfs_log_unmount(struct xfs_mount *mp); int xfs_log_force_umount(struct xfs_mount *mp, int logerror); @@ -189,12 +177,13 @@ void xlog_iodone(struct xfs_buf *); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket); -xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); - int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_log_vec *log_vector, xfs_lsn_t *commit_lsn, int flags); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); +void xfs_log_work_queue(struct xfs_mount *mp); +void xfs_log_worker(struct work_struct *work); +void xfs_log_quiesce(struct xfs_mount *mp); + #endif #endif /* __XFS_LOG_H__ */ diff --git a/include/xfs_log_priv.h b/include/xfs_log_priv.h index d5f8be8f4..16d8d12ea 100644 --- a/include/xfs_log_priv.h +++ b/include/xfs_log_priv.h @@ -19,7 +19,7 @@ #define __XFS_LOG_PRIV_H__ struct xfs_buf; -struct log; +struct xlog; struct xlog_ticket; struct xfs_mount; @@ -87,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i) return be32_to_cpu(i) >> 24; } -#define xlog_panic(args...) cmn_err(CE_PANIC, ## args) -#define xlog_exit(args...) cmn_err(CE_PANIC, ## args) -#define xlog_warn(args...) 
cmn_err(CE_WARN, ## args) - /* * In core log state */ @@ -143,11 +139,13 @@ static inline uint xlog_get_client_id(__be32 i) /* * Flags for log structure */ -#define XLOG_CHKSUM_MISMATCH 0x1 /* used only during recovery */ #define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */ #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being shutdown */ +#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ + +typedef __uint32_t xlog_tid_t; #ifdef __KERNEL__ /* @@ -240,8 +238,8 @@ typedef struct xlog_res { } xlog_res_t; typedef struct xlog_ticket { - wait_queue_head_t t_wait; /* ticket wait queue */ struct list_head t_queue; /* reserve/write queue */ + struct task_struct *t_task; /* task that owns this ticket */ xlog_tid_t t_tid; /* transaction identifier : 4 */ atomic_t t_ref; /* ticket reference count : 4 */ int t_curr_res; /* current reservation in bytes : 4 */ @@ -292,7 +290,7 @@ typedef struct xlog_rec_header { __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ __be64 h_lsn; /* lsn of this LR : 8 */ __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ - __be32 h_chksum; /* may not be used; non-zero if used : 4 */ + __le32 h_crc; /* crc of log record : 4 */ __be32 h_prev_block; /* block number to previous LR : 4 */ __be32 h_num_logops; /* number of log operations in this LR : 4 */ __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; @@ -353,7 +351,7 @@ typedef struct xlog_in_core { struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; struct xfs_buf *ic_bp; - struct log *ic_log; + struct xlog *ic_log; int ic_size; int ic_offset; int ic_bwritecnt; @@ -410,7 +408,7 @@ struct xfs_cil_ctx { * operations almost as efficient as the old logging methods. 
*/ struct xfs_cil { - struct log *xc_log; + struct xlog *xc_log; struct list_head xc_cil; spinlock_t xc_cil_lock; struct xfs_cil_ctx *xc_ctx; @@ -418,6 +416,8 @@ struct xfs_cil { struct list_head xc_committing; wait_queue_head_t xc_commit_wait; xfs_lsn_t xc_current_sequence; + struct work_struct xc_push_work; + xfs_lsn_t xc_push_seq; }; /* @@ -470,13 +470,23 @@ struct xfs_cil { #define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) +/* + * ticket grant locks, queues and accounting have their own cachelines + * as these are quite hot and can be operated on concurrently. + */ +struct xlog_grant_head { + spinlock_t lock ____cacheline_aligned_in_smp; + struct list_head waiters; + atomic64_t grant; +}; + /* * The reservation head lsn is not made up of a cycle number and block number. * Instead, it uses a cycle number and byte number. Logs don't expect to * overflow 31 bits worth of byte offset, so using a byte number will mean * that round off problems won't occur when releasing partial reservations. */ -typedef struct log { +struct xlog { /* The following fields don't need locking */ struct xfs_mount *l_mp; /* mount point */ struct xfs_ail *l_ailp; /* AIL log is working with */ @@ -484,6 +494,7 @@ typedef struct log { struct xfs_buf *l_xbuf; /* extra buffer for log * wrapping */ struct xfs_buftarg *l_targ; /* buftarg of log */ + struct delayed_work l_work; /* background flush work */ uint l_flags; uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ struct list_head *l_buf_cancel_table; @@ -521,24 +532,15 @@ typedef struct log { /* lsn of 1st LR with unflushed * buffers */ atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; - /* - * ticket grant locks, queues and accounting have their own cachlines - * as these are quite hot and can be operated on concurrently.
- */ - spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; - struct list_head l_reserveq; - atomic64_t l_grant_reserve_head; - - spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; - struct list_head l_writeq; - atomic64_t l_grant_write_head; + struct xlog_grant_head l_reserve_head; + struct xlog_grant_head l_write_head; /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG char *l_iclog_bak[XLOG_MAX_ICLOGS]; #endif -} xlog_t; +}; #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) @@ -546,15 +548,25 @@ typedef struct log { #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) /* common routines */ -extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); -extern int xlog_recover(xlog_t *log); -extern int xlog_recover_finish(xlog_t *log); -extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); +extern int +xlog_recover( + struct xlog *log); +extern int +xlog_recover_finish( + struct xlog *log); + +extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, + char *dp, int size); extern kmem_zone_t *xfs_log_ticket_zone; -struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, - int count, char client, uint xflags, - int alloc_flags); +struct xlog_ticket * +xlog_ticket_alloc( + struct xlog *log, + int unit_bytes, + int count, + char client, + bool permanent, + xfs_km_flags_t alloc_flags); static inline void @@ -566,15 +578,20 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) } void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); -int xlog_write(struct log *log, struct xfs_log_vec *log_vector, - struct xlog_ticket *tic, xfs_lsn_t *start_lsn, - xlog_in_core_t **commit_iclog, uint flags); +int +xlog_write( + struct xlog *log, + struct xfs_log_vec *log_vector, + struct xlog_ticket *tic, + xfs_lsn_t *start_lsn, + struct xlog_in_core **commit_iclog, + uint 
flags); /* * When we crack an atomic LSN, we sample it first so that the value will not * change while we are cracking it into the component values. This means we * will always get consistent component values to work from. This should always - * be used to smaple and crack LSNs taht are stored and updated in atomic + * be used to sample and crack LSNs that are stored and updated in atomic * variables. */ static inline void @@ -628,17 +645,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space) /* * Committed Item List interfaces */ -int xlog_cil_init(struct log *log); -void xlog_cil_init_post_recovery(struct log *log); -void xlog_cil_destroy(struct log *log); +int +xlog_cil_init(struct xlog *log); +void +xlog_cil_init_post_recovery(struct xlog *log); +void +xlog_cil_destroy(struct xlog *log); /* * CIL force routines */ -xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); +xfs_lsn_t +xlog_cil_force_lsn( + struct xlog *log, + xfs_lsn_t sequence); static inline void -xlog_cil_force(struct log *log) +xlog_cil_force(struct xlog *log) { xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); } diff --git a/include/xfs_mount.h b/include/xfs_mount.h index 9be9a050d..c267379c0 100644 --- a/include/xfs_mount.h +++ b/include/xfs_mount.h @@ -52,15 +52,8 @@ typedef struct xfs_trans_reservations { #else /* __KERNEL__ */ -#include "xfs_sync.h" - -struct log; -struct xfs_mount_args; +struct xlog; struct xfs_inode; -struct xfs_bmbt_irec; -struct xfs_bmap_free; -struct xfs_extdelta; -struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; @@ -134,7 +127,7 @@ typedef struct xfs_mount { uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ uint m_writeio_blocks; /* min write size blocks */ - struct log *m_log; /* log specific stuff */ + struct xlog *m_log; /* log specific stuff */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; 
/* rt summary levels */ @@ -177,7 +170,6 @@ typedef struct xfs_mount { uint m_qflags; /* quota status flags */ xfs_trans_reservations_t m_reservations;/* precomputed res values */ __uint64_t m_maxicount; /* maximum inode count */ - __uint64_t m_maxioffset; /* maximum inode offset */ __uint64_t m_resblks; /* total reserved blocks */ __uint64_t m_resblks_avail;/* available reserved blocks */ __uint64_t m_resblks_save; /* reserved blks @ remount,ro */ @@ -204,17 +196,21 @@ typedef struct xfs_mount { struct mutex m_icsb_mutex; /* balancer sync lock */ #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ - struct task_struct *m_sync_task; /* generalised sync thread */ - xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ - struct list_head m_sync_list; /* sync thread work item list */ - spinlock_t m_sync_lock; /* work item list lock */ - int m_sync_seq; /* sync thread generation no. */ - wait_queue_head_t m_wait_single_sync_task; + struct delayed_work m_reclaim_work; /* background inode reclaim */ + struct delayed_work m_eofblocks_work; /* background eof blocks + trimming */ __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ struct shrinker m_inode_shrink; /* inode reclaim shrinker */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ + + struct workqueue_struct *m_data_workqueue; + struct workqueue_struct *m_unwritten_workqueue; + struct workqueue_struct *m_cil_workqueue; + struct workqueue_struct *m_reclaim_workqueue; + struct workqueue_struct *m_log_workqueue; + struct workqueue_struct *m_eofblocks_workqueue; } xfs_mount_t; /* @@ -223,11 +219,11 @@ typedef struct xfs_mount { #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops must be synchronous except for space allocations */ -#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, 
typically for disk errors in metadata */ +#define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to user */ #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment @@ -297,8 +293,6 @@ xfs_preferred_iosize(xfs_mount_t *mp) PAGE_CACHE_SIZE)); } -#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) - #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) @@ -314,9 +308,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, #define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ #define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ -#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) -#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) - /* * Flags for xfs_mountfs */ @@ -366,12 +357,11 @@ typedef struct xfs_mod_sb { int64_t msb_delta; /* Change to make to specified field */ } xfs_mod_sb_t; -extern int xfs_log_sbcount(xfs_mount_t *, uint); +extern int xfs_log_sbcount(xfs_mount_t *); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); -extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); @@ -402,4 +392,6 @@ extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); +extern const struct xfs_buf_ops xfs_sb_buf_ops; + #endif /* __XFS_MOUNT_H__ */ diff --git a/include/xfs_quota.h b/include/xfs_quota.h index 5d1f57d82..b50ec5b95 100644 --- a/include/xfs_quota.h +++ b/include/xfs_quota.h @@ -87,8 +87,7 @@ typedef struct xfs_dqblk { #define 
XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ -#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */ -#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */ +#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) @@ -97,8 +96,7 @@ typedef struct xfs_dqblk { { XFS_DQ_PROJ, "PROJ" }, \ { XFS_DQ_GROUP, "GROUP" }, \ { XFS_DQ_DIRTY, "DIRTY" }, \ - { XFS_DQ_WANT, "WANT" }, \ - { XFS_DQ_INACTIVE, "INACTIVE" } + { XFS_DQ_FREEING, "FREEING" } /* * In the worst case, when both user and group quotas are on, @@ -176,6 +174,8 @@ typedef struct xfs_qoff_logformat { #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ +#define XFS_ALL_QUOTA_ACTIVE \ + (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) /* * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees @@ -199,7 +199,6 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ -#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ @@ -299,11 +298,6 @@ typedef struct xfs_dqtrx { long qt_delrtb_delta; /* delayed RT blk count changes */ } xfs_dqtrx_t; -extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); -extern int xfs_mount_reset_sbqflags(struct xfs_mount *); - -#endif /* __KERNEL__ */ - #ifdef CONFIG_XFS_QUOTA extern void xfs_trans_dup_dqinfo(struct xfs_trans *,
struct xfs_trans *); extern void xfs_trans_free_dqinfo(struct xfs_trans *); @@ -331,25 +325,36 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); extern void xfs_qm_dqdetach(struct xfs_inode *); extern void xfs_qm_dqrele(struct xfs_dquot *); extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); -extern int xfs_qm_sync(struct xfs_mount *, int); extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); extern void xfs_qm_mount_quotas(struct xfs_mount *); extern void xfs_qm_unmount(struct xfs_mount *); extern void xfs_qm_unmount_quotas(struct xfs_mount *); #else -#define xfs_qm_vop_dqalloc(ip, uid, gid, prid, flags, udqp, gdqp) ({ \ - *(udqp) = NULL; \ - *(gdqp) = NULL; \ - 0; \ -}) +static inline int +xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, + uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) +{ + *udqp = NULL; + *gdqp = NULL; + return 0; +} #define xfs_trans_dup_dqinfo(tp, tp2) #define xfs_trans_free_dqinfo(tp) #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) #define xfs_trans_apply_dquot_deltas(tp) #define xfs_trans_unreserve_and_mod_dquots(tp) -#define xfs_trans_reserve_quota_nblks(tp, ip, blks, inos, flg) (0) -#define xfs_trans_reserve_quota_bydquots(tp, mp, uqp, gqp, blks, inos, flg) (0) +static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, + struct xfs_inode *ip, long nblks, long ninos, uint flags) +{ + return 0; +} +static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, + struct xfs_mount *mp, struct xfs_dquot *udqp, + struct xfs_dquot *gdqp, long nblks, long nions, uint flags) +{ + return 0; +} #define xfs_qm_vop_create_dqattach(tp, ip, u, g) #define xfs_qm_vop_rename_dqattach(it) (0) #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) @@ -359,7 +364,6 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *); #define xfs_qm_dqdetach(ip) #define xfs_qm_dqrele(d) #define xfs_qm_statvfs(ip, s) -#define xfs_qm_sync(mp, flags) (0) #define 
xfs_qm_newmount(mp, a, b) (0) #define xfs_qm_mount_quotas(mp) #define xfs_qm_unmount(mp) @@ -372,4 +376,9 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *); xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) +extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, + xfs_dqid_t, uint, uint, char *); +extern int xfs_mount_reset_sbqflags(struct xfs_mount *); + +#endif /* __KERNEL__ */ #endif /* __XFS_QUOTA_H__ */ diff --git a/include/xfs_rtalloc.h b/include/xfs_rtalloc.h index ff614c29b..f7f3a359c 100644 --- a/include/xfs_rtalloc.h +++ b/include/xfs_rtalloc.h @@ -47,7 +47,7 @@ struct xfs_trans; #define XFS_SUMOFFSTOBLOCK(mp,s) \ (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) #define XFS_SUMPTR(mp,bp,so) \ - ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \ + ((xfs_suminfo_t *)((bp)->b_addr + \ (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) #define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) @@ -154,7 +154,7 @@ xfs_rtmount_init( if (mp->m_sb.sb_rblocks == 0) return 0; - cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); + xfs_warn(mp, "Not built with CONFIG_XFS_RT"); return ENOSYS; } # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 
0 : (ENOSYS)) diff --git a/include/xfs_sb.h b/include/xfs_sb.h index 5dcc2d72a..6a7f8b0e4 100644 --- a/include/xfs_sb.h +++ b/include/xfs_sb.h @@ -81,6 +81,7 @@ struct xfs_mount; #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ +#define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */ #define XFS_SB_VERSION2_OKREALFBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ @@ -510,13 +511,19 @@ static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp) sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; } +static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) +{ + return (xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_CRCBIT)); +} + /* * end of superblock version macros */ #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) -#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ @@ -536,7 +543,6 @@ static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp) #define XFS_BB_TO_FSB(mp,bb) \ (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) #define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) -#define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1)) /* * File system block to byte conversions. 
diff --git a/include/xfs_trace.h b/include/xfs_trace.h index 1abcf2846..5f3209735 100644 --- a/include/xfs_trace.h +++ b/include/xfs_trace.h @@ -26,11 +26,14 @@ #define trace_xfs_alloc_near_greater(a) ((void) 0) #define trace_xfs_alloc_near_lesser(a) ((void) 0) #define trace_xfs_alloc_near_error(a) ((void) 0) +#define trace_xfs_alloc_near_noentry(a) ((void) 0) +#define trace_xfs_alloc_near_busy(a) ((void) 0) #define trace_xfs_alloc_size_neither(a) ((void) 0) #define trace_xfs_alloc_size_noentry(a) ((void) 0) #define trace_xfs_alloc_size_nominleft(a) ((void) 0) #define trace_xfs_alloc_size_done(a) ((void) 0) #define trace_xfs_alloc_size_error(a) ((void) 0) +#define trace_xfs_alloc_size_busy(a) ((void) 0) #define trace_xfs_alloc_small_freelist(a) ((void) 0) #define trace_xfs_alloc_small_notenough(a) ((void) 0) #define trace_xfs_alloc_small_done(a) ((void) 0) @@ -91,12 +94,74 @@ #define trace_xfs_dir2_sf_toino4(a) ((void) 0) #define trace_xfs_dir2_sf_toino8(a) ((void) 0) +#define trace_xfs_da_node_create(a) ((void) 0) +#define trace_xfs_da_split(a) ((void) 0) +#define trace_xfs_attr_leaf_split_before(a) ((void) 0) +#define trace_xfs_attr_leaf_split_after(a) ((void) 0) +#define trace_xfs_da_root_split(a) ((void) 0) +#define trace_xfs_da_node_split(a) ((void) 0) +#define trace_xfs_da_node_rebalance(a) ((void) 0) +#define trace_xfs_da_node_add(a) ((void) 0) +#define trace_xfs_da_join(a) ((void) 0) +#define trace_xfs_da_root_join(a) ((void) 0) +#define trace_xfs_da_node_toosmall(a) ((void) 0) +#define trace_xfs_da_fixhashpath(a) ((void) 0) +#define trace_xfs_da_node_remove(a) ((void) 0) +#define trace_xfs_da_node_unbalance(a) ((void) 0) +#define trace_xfs_da_link_before(a) ((void) 0) +#define trace_xfs_da_link_after(a) ((void) 0) +#define trace_xfs_da_unlink_back(a) ((void) 0) +#define trace_xfs_da_unlink_forward(a) ((void) 0) +#define trace_xfs_da_path_shift(a) ((void) 0) +#define trace_xfs_da_grow_inode(a) ((void) 0) +#define trace_xfs_da_swap_lastblock(a) ((void) 0) 
+#define trace_xfs_da_shrink_inode(a) ((void) 0) + +#define trace_xfs_attr_sf_create(a) ((void) 0) +#define trace_xfs_attr_sf_add(a) ((void) 0) +#define trace_xfs_attr_sf_remove(a) ((void) 0) +#define trace_xfs_attr_sf_lookup(a) ((void) 0) +#define trace_xfs_attr_sf_to_leaf(a) ((void) 0) +#define trace_xfs_attr_leaf_to_sf(a) ((void) 0) +#define trace_xfs_attr_leaf_to_node(a) ((void) 0) +#define trace_xfs_attr_leaf_create(a) ((void) 0) +#define trace_xfs_attr_leaf_split(a) ((void) 0) +#define trace_xfs_attr_leaf_add_old(a) ((void) 0) +#define trace_xfs_attr_leaf_add_new(a) ((void) 0) +#define trace_xfs_attr_leaf_add(a) ((void) 0) +#define trace_xfs_attr_leaf_add_work(a) ((void) 0) +#define trace_xfs_attr_leaf_compact(a) ((void) 0) +#define trace_xfs_attr_leaf_rebalance(a) ((void) 0) +#define trace_xfs_attr_leaf_toosmall(a) ((void) 0) +#define trace_xfs_attr_leaf_remove(a) ((void) 0) +#define trace_xfs_attr_leaf_unbalance(a) ((void) 0) +#define trace_xfs_attr_leaf_lookup(a) ((void) 0) +#define trace_xfs_attr_leaf_clearflag(a) ((void) 0) +#define trace_xfs_attr_leaf_setflag(a) ((void) 0) +#define trace_xfs_attr_leaf_flipflags(a) ((void) 0) + +#define trace_xfs_attr_sf_addname(a) ((void) 0) +#define trace_xfs_attr_leaf_addname(a) ((void) 0) +#define trace_xfs_attr_leaf_replace(a) ((void) 0) +#define trace_xfs_attr_leaf_removename(a) ((void) 0) +#define trace_xfs_attr_leaf_get(a) ((void) 0) +#define trace_xfs_attr_node_addname(a) ((void) 0) +#define trace_xfs_attr_node_replace(a) ((void) 0) +#define trace_xfs_attr_node_removename(a) ((void) 0) +#define trace_xfs_attr_fillstate(a) ((void) 0) +#define trace_xfs_attr_refillstate(a) ((void) 0) +#define trace_xfs_attr_node_get(a) ((void) 0) +#define trace_xfs_attr_rmtval_get(a) ((void) 0) +#define trace_xfs_attr_rmtval_set(a) ((void) 0) +#define trace_xfs_attr_rmtval_remove(a) ((void) 0) + #define trace_xfs_bmap_pre_update(a,b,c,d) ((void) 0) #define trace_xfs_bmap_post_update(a,b,c,d) ((void) 0) #define 
trace_xfs_extlist(a,b,c,d) ((void) 0) #define trace_xfs_bunmap(a,b,c,d,e) ((void) 0) -#define trace_xfs_perag_get(a,b,c,d) ((void) 0) -#define trace_xfs_perag_put(a,b,c,d) ((void) 0) +/* set c = c to avoid unused var warnings */ +#define trace_xfs_perag_get(a,b,c,d) ((c) = (c)) +#define trace_xfs_perag_put(a,b,c,d) ((c) = (c)) #endif /* __TRACE_H__ */ diff --git a/include/xfs_trans.h b/include/xfs_trans.h index 2870308de..acf1381f7 100644 --- a/include/xfs_trans.h +++ b/include/xfs_trans.h @@ -163,9 +163,8 @@ typedef struct xfs_trans_header { */ struct xfs_log_item_desc { struct xfs_log_item *lid_item; - ushort lid_size; - unsigned char lid_flags; struct list_head lid_trans; + unsigned char lid_flags; }; #define XFS_LID_DIRTY 0x1 @@ -180,6 +179,8 @@ struct xfs_log_item_desc { #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ +#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer + count in superblock */ /* * Values for call flags parameter. 
@@ -323,7 +324,7 @@ typedef struct xfs_log_item { struct xfs_log_item *); /* buffer item iodone */ /* callback func */ - struct xfs_item_ops *li_ops; /* function list */ + const struct xfs_item_ops *li_ops; /* function list */ /* delayed logging */ struct list_head li_cil; /* CIL pointers */ @@ -338,37 +339,33 @@ typedef struct xfs_log_item { { XFS_LI_IN_AIL, "IN_AIL" }, \ { XFS_LI_ABORTED, "ABORTED" } -typedef struct xfs_item_ops { +struct xfs_item_ops { uint (*iop_size)(xfs_log_item_t *); void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); void (*iop_pin)(xfs_log_item_t *); void (*iop_unpin)(xfs_log_item_t *, int remove); - uint (*iop_trylock)(xfs_log_item_t *); + uint (*iop_push)(struct xfs_log_item *, struct list_head *); void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_push)(xfs_log_item_t *); - void (*iop_pushbuf)(xfs_log_item_t *); void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); -} xfs_item_ops_t; +}; #define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) -#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) +#define IOP_PUSH(ip, list) (*(ip)->li_ops->iop_push)(ip, list) #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) -#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) -#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) /* - * Return values for the IOP_TRYLOCK() routines. + * Return values for the IOP_PUSH() routines. 
*/ -#define XFS_ITEM_SUCCESS 0 -#define XFS_ITEM_PINNED 1 -#define XFS_ITEM_LOCKED 2 -#define XFS_ITEM_PUSHBUF 3 +#define XFS_ITEM_SUCCESS 0 +#define XFS_ITEM_PINNED 1 +#define XFS_ITEM_LOCKED 2 +#define XFS_ITEM_FLUSHING 3 /* * This is the type of function which can be given to xfs_trans_callback() @@ -445,16 +442,53 @@ typedef struct xfs_trans { * XFS transaction mechanism exported interfaces. */ xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); -struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t, - int, uint); -int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *, - struct xfs_buftarg *, xfs_daddr_t, int, uint, - struct xfs_buf **); + +struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, + struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + uint flags); + +static inline struct xfs_buf * +xfs_trans_get_buf( + struct xfs_trans *tp, + struct xfs_buftarg *target, + xfs_daddr_t blkno, + int numblks, + uint flags) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return xfs_trans_get_buf_map(tp, target, &map, 1, flags); +} + +int xfs_trans_read_buf_map(struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); + +static inline int +xfs_trans_read_buf( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buftarg *target, + xfs_daddr_t blkno, + int numblks, + xfs_buf_flags_t flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return xfs_trans_read_buf_map(mp, tp, target, &map, 1, + flags, bpp, ops); +} + 
struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); @@ -466,11 +500,8 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); -int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, - xfs_ino_t , uint, uint, struct xfs_inode **); void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); -void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); -void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); +void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); @@ -486,10 +517,7 @@ void xfs_trans_log_efd_extent(xfs_trans_t *, struct xfs_efd_log_item *, xfs_fsblock_t, xfs_extlen_t); -int _xfs_trans_commit(xfs_trans_t *, - uint flags, - int *); -#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) +int xfs_trans_commit(xfs_trans_t *, uint flags); void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); diff --git a/include/xfs_types.h b/include/xfs_types.h index 26d1867d8..e9bd5c3b6 100644 --- a/include/xfs_types.h +++ b/include/xfs_types.h @@ -33,7 +33,6 @@ typedef signed long long int __int64_t; typedef unsigned long long int __uint64_t; typedef enum { B_FALSE,B_TRUE } boolean_t; -typedef __uint32_t prid_t; /* project ID */ typedef __uint32_t inst_t; /* an instruction */ typedef __s64 xfs_off_t; /* type */ @@ -56,7 +55,10 @@ typedef __uint64_t __psunsigned_t; #endif /* __KERNEL__ */ +typedef __uint32_t prid_t; /* project ID */ + typedef __uint32_t xfs_agblock_t; 
/* blockno in alloc. group */ +typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */ typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ typedef __uint32_t xfs_agnumber_t; /* allocation group number */ typedef __int32_t xfs_extnum_t; /* # of extents in a file */ @@ -73,8 +75,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ -typedef __uint32_t xlog_tid_t; /* transaction ID type */ - /* * These types are 64 bits on disk but are either 32 or 64 bits in memory. * Disk based types: @@ -103,6 +103,7 @@ typedef __uint64_t xfs_fileoff_t; /* block number in a file */ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ + /* * Null values for the types. */ @@ -122,6 +123,9 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ #define NULLCOMMITLSN ((xfs_lsn_t)-1) +#define NULLFSINO ((xfs_ino_t)-1) +#define NULLAGINO ((xfs_agino_t)-1) + /* * Max values for extlen, extnum, aextnum. */ @@ -129,6 +133,20 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ #define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ #define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ +/* + * Minimum and maximum blocksize and sectorsize. + * The blocksize upper limit is pretty much arbitrary. + * The sectorsize upper limit is due to sizeof(sb_sectsize). + */ +#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ +#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) +#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 
32768 bytes */ +#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) +#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) + /* * Min numbers of data/attr fork btree root pointers. */ diff --git a/libxfs/Makefile b/libxfs/Makefile index b3fd85cfd..dc9b22cef 100644 --- a/libxfs/Makefile +++ b/libxfs/Makefile @@ -10,7 +10,7 @@ LT_CURRENT = 0 LT_REVISION = 0 LT_AGE = 0 -HFILES = xfs.h init.h +HFILES = xfs.h init.h xfs_dir2_priv.h CFILES = cache.c init.c kmem.c logitem.c radix-tree.c rdwr.c trans.c util.c \ xfs_alloc.c xfs_ialloc.c xfs_inode.c xfs_btree.c xfs_alloc_btree.c \ xfs_ialloc_btree.c xfs_bmap_btree.c xfs_da_btree.c \ diff --git a/libxfs/init.c b/libxfs/init.c index fce344512..71da69bc3 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -371,7 +371,6 @@ manage_zones(int release) extern kmem_zone_t *xfs_ili_zone; extern kmem_zone_t *xfs_inode_zone; extern kmem_zone_t *xfs_ifork_zone; - extern kmem_zone_t *xfs_dabuf_zone; extern kmem_zone_t *xfs_buf_item_zone; extern kmem_zone_t *xfs_da_state_zone; extern kmem_zone_t *xfs_btree_cur_zone; @@ -383,7 +382,6 @@ manage_zones(int release) kmem_free(xfs_buf_zone); kmem_free(xfs_inode_zone); kmem_free(xfs_ifork_zone); - kmem_free(xfs_dabuf_zone); kmem_free(xfs_buf_item_zone); kmem_free(xfs_da_state_zone); kmem_free(xfs_btree_cur_zone); @@ -395,7 +393,6 @@ manage_zones(int release) xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buffer"); xfs_inode_zone = kmem_zone_init(sizeof(xfs_inode_t), "xfs_inode"); xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); - xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); xfs_ili_zone = kmem_zone_init( sizeof(xfs_inode_log_item_t), "xfs_inode_log_item"); xfs_buf_item_zone = kmem_zone_init( diff --git a/libxfs/logitem.c b/libxfs/logitem.c index 116d3edfd..84e4c1498 100644 --- a/libxfs/logitem.c +++ b/libxfs/logitem.c @@ -32,21 +32,27 @@ kmem_zone_t *xfs_ili_zone; /* inode log item zone */ xfs_buf_t * xfs_trans_buf_item_match( xfs_trans_t *tp, 
- xfs_buftarg_t *target, - xfs_daddr_t blkno, - int len) + dev_t dev, + struct xfs_buf_map *map, + int nmaps) { struct xfs_log_item_desc *lidp; struct xfs_buf_log_item *blip; + int len = 0; + int i; + + for (i = 0; i < nmaps; i++) + len += map[i].bm_len; - len = BBTOB(len); list_for_each_entry(lidp, &tp->t_items, lid_trans) { blip = (struct xfs_buf_log_item *)lidp->lid_item; if (blip->bli_item.li_type == XFS_LI_BUF && - XFS_BUF_TARGET(blip->bli_buf) == target->dev && - XFS_BUF_ADDR(blip->bli_buf) == blkno && - XFS_BUF_COUNT(blip->bli_buf) == len) + blip->bli_buf->b_dev == dev && + XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn && + blip->bli_buf->b_bcount == BBTOB(len)) { + ASSERT(blip->bli_buf->b_map_count == nmaps); return blip->bli_buf; + } } return NULL; diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 432a1af5f..e75edd0a7 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -159,7 +159,7 @@ libxfs_log_header( head->h_len = cpu_to_be32(sunit - BBSIZE); else head->h_len = cpu_to_be32(20); - head->h_chksum = cpu_to_be32(0); + head->h_crc = cpu_to_be32(0); head->h_prev_block = cpu_to_be32(-1); head->h_num_logops = cpu_to_be32(1); head->h_cycle_data[0] = cpu_to_be32(0xb0c0d0d0); @@ -193,72 +193,86 @@ libxfs_log_header( #ifdef XFS_BUF_TRACING #undef libxfs_readbuf +#undef libxfs_readbuf_map #undef libxfs_writebuf #undef libxfs_getbuf +#undef libxfs_getbuf_map #undef libxfs_getbuf_flags #undef libxfs_putbuf -xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int); +xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int); +xfs_buf_t *libxfs_readbuf_map(dev_t, struct xfs_buf_map *, int, int); int libxfs_writebuf(xfs_buf_t *, int); -xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int); +xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int); +xfs_buf_t *libxfs_getbuf_map(dev_t, struct xfs_buf_map *, int); +xfs_buf_t *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int); void libxfs_putbuf (xfs_buf_t *); +#define __add_trace(bp, func, file, line) \ +do { \ + if (bp) { \ + 
(bp)->b_func = (func); \ + (bp)->b_file = (file); \ + (bp)->b_line = (line); \ + } \ +} while (0) + xfs_buf_t * -libxfs_trace_readbuf(const char *func, const char *file, int line, dev_t dev, xfs_daddr_t blkno, int len, int flags) +libxfs_trace_readbuf(const char *func, const char *file, int line, + dev_t dev, xfs_daddr_t blkno, int len, int flags) { xfs_buf_t *bp = libxfs_readbuf(dev, blkno, len, flags); + __add_trace(bp, func, file, line); + return bp; +} - if (bp){ - bp->b_func = func; - bp->b_file = file; - bp->b_line = line; - } - +xfs_buf_t * +libxfs_trace_readbuf_map(const char *func, const char *file, int line, + dev_t dev, struct xfs_buf_map *map, int nmaps, int flags) +{ + xfs_buf_t *bp = libxfs_readbuf_map(dev, map, nmaps, flags); + __add_trace(bp, func, file, line); return bp; } int libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags) { - bp->b_func = func; - bp->b_file = file; - bp->b_line = line; - + __add_trace(bp, func, file, line); return libxfs_writebuf(bp, flags); } xfs_buf_t * -libxfs_trace_getbuf(const char *func, const char *file, int line, dev_t device, xfs_daddr_t blkno, int len) +libxfs_trace_getbuf(const char *func, const char *file, int line, + dev_t device, xfs_daddr_t blkno, int len) { xfs_buf_t *bp = libxfs_getbuf(device, blkno, len); + __add_trace(bp, func, file, line); + return bp; +} - bp->b_func = func; - bp->b_file = file; - bp->b_line = line; - +xfs_buf_t * +libxfs_trace_getbuf_map(const char *func, const char *file, int line, + dev_t device, struct xfs_buf_map *map, int nmaps) +{ + xfs_buf_t *bp = libxfs_getbuf_map(device, map, nmaps); + __add_trace(bp, func, file, line); return bp; } xfs_buf_t * libxfs_trace_getbuf_flags(const char *func, const char *file, int line, - dev_t device, xfs_daddr_t blkno, int len, unsigned long flags) + dev_t device, xfs_daddr_t blkno, int len, unsigned int flags) { - xfs_buf_t *bp = libxfs_getbuf(device, blkno, len, flags); - - bp->b_func = func; - bp->b_file 
= file; - bp->b_line = line; - + xfs_buf_t *bp = libxfs_getbuf_flags(device, blkno, len, flags); + __add_trace(bp, func, file, line); return bp; } void libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp) { - bp->b_func = func; - bp->b_file = file; - bp->b_line = line; - + __add_trace(bp, func, file, line); libxfs_putbuf(bp); } @@ -279,30 +293,40 @@ static struct cache_mru xfs_buf_freelist = {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list}, 0, PTHREAD_MUTEX_INITIALIZER }; -typedef struct { - dev_t device; - xfs_daddr_t blkno; - unsigned int bblen; -} xfs_bufkey_t; +/* + * The bufkey is used to pass the new buffer information to the cache object + * allocation routine. Because discontiguous buffers need to pass different + * information, we need fields to pass that information. However, because the + * blkno and bblen are needed for the initial cache entry lookup (i.e. for + * bcompare), we use the fact that map/nmaps is non-null to switch to + * discontiguous buffer initialisation instead of contiguous initialisation.
+ */ +struct xfs_bufkey { + dev_t device; + xfs_daddr_t blkno; + unsigned int bblen; + struct xfs_buf_map *map; + int nmaps; +}; static unsigned int libxfs_bhash(cache_key_t key, unsigned int hashsize) { - return (((unsigned int)((xfs_bufkey_t *)key)->blkno) >> 5) % hashsize; + return (((unsigned int)((struct xfs_bufkey *)key)->blkno) >> 5) % hashsize; } static int libxfs_bcompare(struct cache_node *node, cache_key_t key) { - xfs_buf_t *bp = (xfs_buf_t *)node; - xfs_bufkey_t *bkey = (xfs_bufkey_t *)key; + struct xfs_buf *bp = (struct xfs_buf *)node; + struct xfs_bufkey *bkey = (struct xfs_bufkey *)key; #ifdef IO_BCOMPARE_CHECK if (bp->b_dev == bkey->device && bp->b_blkno == bkey->blkno && bp->b_bcount != BBTOB(bkey->bblen)) fprintf(stderr, "%lx: Badness in key lookup (length)\n" - "bp=(bno %llu, len %u bytes) key=(bno %llu, len %u bytes)\n", + "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n", pthread_self(), (unsigned long long)bp->b_blkno, (int)bp->b_bcount, (unsigned long long)bkey->blkno, BBTOB(bkey->bblen)); @@ -322,11 +346,12 @@ libxfs_bprint(xfs_buf_t *bp) } static void -libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes) +__initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes) { bp->b_flags = 0; bp->b_blkno = bno; bp->b_bcount = bytes; + bp->b_length = BTOBB(bytes); bp->b_dev = device; bp->b_error = 0; if (!bp->b_addr) @@ -346,11 +371,44 @@ libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes) bp->b_recur = 0; } +static void +libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes) +{ + __initbuf(bp, device, bno, bytes); +} + +static void +libxfs_initbuf_map(xfs_buf_t *bp, dev_t device, struct xfs_buf_map *map, int nmaps) +{ + unsigned int bytes = 0; + int i; + + bytes = sizeof(struct xfs_buf_map) * nmaps; + bp->b_map = malloc(bytes); + if (!bp->b_map) { + fprintf(stderr, + _("%s: %s can't malloc %u bytes: %s\n"), + progname, __FUNCTION__, 
bytes, + strerror(errno)); + exit(1); + } + bp->b_nmaps = nmaps; + + bytes = 0; + for ( i = 0; i < nmaps; i++) { + bp->b_map[i].bm_bn = map[i].bm_bn; + bp->b_map[i].bm_len = map[i].bm_len; + bytes += BBTOB(map[i].bm_len); + } + + __initbuf(bp, device, map[0].bm_bn, bytes); + bp->b_flags |= LIBXFS_B_DISCONTIG; +} + xfs_buf_t * -libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen) +__libxfs_getbufr(int blen) { xfs_buf_t *bp; - int blen = BBTOB(bblen); /* * first look for a buffer that can be used as-is, @@ -372,15 +430,27 @@ libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen) list_del_init(&bp->b_node.cn_mru); free(bp->b_addr); bp->b_addr = NULL; + free(bp->b_map); + bp->b_map = NULL; } } else bp = kmem_zone_zalloc(xfs_buf_zone, 0); pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); - if (bp != NULL) + return bp; +} + +xfs_buf_t * +libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen) +{ + xfs_buf_t *bp; + int blen = BBTOB(bblen); + + bp =__libxfs_getbufr(blen); + if (bp) libxfs_initbuf(bp, device, blkno, blen); #ifdef IO_DEBUG - printf("%lx: %s: allocated %u bytes buffer, key=%llu(%llu), %p\n", + printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", pthread_self(), __FUNCTION__, blen, (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); #endif @@ -388,6 +458,38 @@ libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen) return bp; } +xfs_buf_t * +libxfs_getbufr_map(dev_t device, xfs_daddr_t blkno, int bblen, + struct xfs_buf_map *map, int nmaps) +{ + xfs_buf_t *bp; + int blen = BBTOB(bblen); + + if (!map || !nmaps) { + fprintf(stderr, + _("%s: %s invalid map %p or nmaps %d\n"), + progname, __FUNCTION__, map, nmaps); + exit(1); + } + + if (blkno != map[0].bm_bn) { + fprintf(stderr, + _("%s: %s map blkno %lx doesn't match key %lx\n"), + progname, __FUNCTION__, map[0].bm_bn, blkno); + exit(1); + } + + bp =__libxfs_getbufr(blen); + if (bp) + libxfs_initbuf_map(bp, device, map, nmaps); +#ifdef IO_DEBUG + printf("%lx: %s: 
allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", + pthread_self(), __FUNCTION__, blen, + (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); +#endif + + return bp; +} #ifdef XFS_BUF_TRACING struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list}; @@ -396,18 +498,12 @@ int lock_buf_count = 0; extern int use_xfs_buf_lock; -struct xfs_buf * -libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags) +static struct xfs_buf * +__cache_lookup(struct xfs_bufkey *key, unsigned int flags) { - xfs_buf_t *bp; - xfs_bufkey_t key; - int miss; - - key.device = device; - key.blkno = blkno; - key.bblen = len; + struct xfs_buf *bp; - miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp); + cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp); if (!bp) return NULL; @@ -423,7 +519,7 @@ libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags if (pthread_equal(bp->b_holder, pthread_self())) { fprintf(stderr, _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"), - blkno); + key->blkno); bp->b_recur++; return bp; } else { @@ -444,9 +540,9 @@ libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags pthread_mutex_unlock(&libxfs_bcache->c_mutex); #endif #ifdef IO_DEBUG - printf("%lx %s: %s buffer %p for bno = %llu\n", - pthread_self(), __FUNCTION__, miss ? 
"miss" : "hit", - bp, (long long)LIBXFS_BBTOOFF64(blkno)); + printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n", + pthread_self(), __FUNCTION__, + bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno)); #endif return bp; @@ -455,12 +551,41 @@ out_put: return NULL; } +struct xfs_buf * +libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags) +{ + struct xfs_bufkey key = {0}; + + key.device = device; + key.blkno = blkno; + key.bblen = len; + + return __cache_lookup(&key, flags); +} + struct xfs_buf * libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len) { return libxfs_getbuf_flags(device, blkno, len, 0); } +struct xfs_buf * +libxfs_getbuf_map(dev_t device, struct xfs_buf_map *map, int nmaps) +{ + struct xfs_bufkey key = {0}; + int i; + + key.device = device; + key.blkno = map[0].bm_bn; + for (i = 0; i < nmaps; i++) { + key.bblen += map[i].bm_len; + } + key.map = map; + key.nmaps = nmaps; + + return __cache_lookup(&key, 0); +} + void libxfs_putbuf(xfs_buf_t *bp) { @@ -485,7 +610,7 @@ libxfs_putbuf(xfs_buf_t *bp) void libxfs_purgebuf(xfs_buf_t *bp) { - xfs_bufkey_t key; + struct xfs_bufkey key = {0}; key.device = bp->b_dev; key.blkno = bp->b_blkno; @@ -497,47 +622,62 @@ libxfs_purgebuf(xfs_buf_t *bp) static struct cache_node * libxfs_balloc(cache_key_t key) { - xfs_bufkey_t *bufkey = (xfs_bufkey_t *)key; + struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key; + if (bufkey->map) + return (struct cache_node *) + libxfs_getbufr_map(bufkey->device, + bufkey->blkno, bufkey->bblen, + bufkey->map, bufkey->nmaps); return (struct cache_node *)libxfs_getbufr(bufkey->device, - bufkey->blkno, bufkey->bblen); + bufkey->blkno, bufkey->bblen); } -int -libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *bp, int len, int flags) + +static int +__read_buf(int fd, void *buf, int len, off64_t offset, int flags) { - int fd = libxfs_device_to_fd(dev); - int bytes = BBTOB(len); - int error; int sts; - ASSERT(BBTOB(len) <= bp->b_bcount); - - sts = 
pread64(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno)); + sts = pread64(fd, buf, len, offset); if (sts < 0) { - error = errno; + int error = errno; fprintf(stderr, _("%s: read failed: %s\n"), progname, strerror(error)); if (flags & LIBXFS_EXIT_ON_FAILURE) exit(1); return error; - } else if (sts != bytes) { + } else if (sts != len) { fprintf(stderr, _("%s: error - read only %d of %d bytes\n"), - progname, sts, bytes); + progname, sts, len); if (flags & LIBXFS_EXIT_ON_FAILURE) exit(1); return EIO; } -#ifdef IO_DEBUG - printf("%lx: %s: read %u bytes, blkno=%llu(%llu), %p\n", - pthread_self(), __FUNCTION__, bytes, - (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); -#endif - if (bp->b_dev == dev && + return 0; +} + +int +libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *bp, int len, int flags) +{ + int fd = libxfs_device_to_fd(dev); + int bytes = BBTOB(len); + int error; + + ASSERT(BBTOB(len) <= bp->b_bcount); + + error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); + if (!error && + bp->b_dev == dev && bp->b_blkno == blkno && bp->b_bcount == bytes) bp->b_flags |= LIBXFS_B_UPTODATE; - return 0; +#ifdef IO_DEBUG + printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n", + pthread_self(), __FUNCTION__, bytes, error, + (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); +#endif + return error; } xfs_buf_t * @@ -555,37 +695,113 @@ libxfs_readbuf(dev_t dev, xfs_daddr_t blkno, int len, int flags) return bp; } -int -libxfs_writebufr(xfs_buf_t *bp) +struct xfs_buf * +libxfs_readbuf_map(dev_t dev, struct xfs_buf_map *map, int nmaps, int flags) +{ + xfs_buf_t *bp; + int error = 0; + int fd; + int i; + char *buf; + + if (nmaps == 1) + return libxfs_readbuf(dev, map[0].bm_bn, map[0].bm_len, flags); + + bp = libxfs_getbuf_map(dev, map, nmaps); + if (!bp || (bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) + return bp; + + ASSERT(bp->b_nmaps = nmaps); + + fd = libxfs_device_to_fd(dev); + buf = bp->b_addr; + for (i = 0; i 
< bp->b_nmaps; i++) { + off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn); + int len = BBTOB(bp->b_map[i].bm_len); + + ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn); + ASSERT(bp->b_map[i].bm_len == map[i].bm_len); + + error = __read_buf(fd, buf, len, offset, flags); + if (error) { + bp->b_error = error; + break; + } + buf += len; + offset += len; + } + + if (!error) + bp->b_flags |= LIBXFS_B_UPTODATE; +#ifdef IO_DEBUG + printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", + pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, + (long long)LIBXFS_BBTOOFF64(bp->b_blkno), (long long)bp->b_blkno, bp); +#endif + return bp; +} + +static int +__write_buf(int fd, void *buf, int len, off64_t offset, int flags) { int sts; - int fd = libxfs_device_to_fd(bp->b_dev); - int error; - sts = pwrite64(fd, bp->b_addr, bp->b_bcount, LIBXFS_BBTOOFF64(bp->b_blkno)); + sts = pwrite64(fd, buf, len, offset); if (sts < 0) { - error = errno; + int error = errno; fprintf(stderr, _("%s: pwrite64 failed: %s\n"), progname, strerror(error)); - if (bp->b_flags & LIBXFS_B_EXIT) + if (flags & LIBXFS_B_EXIT) exit(1); return error; - } else if (sts != bp->b_bcount) { - fprintf(stderr, _("%s: error - wrote only %d of %d bytes\n"), - progname, sts, bp->b_bcount); - if (bp->b_flags & LIBXFS_B_EXIT) + } else if (sts != len) { + fprintf(stderr, _("%s: error - pwrite64 only %d of %d bytes\n"), + progname, sts, len); + if (flags & LIBXFS_B_EXIT) exit(1); return EIO; } + return 0; +} + +int +libxfs_writebufr(xfs_buf_t *bp) +{ + int fd = libxfs_device_to_fd(bp->b_dev); + int error = 0; + + if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { + error = __write_buf(fd, bp->b_addr, bp->b_bcount, + LIBXFS_BBTOOFF64(bp->b_blkno), bp->b_flags); + } else { + int i; + char *buf = bp->b_addr; + + for (i = 0; i < bp->b_nmaps; i++) { + off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn); + int len = BBTOB(bp->b_map[i].bm_len); + + error = __write_buf(fd, buf, len, offset, bp->b_flags); + if (error) { + 
bp->b_error = error; + break; + } + buf += len; + offset += len; + } + } + #ifdef IO_DEBUG printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p\n", pthread_self(), __FUNCTION__, bp->b_bcount, (long long)LIBXFS_BBTOOFF64(bp->b_blkno), (long long)bp->b_blkno, bp); #endif - bp->b_flags |= LIBXFS_B_UPTODATE; - bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT); - return 0; + if (!error) { + bp->b_flags |= LIBXFS_B_UPTODATE; + bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT); + } + return error; } int @@ -609,7 +825,7 @@ libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags) #ifdef IO_DEBUG if (boff + len > bp->b_bcount) { printf("Badness, iomove out of range!\n" - "bp=(bno %llu, bytes %u) range=(boff %u, bytes %u)\n", + "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n", (long long)bp->b_blkno, bp->b_bcount, boff, len); abort(); } @@ -742,7 +958,10 @@ libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags, fprintf(stderr, "%s: allocated inode, ino=%llu(%llu), %p\n", __FUNCTION__, (unsigned long long)ino, bno, ip); #endif - if ((error = libxfs_iread(mp, tp, ino, ip, bno))) { + ip->i_ino = ino; + ip->i_mount = mp; + error = xfs_iread(mp, tp, ip, bno); + if (error) { cache_node_purge(libxfs_icache, &ino, (struct cache_node *)ip); ip = NULL; diff --git a/libxfs/trans.c b/libxfs/trans.c index a745d5157..7cb3c8c57 100644 --- a/libxfs/trans.c +++ b/libxfs/trans.c @@ -252,7 +252,7 @@ xfs_trans_log_inode( * this coordination mechanism. 
*/ flags |= ip->i_itemp->ili_last_fields; - ip->i_itemp->ili_format.ilf_fields |= flags; + ip->i_itemp->ili_fields |= flags; } /* @@ -338,7 +338,7 @@ libxfs_trans_binval( if (bip->bli_flags & XFS_BLI_STALE) return; XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_STALE(bp); + xfs_buf_stale(bp); bip->bli_flags |= XFS_BLI_STALE; bip->bli_flags &= ~XFS_BLI_DIRTY; bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; @@ -383,22 +383,20 @@ libxfs_trans_bhold( } xfs_buf_t * -libxfs_trans_get_buf( +libxfs_trans_get_buf_map( xfs_trans_t *tp, dev_t dev, - xfs_daddr_t d, - int len, + struct xfs_buf_map *map, + int nmaps, uint f) { xfs_buf_t *bp; xfs_buf_log_item_t *bip; - xfs_buftarg_t bdev; if (tp == NULL) - return libxfs_getbuf(dev, d, len); + return libxfs_getbuf_map(dev, map, nmaps); - bdev.dev = dev; - bp = xfs_trans_buf_item_match(tp, &bdev, d, len); + bp = xfs_trans_buf_item_match(tp, dev, map, nmaps); if (bp != NULL) { ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); @@ -407,7 +405,7 @@ libxfs_trans_get_buf( return bp; } - bp = libxfs_getbuf(dev, d, len); + bp = libxfs_getbuf_map(dev, map, nmaps); if (bp == NULL) return NULL; #ifdef XACT_DEBUG @@ -432,15 +430,13 @@ libxfs_trans_getsb( { xfs_buf_t *bp; xfs_buf_log_item_t *bip; - xfs_buftarg_t bdev; - int len; + int len = XFS_FSS_TO_BB(mp, 1); + DEFINE_SINGLE_BUF_MAP(map, XFS_SB_DADDR, len); if (tp == NULL) return libxfs_getsb(mp, flags); - bdev.dev = mp->m_dev; - len = XFS_FSS_TO_BB(mp, 1); - bp = xfs_trans_buf_item_match(tp, &bdev, XFS_SB_DADDR, len); + bp = xfs_trans_buf_item_match(tp, mp->m_dev, &map, 1); if (bp != NULL) { ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); @@ -465,24 +461,24 @@ libxfs_trans_getsb( } int -libxfs_trans_read_buf( +libxfs_trans_read_buf_map( xfs_mount_t *mp, xfs_trans_t *tp, dev_t dev, - xfs_daddr_t blkno, - int len, + struct xfs_buf_map *map, + int nmaps, uint flags, - xfs_buf_t **bpp) + 
xfs_buf_t **bpp, + const struct xfs_buf_ops *ops) { xfs_buf_t *bp; xfs_buf_log_item_t *bip; - xfs_buftarg_t bdev; int error; *bpp = NULL; if (tp == NULL) { - bp = libxfs_readbuf(dev, blkno, len, flags); + bp = libxfs_readbuf_map(dev, map, nmaps, flags); if (!bp) { return (flags & XBF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); @@ -492,8 +488,7 @@ libxfs_trans_read_buf( goto done; } - bdev.dev = dev; - bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len); + bp = xfs_trans_buf_item_match(tp, dev, map, nmaps); if (bp != NULL) { ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); @@ -502,7 +497,7 @@ libxfs_trans_read_buf( goto done; } - bp = libxfs_readbuf(dev, blkno, len, flags); + bp = libxfs_readbuf_map(dev, map, nmaps, flags); if (!bp) { return (flags & XBF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); @@ -588,7 +583,7 @@ inode_item_done( mp = iip->ili_item.li_mountp; ASSERT(ip != NULL); - if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) { + if (!(iip->ili_fields & XFS_ILOG_ALL)) { ip->i_transp = NULL; /* disassociate from transaction */ iip->ili_flags = 0; /* reset all flags */ goto ili_done; @@ -597,9 +592,9 @@ inode_item_done( /* * Get the buffer containing the on-disk inode. 
*/ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0); + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, 0, 0); if (error) { - fprintf(stderr, _("%s: warning - itobp failed (%d)\n"), + fprintf(stderr, _("%s: warning - imap_to_bp failed (%d)\n"), progname, error); goto ili_done; } @@ -674,6 +669,7 @@ trans_committed( struct xfs_log_item *lip = lidp->lid_item; xfs_trans_del_item(lip); + if (lip->li_type == XFS_LI_BUF) buf_item_done((xfs_buf_log_item_t *)lip); else if (lip->li_type == XFS_LI_INODE) diff --git a/libxfs/util.c b/libxfs/util.c index bffbac057..2ad4bfd19 100644 --- a/libxfs/util.c +++ b/libxfs/util.c @@ -49,130 +49,6 @@ libxfs_trans_ichgtime( } } -/* - * Given a mount structure and an inode number, return a pointer - * to a newly allocated in-core inode coresponding to the given - * inode number. - * - * Initialize the inode's attributes and extent pointers if it - * already has them (it will not if the inode has no links). - * - * NOTE: this has slightly different behaviour to the kernel in - * that this version requires the already allocated *ip being - * passed in while the kernel version does the allocation and - * returns it in **ip. - */ -int -libxfs_iread( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - xfs_inode_t *ip, - xfs_daddr_t bno) -{ - xfs_buf_t *bp; - xfs_dinode_t *dip; - int error; - - ip->i_ino = ino; - ip->i_mount = mp; - - /* - * Fill in the location information in the in-core inode. - */ - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, 0); - if (error) - return error; - - /* - * Get pointers to the on-disk inode and the buffer containing it. - */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, XBF_LOCK, 0); - if (error) - return error; - dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); - - /* - * If we got something that isn't an inode it means someone - * (nfs or dmi) has a stale handle. 
- */ - if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { - xfs_trans_brelse(tp, bp); - return EINVAL; - } - - /* - * If the on-disk inode is already linked to a directory - * entry, copy all of the inode into the in-core inode. - * xfs_iformat() handles copying in the inode format - * specific information. - * Otherwise, just get the truly permanent information. - */ - if (dip->di_mode) { - xfs_dinode_from_disk(&ip->i_d, dip); - error = xfs_iformat(ip, dip); - if (error) { - xfs_trans_brelse(tp, bp); - return error; - } - } else { - ip->i_d.di_magic = be16_to_cpu(dip->di_magic); - ip->i_d.di_version = dip->di_version; - ip->i_d.di_gen = be32_to_cpu(dip->di_gen); - ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); - /* - * Make sure to pull in the mode here as well in - * case the inode is released without being used. - * This ensures that xfs_inactive() will see that - * the inode is already free and not try to mess - * with the uninitialized part of it. - */ - ip->i_d.di_mode = 0; - /* - * Initialize the per-fork minima and maxima for a new - * inode here. xfs_iformat will do it for old inodes. - */ - ip->i_df.if_ext_max = - XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); - } - - /* - * The inode format changed when we moved the link count and - * made it 32 bits long. If this is an old format inode, - * convert it in memory to look like a new one. If it gets - * flushed to disk we will convert back before flushing or - * logging it. We zero out the new projid_lo/hi field and the old link - * count field. We'll handle clearing the pad field (the remains - * of the old uuid field) when we actually convert the inode to - * the new format. We don't change the version number so that we - * can distinguish this from a real new format inode. 
- */ - if (ip->i_d.di_version == 1) { - ip->i_d.di_nlink = ip->i_d.di_onlink; - ip->i_d.di_onlink = 0; - xfs_set_projid(&ip->i_d, 0); - } - - ip->i_delayed_blks = 0; - ip->i_size = ip->i_d.di_size; - - /* - * Use xfs_trans_brelse() to release the buffer containing the - * on-disk inode, because it was acquired with xfs_trans_read_buf() - * in xfs_itobp() above. If tp is NULL, this is just a normal - * brelse(). If we're within a transaction, then xfs_trans_brelse() - * will only release the buffer if it is not dirty within the - * transaction. It will be OK to release the buffer in this case, - * because inodes on disk are never destroyed and we will be - * locking the new in-core inode before putting it in the hash - * table where other processes can find it. Thus we don't have - * to worry about the inode being changed just because we released - * the buffer. - */ - xfs_trans_brelse(tp, bp); - return 0; -} - /* * Allocate an inode on disk and return a copy of its in-core version. * Set mode, nlink, and rdev appropriately within the inode. @@ -193,7 +69,6 @@ libxfs_ialloc( struct fsxattr *fsx, int okalloc, xfs_buf_t **ialloc_context, - boolean_t *call_again, xfs_inode_t **ipp) { xfs_ino_t ino; @@ -206,10 +81,10 @@ libxfs_ialloc( * the on-disk inode to be allocated. */ error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, - ialloc_context, call_again, &ino); + ialloc_context, &ino); if (error != 0) return error; - if (*call_again || ino == NULLFSINO) { + if (*ialloc_context || ino == NULLFSINO) { *ipp = NULL; return 0; } @@ -455,7 +330,7 @@ libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp) dip->di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); memset(&(dip->di_pad[0]), 0, sizeof(dip->di_pad)); - ASSERT(xfs_get_projid(ip->i_d) == 0); + ASSERT(xfs_get_projid(&ip->i_d) == 0); } } @@ -560,7 +435,7 @@ libxfs_alloc_file_space( error = 0; imapp = &imaps[0]; reccount = 1; - xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? 
XFS_BMAPI_PREALLOC : 0); + xfs_bmapi_flags = alloc_type ? XFS_BMAPI_PREALLOC : 0; mp = ip->i_mount; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); @@ -578,7 +453,7 @@ libxfs_alloc_file_space( xfs_trans_ihold(tp, ip); xfs_bmap_init(&free_list, &firstfsb); - error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb, + error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, xfs_bmapi_flags, &firstfsb, 0, imapp, &reccount, &free_list); @@ -616,56 +491,6 @@ libxfs_log2_roundup(unsigned int i) return rval; } -/* - * Get a buffer for the dir/attr block, fill in the contents. - * Don't check magic number, the caller will (it's xfs_repair). - * - * Originally from xfs_da_btree.c in the kernel, but only used - * in userspace so it now resides here. - */ -int -libxfs_da_read_bufr( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, - int whichfork) -{ - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 2, - (inst_t *)__return_address); -} - -/* - * Hold dabuf at transaction commit. - * - * Originally from xfs_da_btree.c in the kernel, but only used - * in userspace so it now resides here. - */ -void -libxfs_da_bhold(xfs_trans_t *tp, xfs_dabuf_t *dabuf) -{ - int i; - - for (i = 0; i < dabuf->nbuf; i++) - xfs_trans_bhold(tp, dabuf->bps[i]); -} - -/* - * Join dabuf to transaction. - * - * Originally from xfs_da_btree.c in the kernel, but only used - * in userspace so it now resides here. - */ -void -libxfs_da_bjoin(xfs_trans_t *tp, xfs_dabuf_t *dabuf) -{ - int i; - - for (i = 0; i < dabuf->nbuf; i++) - xfs_trans_bjoin(tp, dabuf->bps[i]); -} - /* * Wrapper around call to libxfs_ialloc. Takes care of committing and * allocating a new transaction as needed. 
@@ -684,21 +509,25 @@ libxfs_inode_alloc( struct fsxattr *fsx, xfs_inode_t **ipp) { - boolean_t call_again; int i; xfs_buf_t *ialloc_context; xfs_inode_t *ip; xfs_trans_t *ntp; int error; - call_again = B_FALSE; ialloc_context = (xfs_buf_t *)0; error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, fsx, - 1, &ialloc_context, &call_again, &ip); - if (error) + 1, &ialloc_context, &ip); + if (error) { + *ipp = NULL; return error; + } + if (!ialloc_context && !ip) { + *ipp = NULL; + return XFS_ERROR(ENOSPC); + } - if (call_again) { + if (ialloc_context) { xfs_trans_bhold(*tp, ialloc_context); ntp = xfs_trans_dup(*tp); xfs_trans_commit(*tp, 0); @@ -710,8 +539,7 @@ libxfs_inode_alloc( } xfs_trans_bjoin(*tp, ialloc_context); error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, - fsx, 1, &ialloc_context, - &call_again, &ip); + fsx, 1, &ialloc_context, &ip); if (!ip) error = ENOSPC; if (error) diff --git a/libxfs/xfs.h b/libxfs/xfs.h index b3f837838..62132843d 100644 --- a/libxfs/xfs.h +++ b/libxfs/xfs.h @@ -43,14 +43,21 @@ */ #include +#include "xfs_dir2_priv.h" -typedef struct { dev_t dev; } xfs_buftarg_t; +#undef ASSERT +#define ASSERT(ex) assert(ex) -typedef __uint32_t uint_t; +typedef __uint32_t uint_t; typedef __uint32_t inst_t; /* an instruction */ +#ifndef EWRONGFS +#define EWRONGFS EINVAL +#endif -#define m_ddev_targp m_dev +#define m_ddev_targp m_dev +#define m_logdev_targp m_logdev +#define m_rtdev_targp m_rtdev #define xfs_error_level 0 #define STATIC static @@ -64,10 +71,11 @@ typedef __uint32_t inst_t; /* an instruction */ #define IHOLD(ip) ((void) 0) -#define XFS_CORRUPTION_ERROR(e,l,mp,m) ((void) 0) +/* stop unused var warnings by assigning mp to itself */ +#define XFS_CORRUPTION_ERROR(e,l,mp,m) do { (mp) = (mp); } while (0) +#define XFS_ERROR_REPORT(e,l,mp) do { (mp) = (mp); } while (0) #define XFS_QM_DQATTACH(mp,ip,flags) 0 #define XFS_ERROR(e) (e) -#define XFS_ERROR_REPORT(e,l,mp) ((void) 0) #define XFS_ERRLEVEL_LOW 1 #define XFS_FORCED_SHUTDOWN(mp) 0 
#define XFS_ILOCK_EXCL 0 @@ -124,35 +132,6 @@ static inline int __do_div(unsigned long long *n, unsigned base) ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) -static inline __uint32_t __get_unaligned_be32(const __uint8_t *p) -{ - return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; -} - -static inline __uint64_t get_unaligned_be64(void *p) -{ - return (__uint64_t)__get_unaligned_be32(p) << 32 | - __get_unaligned_be32(p + 4); -} - -static inline void __put_unaligned_be16(__uint16_t val, __uint8_t *p) -{ - *p++ = val >> 8; - *p++ = val; -} - -static inline void __put_unaligned_be32(__uint32_t val, __uint8_t *p) -{ - __put_unaligned_be16(val >> 16, p); - __put_unaligned_be16(val, p + 2); -} - -static inline void put_unaligned_be64(__uint64_t val, void *p) -{ - __put_unaligned_be32(val >> 32, p); - __put_unaligned_be32(val, p + 4); -} - static inline __attribute__((const)) int is_power_of_2(unsigned long n) @@ -191,10 +170,11 @@ roundup_pow_of_two(uint v) #define XBF_LOCK XFS_BUF_LOCK #define XBF_TRYLOCK XFS_BUF_TRYLOCK #define XBF_DONT_BLOCK 0 +#define XBF_UNMAPPED 0 #define XFS_BUF_GETERROR(bp) 0 #define XFS_BUF_DONE(bp) ((bp)->b_flags |= LIBXFS_B_UPTODATE) #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & LIBXFS_B_UPTODATE) -#define XFS_BUF_STALE(bp) ((bp)->b_flags |= LIBXFS_B_STALE) +#define xfs_buf_stale(bp) ((bp)->b_flags |= LIBXFS_B_STALE) #define XFS_BUF_UNDELAYWRITE(bp) ((bp)->b_flags &= ~LIBXFS_B_DIRTY) #define XFS_BUF_SET_VTYPE(a,b) ((void) 0) #define XFS_BUF_SET_VTYPE_REF(a,b,c) ((void) 0) @@ -260,6 +240,8 @@ roundup_pow_of_two(uint v) #define xfs_trans_log_inode libxfs_trans_log_inode #define xfs_trans_mod_sb libxfs_trans_mod_sb #define xfs_trans_read_buf libxfs_trans_read_buf +#define xfs_trans_read_buf_map libxfs_trans_read_buf_map +#define xfs_trans_get_buf_map libxfs_trans_get_buf_map #define xfs_trans_reserve libxfs_trans_reserve #define xfs_trans_get_block_res(tp) 1 @@ -268,12 +250,16 @@ roundup_pow_of_two(uint v) #define 
xfs_trans_agflist_delta(tp, d) #define xfs_trans_agbtree_delta(tp, d) -#define xfs_buf_readahead(a,b,c) ((void) 0) /* no readahead */ -#define xfs_btree_reada_bufl(m,fsb,c) ((void) 0) -#define xfs_btree_reada_bufs(m,fsb,c,x) ((void) 0) -#define xfs_buftrace(x,y) ((void) 0) /* debug only */ +#define xfs_buf_readahead(a,b,c,ops) ((void) 0) /* no readahead */ +#define xfs_buf_readahead_map(a,b,c,ops) ((void) 0) /* no readahead */ +#define xfs_btree_reada_bufl(m,fsb,c,ops) ((void) 0) +#define xfs_btree_reada_bufs(m,fsb,c,x,ops) ((void) 0) +#define xfs_buftrace(x,y) ((void) 0) /* debug only */ #define xfs_cmn_err(tag,level,mp,fmt,args...) cmn_err(level,fmt, ## args) +#define xfs_warn(mp,fmt,args...) cmn_err(CE_WARN,fmt, ## args) +#define xfs_alert(mp,fmt,args...) cmn_err(CE_ALERT,fmt, ## args) +#define xfs_alert_tag(mp,tag,fmt,args...) cmn_err(CE_ALERT,fmt, ## args) #define xfs_dir2_trace_args(where, args) ((void) 0) #define xfs_dir2_trace_args_b(where, args, bp) ((void) 0) @@ -289,15 +275,28 @@ roundup_pow_of_two(uint v) #define xfs_initialize_perag_icache(pag) ((void) 0) #define xfs_ilock(ip,mode) ((void) 0) +#define xfs_ilock_nowait(ip,mode) ((void) 0) +#define xfs_ilock_demote(ip,mode) ((void) 0) #define xfs_iunlock(ip,mode) ((void) 0) +#define xfs_ilock_map_shared(ip,mode) ((void) 0) +#define xfs_iunlock_map_shared(ip,mode) ((void) 0) +#define __xfs_flock(ip) ((void) 0) /* space allocation */ -#define xfs_alloc_busy_search(tp,ag,b,len) 0 +#define xfs_extent_busy_reuse(mp,ag,bno,len,user) ((void) 0) +#define xfs_extent_busy_insert(tp,ag,bno,len,flags) ((void) 0) +#define xfs_extent_busy_trim(args,fbno,flen,bno,len) \ +do { \ + *(bno) = (fbno); \ + *(len) = (flen); \ +} while (0) + /* avoid unused variable warning */ #define xfs_alloc_busy_insert(tp,ag,b,len) ({ \ xfs_agnumber_t __foo = ag; \ __foo = 0; \ }) + #define xfs_rotorstep 1 #define xfs_bmap_rtalloc(a) (ENOSYS) #define xfs_rtpick_extent(mp,tp,len,p) (ENOSYS) @@ -306,6 +305,15 @@ roundup_pow_of_two(uint v) 
#define xfs_filestream_lookup_ag(ip) (0) #define xfs_filestream_new_ag(ip,ag) (0) +#define xfs_log_force(mp,flags) ((void) 0) +#define XFS_LOG_SYNC 1 + +/* quota bits */ +#define xfs_trans_mod_dquot_byino(t,i,f,d) ((void) 0) +#define xfs_trans_reserve_quota_nblks(t,i,b,n,f) (0) +#define xfs_trans_unreserve_quota_nblks(t,i,b,n,f) ((void) 0) +#define xfs_qm_dqattach(i,f) (0) + /* * Prototypes for kernel static functions that are aren't in their * associated header files @@ -318,14 +326,20 @@ int xfs_attr_rmtval_get(struct xfs_da_args *); void xfs_bmap_del_free(xfs_bmap_free_t *, xfs_bmap_free_item_t *, xfs_bmap_free_item_t *); -/* xfs_da_btree.c */ -int xfs_da_do_buf(xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, xfs_daddr_t *, - xfs_dabuf_t **, int, int, inst_t *); - /* xfs_inode.c */ void xfs_iflush_fork(xfs_inode_t *, xfs_dinode_t *, xfs_inode_log_item_t *, int, xfs_buf_t *); -int xfs_iformat(xfs_inode_t *, xfs_dinode_t *); +/* + * For regular files we only update the on-disk filesize when actually + * writing data back to disk. Until then only the copy in the VFS inode + * is uptodate. 
+ */ +static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip) +{ + if (S_ISREG(ip->i_d.di_mode)) + return ip->i_size; + return ip->i_d.di_size; +} /* xfs_mount.c */ int xfs_initialize_perag_data(xfs_mount_t *, xfs_agnumber_t); @@ -348,8 +362,8 @@ void xfs_buf_item_init (xfs_buf_t *, xfs_mount_t *); void xfs_buf_item_log (xfs_buf_log_item_t *, uint, uint); /* xfs_trans_buf.c */ -xfs_buf_t *xfs_trans_buf_item_match (xfs_trans_t *, xfs_buftarg_t *, - xfs_daddr_t, int); +xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, dev_t, + struct xfs_buf_map *, int); /* local source files */ int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c index a76512dc8..61cdc6c04 100644 --- a/libxfs/xfs_alloc.c +++ b/libxfs/xfs_alloc.c @@ -22,19 +22,11 @@ #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -/* - * Prototypes for per-ag allocation routines - */ - STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); - -/* - * Internal functions. - */ + xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); /* * Lookup the record equal to [bno, len] in the btree given by cur. @@ -55,7 +47,7 @@ xfs_alloc_lookup_eq( * Lookup the first record greater than or equal to [bno, len] * in the btree given by cur. */ -STATIC int /* error */ +int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -71,7 +63,7 @@ xfs_alloc_lookup_ge( * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. 
*/ -STATIC int /* error */ +int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -104,7 +96,7 @@ xfs_alloc_update( /* * Get the data from the pointed-to record. */ -STATIC int /* error */ +int /* error */ xfs_alloc_get_rec( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t *bno, /* output: starting block of extent */ @@ -128,27 +120,28 @@ xfs_alloc_get_rec( */ STATIC void xfs_alloc_compute_aligned( + xfs_alloc_arg_t *args, /* allocation argument structure */ xfs_agblock_t foundbno, /* starting block in found extent */ xfs_extlen_t foundlen, /* length in found extent */ - xfs_extlen_t alignment, /* alignment for allocation */ - xfs_extlen_t minlen, /* minimum length for allocation */ xfs_agblock_t *resbno, /* result block number */ xfs_extlen_t *reslen) /* result length */ { xfs_agblock_t bno; - xfs_extlen_t diff; xfs_extlen_t len; - if (alignment > 1 && foundlen >= minlen) { - bno = roundup(foundbno, alignment); - diff = bno - foundbno; - len = diff >= foundlen ? 0 : foundlen - diff; + /* Trim busy sections out of found extent */ + xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); + + if (args->alignment > 1 && len >= args->minlen) { + xfs_agblock_t aligned_bno = roundup(bno, args->alignment); + xfs_extlen_t diff = aligned_bno - bno; + + *resbno = aligned_bno; + *reslen = diff >= len ? 
0 : len - diff; } else { - bno = foundbno; - len = foundlen; + *resbno = bno; + *reslen = len; } - *resbno = bno; - *reslen = len; } /* @@ -262,7 +255,6 @@ xfs_alloc_fix_minleft( return 1; agf = XFS_BUF_TO_AGF(args->agbp); diff = be32_to_cpu(agf->agf_freeblks) - + be32_to_cpu(agf->agf_flcount) - args->len - args->minleft; if (diff >= 0) return 1; @@ -418,6 +410,60 @@ xfs_alloc_fixup_trees( return 0; } +static void +xfs_agfl_verify( + struct xfs_buf *bp) +{ +#ifdef WHEN_CRCS_COME_ALONG + /* + * we cannot actually do any verification of the AGFL because mkfs does + * not initialise the AGFL to zero or NULL. Hence the only valid part of + * the AGFL is what the AGF says is active. We can't get to the AGF, so + * we can't verify just those entries are valid. + * + * This problem goes away when the CRC format change comes along as that + * requires the AGFL to be initialised by mkfs. At that point, we can + * verify the blocks in the agfl -active or not- lie within the bounds + * of the AG. Until then, just leave this check ifdef'd out. + */ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); + int agfl_ok = 1; + + int i; + + for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { + if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK || + be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) + agfl_ok = 0; + } + + if (!agfl_ok) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +#endif +} + +static void +xfs_agfl_write_verify( + struct xfs_buf *bp) +{ + xfs_agfl_verify(bp); +} + +static void +xfs_agfl_read_verify( + struct xfs_buf *bp) +{ + xfs_agfl_verify(bp); +} + +const struct xfs_buf_ops xfs_agfl_buf_ops = { + .verify_read = xfs_agfl_read_verify, + .verify_write = xfs_agfl_write_verify, +}; + /* * Read in the allocation group free block array. 
*/ @@ -435,16 +481,36 @@ xfs_alloc_read_agfl( error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &bp); + XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (error) return error; - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); + ASSERT(!xfs_buf_geterror(bp)); + xfs_buf_set_ref(bp, XFS_AGFL_REF); *bpp = bp; return 0; } +STATIC int +xfs_alloc_update_counters( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agbp, + long len) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + + pag->pagf_freeblks += len; + be32_add_cpu(&agf->agf_freeblks, len); + + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > + be32_to_cpu(agf->agf_length))) + return EFSCORRUPTED; + + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + return 0; +} + /* * Allocation group level functions. */ @@ -486,49 +552,36 @@ xfs_alloc_ag_vextent( ASSERT(0); /* NOTREACHED */ } - if (error) + + if (error || args->agbno == NULLAGBLOCK) return error; - /* - * If the allocation worked, need to change the agf structure - * (and log it), and the superblock. - */ - if (args->agbno != NULLAGBLOCK) { - xfs_agf_t *agf; /* allocation group freelist header */ - long slen = (long)args->len; - ASSERT(args->len >= args->minlen && args->len <= args->maxlen); - ASSERT(!(args->wasfromfl) || !args->isfl); - ASSERT(args->agbno % args->alignment == 0); - if (!(args->wasfromfl)) { - - agf = XFS_BUF_TO_AGF(args->agbp); - be32_add_cpu(&agf->agf_freeblks, -(args->len)); - xfs_trans_agblocks_delta(args->tp, - -((long)(args->len))); - args->pag->pagf_freeblks -= args->len; - ASSERT(be32_to_cpu(agf->agf_freeblks) <= - be32_to_cpu(agf->agf_length)); - xfs_alloc_log_agf(args->tp, args->agbp, - XFS_AGF_FREEBLKS); - /* - * Search the busylist for these blocks and mark the - * transaction as synchronous if blocks are found. 
This - * avoids the need to block due to a synchronous log - * force to ensure correct ordering as the synchronous - * transaction will guarantee that for us. - */ - if (xfs_alloc_busy_search(args->mp, args->agno, - args->agbno, args->len)) - xfs_trans_set_sync(args->tp); - } - if (!args->isfl) - xfs_trans_mod_sb(args->tp, - args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : - XFS_TRANS_SB_FDBLOCKS, -slen); - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(!args->wasfromfl || !args->isfl); + ASSERT(args->agbno % args->alignment == 0); + + if (!args->wasfromfl) { + error = xfs_alloc_update_counters(args->tp, args->pag, + args->agbp, + -((long)(args->len))); + if (error) + return error; + + ASSERT(!xfs_extent_busy_search(args->mp, args->agno, + args->agbno, args->len)); } - return 0; + + if (!args->isfl) { + xfs_trans_mod_sb(args->tp, args->wasdel ? + XFS_TRANS_SB_RES_FDBLOCKS : + XFS_TRANS_SB_FDBLOCKS, + -((long)(args->len))); + } + + XFS_STATS_INC(xs_allocx); + XFS_STATS_ADD(xs_allocb, args->len); + return error; } /* @@ -543,17 +596,16 @@ xfs_alloc_ag_vextent_exact( { xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ - xfs_agblock_t end; /* end of allocated extent */ int error; xfs_agblock_t fbno; /* start block of found extent */ - xfs_agblock_t fend; /* end block of found extent */ xfs_extlen_t flen; /* length of found extent */ + xfs_agblock_t tbno; /* start block of trimmed extent */ + xfs_extlen_t tlen; /* length of trimmed extent */ + xfs_agblock_t tend; /* end block of trimmed extent */ int i; /* success/failure of operation */ - xfs_agblock_t maxend; /* end of maximal extent */ - xfs_agblock_t minend; /* end of minimal extent */ - xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); + /* * Allocate/initialize a cursor for the by-number freespace btree. 
*/ @@ -579,14 +631,22 @@ xfs_alloc_ag_vextent_exact( goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); ASSERT(fbno <= args->agbno); - minend = args->agbno + args->minlen; - maxend = args->agbno + args->maxlen; - fend = fbno + flen; /* - * Give up if the freespace isn't long enough for the minimum request. + * Check for overlapping busy extents. + */ + xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); + + /* + * Give up if the start of the extent is busy, or the freespace isn't + * long enough for the minimum request. */ - if (fend < minend) + if (tbno > args->agbno) + goto not_found; + if (tlen < args->minlen) + goto not_found; + tend = tbno + tlen; + if (tend < args->agbno + args->minlen) goto not_found; /* @@ -595,18 +655,16 @@ xfs_alloc_ag_vextent_exact( * * Fix the length according to mod and prod if given. */ - end = XFS_AGBLOCK_MIN(fend, maxend); - args->len = end - args->agbno; + args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) + - args->agbno; xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) goto not_found; - rlen = args->len; - ASSERT(args->agbno + rlen <= fend); - end = args->agbno + rlen; + ASSERT(args->agbno + args->len <= tend); /* - * We are allocating agbno for rlen [agbno .. end] + * We are allocating agbno for args->len * Allocate/initialize a cursor for the by-size btree. 
*/ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, @@ -619,8 +677,10 @@ xfs_alloc_ag_vextent_exact( xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); goto error0; } + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + args->wasfromfl = 0; trace_xfs_alloc_exact_done(args); return 0; @@ -649,11 +709,11 @@ xfs_alloc_find_best_extent( struct xfs_btree_cur **scur, /* searching cursor */ xfs_agblock_t gdiff, /* difference for search comparison */ xfs_agblock_t *sbno, /* extent found by search */ - xfs_extlen_t *slen, - xfs_extlen_t *slena, /* aligned length */ + xfs_extlen_t *slen, /* extent length */ + xfs_agblock_t *sbnoa, /* aligned extent found by search */ + xfs_extlen_t *slena, /* aligned extent length */ int dir) /* 0 = search right, 1 = search left */ { - xfs_agblock_t bno; xfs_agblock_t new; xfs_agblock_t sdiff; int error; @@ -671,17 +731,16 @@ xfs_alloc_find_best_extent( if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, - args->minlen, &bno, slena); + xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); /* * The good extent is closer than this one. */ if (!dir) { - if (bno >= args->agbno + gdiff) + if (*sbnoa >= args->agbno + gdiff) goto out_use_good; } else { - if (bno <= args->agbno - gdiff) + if (*sbnoa <= args->agbno - gdiff) goto out_use_good; } @@ -693,8 +752,8 @@ xfs_alloc_find_best_extent( xfs_alloc_fix_len(args); sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, *sbno, - *slen, &new); + args->alignment, *sbnoa, + *slena, &new); /* * Choose closer size and invalidate other cursor. @@ -744,7 +803,7 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t gtbnoa; /* aligned ... */ xfs_extlen_t gtdiff; /* difference to right side entry */ xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena = 0; /* aligned ... */ + xfs_extlen_t gtlena; /* aligned ... 
*/ xfs_agblock_t gtnew; /* useful start bno of right side */ int error; /* error code */ int i; /* result code, temporary */ @@ -753,9 +812,10 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena = 0; /* aligned ... */ + xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; #if defined(DEBUG) && defined(__KERNEL__) /* * Randomly don't execute the first algorithm. @@ -764,13 +824,20 @@ xfs_alloc_ag_vextent_near( dofirst = random32() & 1; #endif + +restart: + bno_cur_lt = NULL; + bno_cur_gt = NULL; + ltlen = 0; + gtlena = 0; + ltlena = 0; + /* * Get a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); - ltlen = 0; - bno_cur_lt = bno_cur_gt = NULL; + /* * See if there are any free extents as big as maxlen. */ @@ -786,11 +853,13 @@ xfs_alloc_ag_vextent_near( goto error0; if (i == 0 || ltlen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_near_noentry(args); return 0; } ASSERT(i == 1); } args->wasfromfl = 0; + /* * First algorithm. 
* If the requested extent is large wrt the freespaces available @@ -844,8 +913,8 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, - args->minlen, &ltbnoa, &ltlena); + xfs_alloc_compute_aligned(args, ltbno, ltlen, + &ltbnoa, &ltlena); if (ltlena < args->minlen) continue; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); @@ -854,7 +923,7 @@ xfs_alloc_ag_vextent_near( if (args->len < blen) continue; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, &ltnew); + args->alignment, ltbnoa, ltlena, &ltnew); if (ltnew != NULLAGBLOCK && (args->len > blen || ltdiff < bdiff)) { bdiff = ltdiff; @@ -965,8 +1034,8 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, - args->minlen, &ltbnoa, &ltlena); + xfs_alloc_compute_aligned(args, ltbno, ltlen, + &ltbnoa, &ltlena); if (ltlena >= args->minlen) break; if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) @@ -981,8 +1050,8 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, - args->minlen, &gtbnoa, &gtlena); + xfs_alloc_compute_aligned(args, gtbno, gtlen, + &gtbnoa, &gtlena); if (gtlena >= args->minlen) break; if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) @@ -1005,13 +1074,13 @@ xfs_alloc_ag_vextent_near( */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - rlen = args->len; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, &ltnew); + args->alignment, ltbnoa, ltlena, &ltnew); error = xfs_alloc_find_best_extent(args, &bno_cur_lt, &bno_cur_gt, - ltdiff, &gtbno, &gtlen, &gtlena, + ltdiff, &gtbno, &gtlen, + &gtbnoa,
&gtlena, 0 /* search right */); } else { ASSERT(gtlena >= args->minlen); @@ -1022,11 +1091,12 @@ xfs_alloc_ag_vextent_near( args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, gtbno, gtlen, &gtnew); + args->alignment, gtbnoa, gtlena, &gtnew); error = xfs_alloc_find_best_extent(args, &bno_cur_gt, &bno_cur_lt, - gtdiff, &ltbno, &ltlen, &ltlena, + gtdiff, &ltbno, &ltlen, + &ltbnoa, &ltlena, 1 /* search left */); } @@ -1038,6 +1108,13 @@ xfs_alloc_ag_vextent_near( * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + + if (!forced++) { + trace_xfs_alloc_near_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; @@ -1072,12 +1149,13 @@ xfs_alloc_ag_vextent_near( return 0; } rlen = args->len; - (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, - ltlen, &ltnew); + (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, + ltbnoa, ltlena, &ltnew); ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltbno + ltlen); + ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->agbno = ltnew; + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, ltnew, rlen, XFSA_FIXUP_BNO_OK))) goto error0; @@ -1120,26 +1198,35 @@ xfs_alloc_ag_vextent_size( int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; +restart: /* * Allocate and initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); bno_cur = NULL; + /* * Look for an entry >= maxlen+alignment-1 blocks.
*/ if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen + args->alignment - 1, &i))) goto error0; + /* - * If none, then pick up the last entry in the tree unless the - * tree is empty. + * If none or we have busy extents that we cannot allocate from, then + * we have to settle for a smaller extent. In the case that there are + * no large extents, this will return the last entry in the tree unless + * the tree is empty. In the case that there are only busy large + * extents, this will return the largest small extent unless there + * are no smaller extents available. */ - if (!i) { - if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, - &flen, &i))) + if (!i || forced > 1) { + error = xfs_alloc_ag_vextent_small(args, cnt_cur, + &fbno, &flen, &i); + if (error) goto error0; if (i == 0 || flen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); @@ -1147,23 +1234,56 @@ xfs_alloc_ag_vextent_size( return 0; } ASSERT(i == 1); + xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + } else { + /* + * Search for a non-busy extent that is large enough. + * If we are at low space, don't check, or if we fall of + * the end of the btree, turn off the busy check and + * restart. + */ + for (;;) { + error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); + + if (rlen >= args->maxlen) + break; + + error = xfs_btree_increment(cnt_cur, 0, &i); + if (error) + goto error0; + if (i == 0) { + /* + * Our only valid extents must have been busy. + * Make it unbusy by forcing the log out and + * retrying. If we've been here before, forcing + * the log isn't making the extents available, + * which means they have probably been freed in + * this transaction. In that case, we have to + * give up on them and we'll attempt a minlen + * allocation the next time around. 
+ */ + xfs_btree_del_cursor(cnt_cur, + XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + if (!forced++) + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + } } - /* - * There's a freespace as big as maxlen+alignment-1, get it. - */ - else { - if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } + /* * In the first case above, we got the last entry in the * by-size btree. Now we check to see if the space hits maxlen * once aligned; if not, we search left for something better. * This can't happen in the second case above. */ - xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, - &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), error0); @@ -1188,8 +1308,8 @@ xfs_alloc_ag_vextent_size( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (flen < bestrlen) break; - xfs_alloc_compute_aligned(fbno, flen, args->alignment, - args->minlen, &rbno, &rlen); + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), @@ -1217,13 +1337,19 @@ xfs_alloc_ag_vextent_size( * Fix up the length. 
*/ args->len = rlen; - xfs_alloc_fix_len(args); - if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_size_nominleft(args); - args->agbno = NULLAGBLOCK; - return 0; + if (rlen < args->minlen) { + if (!forced++) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + goto out_nominleft; } + xfs_alloc_fix_len(args); + + if (!xfs_alloc_fix_minleft(args)) + goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); /* @@ -1253,6 +1379,12 @@ error0: if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); return error; + +out_nominleft: + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_nominleft(args); + args->agbno = NULLAGBLOCK; + return 0; } /* @@ -1292,6 +1424,9 @@ xfs_alloc_ag_vextent_small( if (error) goto error0; if (fbno != NULLAGBLOCK) { + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, + args->userdata); + if (args->userdata) { xfs_buf_t *bp; @@ -1367,6 +1502,7 @@ xfs_free_ag_extent( xfs_mount_t *mp; /* mount point struct for filesystem */ xfs_agblock_t nbno; /* new starting block of freespace */ xfs_extlen_t nlen; /* new length of freespace */ + xfs_perag_t *pag; /* per allocation group data */ mp = tp->t_mountp; /* @@ -1565,45 +1701,23 @@ xfs_free_ag_extent( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; + /* * Update the freespace totals in the ag and superblock. 
*/ - { - xfs_agf_t *agf; - xfs_perag_t *pag; /* per allocation group data */ - - pag = xfs_perag_get(mp, agno); - pag->pagf_freeblks += len; - xfs_perag_put(pag); - - agf = XFS_BUF_TO_AGF(agbp); - be32_add_cpu(&agf->agf_freeblks, len); - xfs_trans_agblocks_delta(tp, len); - XFS_WANT_CORRUPTED_GOTO( - be32_to_cpu(agf->agf_freeblks) <= - be32_to_cpu(agf->agf_length), - error0); - xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); - if (!isfl) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); - } + pag = xfs_perag_get(mp, agno); + error = xfs_alloc_update_counters(tp, pag, agbp, len); + xfs_perag_put(pag); + if (error) + goto error0; + + if (!isfl) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); + XFS_STATS_INC(xs_freex); + XFS_STATS_ADD(xs_freeb, len); trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); - /* - * Since blocks move to the free list without the coordination - * used in xfs_bmap_finish, we can't allow block to be available - * for reallocation and non-transaction writing (user data) - * until we know that the transaction that moved it to the free - * list is permanently on disk. We track the blocks by declaring - * these blocks as "busy"; the busy list is maintained on a per-ag - * basis and each transaction records which entries should be removed - * when the iclog commits to disk. If a busy block is allocated, - * the iclog is pushed up to the LSN that freed the block. - */ - xfs_alloc_busy_insert(tp, agno, bno, len); return 0; error0: @@ -1788,12 +1902,11 @@ xfs_alloc_fix_freelist( /* * Initialize the args structure. 
*/ + memset(&targs, 0, sizeof(targs)); targs.tp = tp; targs.mp = mp; targs.agbp = agbp; targs.agno = args->agno; - targs.mod = targs.minleft = targs.wasdel = targs.userdata = - targs.minalignslop = 0; targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; targs.type = XFS_ALLOCTYPE_THIS_AG; targs.pag = pag; @@ -1900,21 +2013,6 @@ xfs_alloc_get_freelist( xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; - /* - * As blocks are freed, they are added to the per-ag busy list and - * remain there until the freeing transaction is committed to disk. - * Now that we have allocated blocks, this list must be searched to see - * if a block is being reused. If one is, then the freeing transaction - * must be pushed to disk before this transaction. - * - * We do this by setting the current transaction to a sync transaction - * which guarantees that the freeing transaction is on disk before this - * transaction. This is done instead of a synchronous log force here so - * that we don't sit and wait with the AGF locked in the transaction - * during the log force. - */ - if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) - xfs_trans_set_sync(tp); return 0; } @@ -2027,6 +2125,63 @@ xfs_alloc_put_freelist( return 0; } +static void +xfs_agf_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agf *agf; + int agf_ok; + + agf = XFS_BUF_TO_AGF(bp); + + agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && + XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && + be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && + be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); + + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. 
growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag) + agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) == + bp->b_pag->pag_agno; + + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= + be32_to_cpu(agf->agf_length); + + if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF))) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_agf_read_verify( + struct xfs_buf *bp) +{ + xfs_agf_verify(bp); +} + +static void +xfs_agf_write_verify( + struct xfs_buf *bp) +{ + xfs_agf_verify(bp); +} + +const struct xfs_buf_ops xfs_agf_buf_ops = { + .verify_read = xfs_agf_read_verify, + .verify_write = xfs_agf_write_verify, +}; + /* * Read in the allocation group header (free/alloc section). */ @@ -2038,45 +2193,20 @@ xfs_read_agf( int flags, /* XFS_BUF_ */ struct xfs_buf **bpp) /* buffer for the ag freelist header */ { - struct xfs_agf *agf; /* ag freelist header */ - int agf_ok; /* set if agf is consistent */ int error; ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), flags, bpp); + XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); if (error) return error; if (!*bpp) return 0; - ASSERT(!XFS_BUF_GETERROR(*bpp)); - agf = XFS_BUF_TO_AGF(*bpp); - - /* - * Validate the magic number of the agf block. 
- */ - agf_ok = - be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && - XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && - be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_seqno) == agno; - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) - agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= - be32_to_cpu(agf->agf_length); - if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, - XFS_RANDOM_ALLOC_READ_AGF))) { - XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", - XFS_ERRLEVEL_LOW, mp, agf); - xfs_trans_brelse(tp, *bpp); - return XFS_ERROR(EFSCORRUPTED); - } - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); + ASSERT(!(*bpp)->b_error); + xfs_buf_set_ref(*bpp, XFS_AGF_REF); return 0; } @@ -2104,7 +2234,7 @@ xfs_alloc_read_agf( return error; if (!*bpp) return 0; - ASSERT(!XFS_BUF_GETERROR(*bpp)); + ASSERT(!(*bpp)->b_error); agf = XFS_BUF_TO_AGF(*bpp); pag = xfs_perag_get(mp, agno); @@ -2371,18 +2501,36 @@ xfs_free_extent( memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; + + /* + * validate that the block number is legal - the enables us to detect + * and handle a silent filesystem corruption rather than crashing. 
+ */ args.agno = XFS_FSB_TO_AGNO(args.mp, bno); - ASSERT(args.agno < args.mp->m_sb.sb_agcount); + if (args.agno >= args.mp->m_sb.sb_agcount) + return EFSCORRUPTED; + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + if (args.agbno >= args.mp->m_sb.sb_agblocks) + return EFSCORRUPTED; + args.pag = xfs_perag_get(args.mp, args.agno); - if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) + ASSERT(args.pag); + + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); + if (error) goto error0; -#ifdef DEBUG - ASSERT(args.agbp != NULL); - ASSERT((args.agbno + len) <= - be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); -#endif + + /* validate the extent size is legal now we have the agf locked */ + if (args.agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { + error = EFSCORRUPTED; + goto error0; + } + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); + if (!error) + xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); return error; diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c index b782d9df9..dc9ed48ce 100644 --- a/libxfs/xfs_alloc_btree.c +++ b/libxfs/xfs_alloc_btree.c @@ -75,6 +75,8 @@ xfs_allocbt_alloc_block( return 0; } + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); + xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); @@ -98,19 +100,11 @@ xfs_allocbt_free_block( if (error) return error; - /* - * Since blocks move to the free list without the coordination used in - * xfs_bmap_finish, we can't allow block to be available for - * reallocation and non-transaction writing (user data) until we know - * that the transaction that moved it to the free list is permanently - * on disk. We track the blocks by declaring these blocks as "busy"; - * the busy list is maintained on a per-ag basis and each transaction - * records which entries should be removed when the iclog commits to - * disk. 
If a busy block is allocated, the iclog is pushed up to the - * LSN that freed the block. - */ - xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); + xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, + XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); + + xfs_trans_binval(cur->bc_tp, bp); return 0; } @@ -260,6 +254,82 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } +static void +xfs_allocbt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_perag *pag = bp->b_pag; + unsigned int level; + int sblock_ok; /* block passes checks */ + + /* + * magic number and level verification + * + * During growfs operations, we can't verify the exact level as the + * perag is not fully initialised and hence not attached to the buffer. + * In this case, check against the maximum tree depth. + */ + level = be16_to_cpu(block->bb_level); + switch (cpu_to_be32(block->bb_magic)) { + case XFS_ABTB_MAGIC: + if (pag) + sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; + else + sblock_ok = level < mp->m_ag_maxlevels; + break; + case XFS_ABTC_MAGIC: + if (pag) + sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; + else + sblock_ok = level < mp->m_ag_maxlevels; + break; + default: + sblock_ok = 0; + break; + } + + /* numrecs verification */ + sblock_ok = sblock_ok && + be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; + + /* sibling pointer verification */ + sblock_ok = sblock_ok && + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && + block->bb_u.s.bb_leftsib && + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && + block->bb_u.s.bb_rightsib; + + if (!sblock_ok) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + 
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_allocbt_read_verify( + struct xfs_buf *bp) +{ + xfs_allocbt_verify(bp); +} + +static void +xfs_allocbt_write_verify( + struct xfs_buf *bp) +{ + xfs_allocbt_verify(bp); +} + +const struct xfs_buf_ops xfs_allocbt_buf_ops = { + .verify_read = xfs_allocbt_read_verify, + .verify_write = xfs_allocbt_write_verify, +}; + + #ifdef DEBUG STATIC int xfs_allocbt_keys_inorder( @@ -381,7 +451,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, - + .buf_ops = &xfs_allocbt_buf_ops, #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, @@ -415,13 +485,16 @@ xfs_allocbt_init_cursor( cur->bc_tp = tp; cur->bc_mp = mp; - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]); cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - if (btnum == XFS_BTNUM_CNT) + + if (btnum == XFS_BTNUM_CNT) { + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; + } else { + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + } cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c index eec7f8fc9..42546a947 100644 --- a/libxfs/xfs_attr.c +++ b/libxfs/xfs_attr.c @@ -295,8 +295,7 @@ xfs_attr_set_int( return (error); } - xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args.trans, dp); + xfs_trans_ijoin(args.trans, dp, 0); /* * If the attribute list is non-existent or a shortform list, @@ -365,10 +364,8 @@ xfs_attr_set_int( * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. 
*/ - if (committed) { - xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args.trans, dp); - } + if (committed) + xfs_trans_ijoin(args.trans, dp, 0); /* * Commit the leaf transformation. We'll need another (linked) @@ -468,6 +465,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) args.total = 0; args.whichfork = XFS_ATTR_FORK; + /* + * we have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail. + */ + args.op_flags = XFS_DA_OP_OKNOENT; + /* * Attach the dquots to the inode. */ @@ -509,8 +513,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) * No need to make quota reservations here. We expect to release some * blocks not allocate in the common case. */ - xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args.trans, dp); + xfs_trans_ijoin(args.trans, dp, 0); /* * Decide on what work routines to call based on the inode size. @@ -603,6 +606,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) { int newsize, forkoff, retval; + trace_xfs_attr_sf_addname(args); + retval = xfs_attr_shortform_lookup(args); if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { return(retval); @@ -643,19 +648,19 @@ STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args) { xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int retval, error, committed, forkoff; + trace_xfs_attr_leaf_addname(args); + /* * Read the (only) block in the attribute list in. */ dp = args->dp; args->blkno = 0; - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, - XFS_ATTR_FORK); + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) - return(error); - ASSERT(bp != NULL); + return error; /* * Look up the given attribute in the leaf block. 
Figure out if @@ -663,13 +668,16 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ retval = xfs_attr_leaf_lookup_int(bp, args); if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(retval); } else if (retval == EEXIST) { if (args->flags & ATTR_CREATE) { /* pure create op */ - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(retval); } + + trace_xfs_attr_leaf_replace(args); + args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; @@ -682,7 +690,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * if required. */ retval = xfs_attr_leaf_add(bp, args); - xfs_da_buf_done(bp); if (retval == ENOSPC) { /* * Promote the attribute list to the Btree format, then @@ -706,10 +713,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); /* * Commit the current trans (including the inode) and start @@ -779,12 +784,12 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * Read in the block containing the "old" attr, then * remove the "old" attr from that block (neat, huh!) */ - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, - &bp, XFS_ATTR_FORK); + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, + -1, &bp); if (error) - return(error); - ASSERT(bp != NULL); - (void)xfs_attr_leaf_remove(bp, args); + return error; + + xfs_attr_leaf_remove(bp, args); /* * If the result is small enough, shrink it all into the inode. @@ -810,12 +815,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * and started a new one. We need the inode to be * in all transactions. 
*/ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } - } else - xfs_da_buf_done(bp); + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } /* * Commit the remove and start the next trans in series. @@ -841,28 +843,27 @@ STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args) { xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error, committed, forkoff; + trace_xfs_attr_leaf_removename(args); + /* * Remove the attribute. */ dp = args->dp; args->blkno = 0; - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, - XFS_ATTR_FORK); - if (error) { - return(error); - } + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) + return error; - ASSERT(bp != NULL); error = xfs_attr_leaf_lookup_int(bp, args); if (error == ENOATTR) { - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(error); } - (void)xfs_attr_leaf_remove(bp, args); + xfs_attr_leaf_remove(bp, args); /* * If the result is small enough, shrink it all into the inode. @@ -886,12 +887,9 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. 
*/ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } - } else - xfs_da_buf_done(bp); + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); + } return(0); } @@ -904,23 +902,23 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) STATIC int xfs_attr_leaf_get(xfs_da_args_t *args) { - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; + trace_xfs_attr_leaf_get(args); + args->blkno = 0; - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, - XFS_ATTR_FORK); + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) - return(error); - ASSERT(bp != NULL); + return error; error = xfs_attr_leaf_lookup_int(bp, args); if (error != EEXIST) { - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return(error); } error = xfs_attr_leaf_getvalue(bp, args); - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { error = xfs_attr_rmtval_get(args); } @@ -950,6 +948,8 @@ xfs_attr_node_addname(xfs_da_args_t *args) xfs_mount_t *mp; int committed, retval, error; + trace_xfs_attr_node_addname(args); + /* * Fill in bucket of arguments/results/context to carry around. */ @@ -976,6 +976,9 @@ restart: } else if (retval == EEXIST) { if (args->flags & ATTR_CREATE) goto out; + + trace_xfs_attr_node_replace(args); + args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; @@ -1013,10 +1016,8 @@ restart: * and started a new one. We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); /* * Commit the node conversion and start the next @@ -1052,10 +1053,8 @@ restart: * bmap_finish() may have committed the last trans and started * a new one. 
We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); } else { /* * Addition succeeded, update Btree hashvals. @@ -1166,10 +1165,8 @@ restart: * and started a new one. We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); } /* @@ -1210,9 +1207,11 @@ xfs_attr_node_removename(xfs_da_args_t *args) xfs_da_state_t *state; xfs_da_state_blk_t *blk; xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int retval, error, committed, forkoff; + trace_xfs_attr_node_removename(args); + /* * Tie a string around our finger to remind us where we are. */ @@ -1300,10 +1299,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); /* * Commit the Btree join operation and start a new trans. @@ -1322,16 +1319,11 @@ xfs_attr_node_removename(xfs_da_args_t *args) */ ASSERT(state->path.active == 1); ASSERT(state->path.blk[0].bp); - xfs_da_buf_done(state->path.blk[0].bp); state->path.blk[0].bp = NULL; - error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, - XFS_ATTR_FORK); + error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp); if (error) goto out; - ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) - bp->data)->hdr.info.magic) - == XFS_ATTR_LEAF_MAGIC); if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { xfs_bmap_init(args->flist, args->firstblock); @@ -1354,12 +1346,10 @@ xfs_attr_node_removename(xfs_da_args_t *args) * and started a new one. 
We need the inode to be * in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); } else - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); } error = 0; @@ -1381,6 +1371,8 @@ xfs_attr_fillstate(xfs_da_state_t *state) xfs_da_state_blk_t *blk; int level; + trace_xfs_attr_fillstate(state->args); + /* * Roll down the "path" in the state structure, storing the on-disk * block number for those buffers in the "path". @@ -1389,8 +1381,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->bp) { - blk->disk_blkno = xfs_da_blkno(blk->bp); - xfs_da_buf_done(blk->bp); + blk->disk_blkno = XFS_BUF_ADDR(blk->bp); blk->bp = NULL; } else { blk->disk_blkno = 0; @@ -1405,8 +1396,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->bp) { - blk->disk_blkno = xfs_da_blkno(blk->bp); - xfs_da_buf_done(blk->bp); + blk->disk_blkno = XFS_BUF_ADDR(blk->bp); blk->bp = NULL; } else { blk->disk_blkno = 0; @@ -1429,6 +1419,8 @@ xfs_attr_refillstate(xfs_da_state_t *state) xfs_da_state_blk_t *blk; int level, error; + trace_xfs_attr_refillstate(state->args); + /* * Roll down the "path" in the state structure, storing the on-disk * block number for those buffers in the "path". 
@@ -1437,7 +1429,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->disk_blkno) { - error = xfs_da_read_buf(state->args->trans, + error = xfs_da_node_read(state->args->trans, state->args->dp, blk->blkno, blk->disk_blkno, &blk->bp, XFS_ATTR_FORK); @@ -1456,7 +1448,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->disk_blkno) { - error = xfs_da_read_buf(state->args->trans, + error = xfs_da_node_read(state->args->trans, state->args->dp, blk->blkno, blk->disk_blkno, &blk->bp, XFS_ATTR_FORK); @@ -1485,6 +1477,8 @@ xfs_attr_node_get(xfs_da_args_t *args) int error, retval; int i; + trace_xfs_attr_node_get(args); + state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; @@ -1516,7 +1510,7 @@ xfs_attr_node_get(xfs_da_args_t *args) * If not in a transaction, we have to release all the buffers. 
*/ for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); + xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } @@ -1543,6 +1537,8 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) int nmap, error, tmp, valuelen, blkcnt, i; xfs_dablk_t lblkno; + trace_xfs_attr_rmtval_get(args); + ASSERT(!(args->flags & ATTR_KERNOVAL)); mp = args->dp->i_mount; @@ -1551,10 +1547,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) lblkno = args->rmtblkno; while (valuelen > 0) { nmap = ATTR_RMTVALUE_MAPSIZE; - error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, - args->rmtblkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - NULL, 0, map, &nmap, NULL); + error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, + args->rmtblkcnt, map, &nmap, + XFS_BMAPI_ATTRFORK); if (error) return(error); ASSERT(nmap >= 1); @@ -1564,14 +1559,12 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, - blkcnt, XBF_LOCK | XBF_DONT_BLOCK, - &bp); + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + dblkno, blkcnt, 0, &bp, NULL); if (error) return(error); - tmp = (valuelen < XFS_BUF_SIZE(bp)) - ? 
valuelen : XFS_BUF_SIZE(bp); + tmp = min_t(int, valuelen, BBTOB(bp->b_length)); xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ); xfs_buf_relse(bp); dst += tmp; @@ -1601,6 +1594,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) xfs_dablk_t lblkno; int blkcnt, valuelen, nmap, error, tmp, committed; + trace_xfs_attr_rmtval_set(args); + dp = args->dp; mp = dp->i_mount; src = args->value; @@ -1628,10 +1623,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) */ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; - error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno, + error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | - XFS_BMAPI_WRITE, + XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, args->firstblock, args->total, &map, &nmap, args->flist); if (!error) { @@ -1649,10 +1643,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, dp); - } + if (committed) + xfs_trans_ijoin(args->trans, dp, 0); ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && @@ -1677,19 +1669,18 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) lblkno = args->rmtblkno; valuelen = args->valuelen; while (valuelen > 0) { + int buflen; + /* * Try to remember where we decided to put the value. 
*/ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; - error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno, - args->rmtblkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, 0, &map, &nmap, - NULL); - if (error) { + error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, + args->rmtblkcnt, &map, &nmap, + XFS_BMAPI_ATTRFORK); + if (error) return(error); - } ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); @@ -1697,19 +1688,20 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, - XBF_LOCK | XBF_DONT_BLOCK); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0); + if (!bp) + return ENOMEM; - tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : - XFS_BUF_SIZE(bp); + buflen = BBTOB(bp->b_length); + tmp = min_t(int, valuelen, buflen); xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); - if (tmp < XFS_BUF_SIZE(bp)) - xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); - if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ - return (error); - } + if (tmp < buflen) + xfs_buf_zero(bp, tmp, buflen - tmp); + + error = xfs_bwrite(mp, bp); /* GROT: NOTE: synchronous write */ + xfs_buf_relse(bp); + if (error) + return error; src += tmp; valuelen -= tmp; @@ -1733,6 +1725,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) xfs_dablk_t lblkno; int valuelen, blkcnt, nmap, error, done, committed; + trace_xfs_attr_rmtval_remove(args); + mp = args->dp->i_mount; /* @@ -1745,16 +1739,12 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) /* * Try to remember where we decided to put the value. 
*/ - xfs_bmap_init(args->flist, args->firstblock); nmap = 1; - error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno, - args->rmtblkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, 0, &map, &nmap, - args->flist); - if (error) { + error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, + args->rmtblkcnt, &map, &nmap, + XFS_BMAPI_ATTRFORK); + if (error) return(error); - } ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); @@ -1767,8 +1757,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) */ bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); if (bp) { - XFS_BUF_STALE(bp); - XFS_BUF_UNDELAYWRITE(bp); + xfs_buf_stale(bp); xfs_buf_relse(bp); bp = NULL; } @@ -1805,10 +1794,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ - if (committed) { - xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(args->trans, args->dp); - } + if (committed) + xfs_trans_ijoin(args->trans, args->dp, 0); /* * Close out trans and start the next one in the chain. diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c index d1f7a2097..824c12295 100644 --- a/libxfs/xfs_attr_leaf.c +++ b/libxfs/xfs_attr_leaf.c @@ -32,10 +32,11 @@ * Routines used for growing the Btree. 
*/ STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, - xfs_dabuf_t **bpp); -STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args, - int freemap_index); -STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer); + struct xfs_buf **bpp); +STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer, + xfs_da_args_t *args, int freemap_index); +STATIC void xfs_attr_leaf_compact(struct xfs_da_args *args, + struct xfs_buf *leaf_buffer); STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_state_blk_t *blk2); @@ -56,6 +57,52 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, xfs_mount_t *mp); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); +static void +xfs_attr_leaf_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_attr_leaf_hdr *hdr = bp->b_addr; + int block_ok = 0; + + block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC); + if (!block_ok) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_attr_leaf_read_verify( + struct xfs_buf *bp) +{ + xfs_attr_leaf_verify(bp); +} + +static void +xfs_attr_leaf_write_verify( + struct xfs_buf *bp) +{ + xfs_attr_leaf_verify(bp); +} + +const struct xfs_buf_ops xfs_attr_leaf_buf_ops = { + .verify_read = xfs_attr_leaf_read_verify, + .verify_write = xfs_attr_leaf_write_verify, +}; + +int +xfs_attr_leaf_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, + XFS_ATTR_FORK, &xfs_attr_leaf_buf_ops); +} + /*======================================================================== * Namespace helper routines *========================================================================*/ @@ -78,6 +125,7 @@ xfs_attr_namesp_match(int 
arg_flags, int ondisk_flags) /* * Query whether the requested number of additional bytes of extended * attribute space will be able to fit inline. + * * Returns zero if not, else the di_forkoff fork offset to be used in the * literal area for attribute data once the new bytes have been added. * @@ -90,7 +138,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) int offset; int minforkoff; /* lower limit on valid forkoff locations */ int maxforkoff; /* upper limit on valid forkoff locations */ - int dsize; + int dsize; xfs_mount_t *mp = dp->i_mount; offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ @@ -104,47 +152,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) return (offset >= minforkoff) ? minforkoff : 0; } - if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { - if (bytes <= XFS_IFORK_ASIZE(dp)) - return dp->i_d.di_forkoff; + /* + * If the requested numbers of bytes is smaller or equal to the + * current attribute fork size we can always proceed. + * + * Note that if_bytes in the data fork might actually be larger than + * the current data fork size is due to delalloc extents. In that + * case either the extent count will go down when they are converted + * to real extents, or the delalloc conversion will take care of the + * literal area rebalancing. + */ + if (bytes <= XFS_IFORK_ASIZE(dp)) + return dp->i_d.di_forkoff; + + /* + * For attr2 we can try to move the forkoff if there is space in the + * literal area, but for the old format we are done if there is no + * space in the fixed attribute fork. + */ + if (!(mp->m_flags & XFS_MOUNT_ATTR2)) return 0; - } dsize = dp->i_df.if_bytes; - + switch (dp->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* + /* * If there is no attr fork and the data fork is extents, - * determine if creating the default attr fork will result - * in the extents form migrating to btree. 
If so, the - * minimum offset only needs to be the space required for + * determine if creating the default attr fork will result + * in the extents form migrating to btree. If so, the + * minimum offset only needs to be the space required for * the btree root. - */ + */ if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > xfs_default_attroffset(dp)) dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); break; - case XFS_DINODE_FMT_BTREE: /* - * If have data btree then keep forkoff if we have one, - * otherwise we are adding a new attr, so then we set - * minforkoff to where the btree root can finish so we have + * If we have a data btree then keep forkoff if we have one, + * otherwise we are adding a new attr, so then we set + * minforkoff to where the btree root can finish so we have * plenty of room for attrs */ if (dp->i_d.di_forkoff) { - if (offset < dp->i_d.di_forkoff) + if (offset < dp->i_d.di_forkoff) return 0; - else - return dp->i_d.di_forkoff; - } else - dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); + return dp->i_d.di_forkoff; + } + dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); break; } - - /* - * A data fork btree root must have space for at least + + /* + * A data fork btree root must have space for at least * MINDBTPTRS key/ptr pairs if the data fork is small or empty. 
*/ minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); @@ -154,10 +215,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); maxforkoff = maxforkoff >> 3; /* rounded down */ - if (offset >= minforkoff && offset < maxforkoff) - return offset; if (offset >= maxforkoff) return maxforkoff; + if (offset >= minforkoff) + return offset; return 0; } @@ -189,6 +250,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args) xfs_inode_t *dp; xfs_ifork_t *ifp; + trace_xfs_attr_sf_create(args); + dp = args->dp; ASSERT(dp != NULL); ifp = dp->i_afp; @@ -222,13 +285,11 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) xfs_inode_t *dp; xfs_ifork_t *ifp; + trace_xfs_attr_sf_add(args); + dp = args->dp; mp = dp->i_mount; dp->i_d.di_forkoff = forkoff; - dp->i_df.if_ext_max = - XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); - dp->i_afp->if_ext_max = - XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); ifp = dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); @@ -280,7 +341,6 @@ xfs_attr_fork_reset( ASSERT(ip->i_d.di_anextents == 0); ASSERT(ip->i_afp == NULL); - ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } @@ -296,6 +356,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) xfs_mount_t *mp; xfs_inode_t *dp; + trace_xfs_attr_sf_remove(args); + dp = args->dp; mp = dp->i_mount; base = sizeof(xfs_attr_sf_hdr_t); @@ -343,10 +405,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) (args->op_flags & XFS_DA_OP_ADDNAME) || !(mp->m_flags & XFS_MOUNT_ATTR2) || dp->i_d.di_format == XFS_DINODE_FMT_BTREE); - dp->i_afp->if_ext_max = - XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); - dp->i_df.if_ext_max = - XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); } @@ -368,6 +426,8 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) int i; xfs_ifork_t *ifp; + 
trace_xfs_attr_sf_lookup(args); + ifp = args->dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; @@ -436,9 +496,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) char *tmpbuffer; int error, i, size; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_ifork_t *ifp; + trace_xfs_attr_sf_to_leaf(args); + dp = args->dp; ifp = dp->i_afp; sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; @@ -504,8 +566,6 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) error = 0; out: - if(bp) - xfs_da_buf_done(bp); kmem_free(tmpbuffer); return(error); } @@ -515,15 +575,17 @@ out: * a shortform attribute list. */ int -xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) +xfs_attr_shortform_allfit( + struct xfs_buf *bp, + struct xfs_inode *dp) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_local_t *name_loc; int bytes, i; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); entry = &leaf->entries[0]; bytes = sizeof(struct xfs_attr_sf_hdr); @@ -552,7 +614,10 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) * Convert a leaf attribute list to shortform attribute list */ int -xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) +xfs_attr_leaf_to_shortform( + struct xfs_buf *bp, + xfs_da_args_t *args, + int forkoff) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; @@ -562,15 +627,17 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) char *tmpbuffer; int error, i; + trace_xfs_attr_leaf_to_sf(args); + dp = args->dp; tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); ASSERT(tmpbuffer != NULL); ASSERT(bp != NULL); - memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); + memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_attr_leafblock_t *)tmpbuffer; - 
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); /* * Clean out the prior contents of the attribute list. @@ -631,30 +698,30 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf; xfs_da_intnode_t *node; xfs_inode_t *dp; - xfs_dabuf_t *bp1, *bp2; + struct xfs_buf *bp1, *bp2; xfs_dablk_t blkno; int error; + trace_xfs_attr_leaf_to_node(args); + dp = args->dp; bp1 = bp2 = NULL; error = xfs_da_grow_inode(args, &blkno); if (error) goto out; - error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1, - XFS_ATTR_FORK); + error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1); if (error) goto out; - ASSERT(bp1 != NULL); + bp2 = NULL; error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2, XFS_ATTR_FORK); if (error) goto out; - ASSERT(bp2 != NULL); - memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount)); - xfs_da_buf_done(bp1); + bp2->b_ops = bp1->b_ops; + memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); bp1 = NULL; - xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); + xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); /* * Set up the new root node. 
@@ -662,21 +729,17 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); if (error) goto out; - node = bp1->data; - leaf = bp2->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + node = bp1->b_addr; + leaf = bp2->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; node->btree[0].before = cpu_to_be32(blkno); node->hdr.count = cpu_to_be16(1); - xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); + xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); error = 0; out: - if (bp1) - xfs_da_buf_done(bp1); - if (bp2) - xfs_da_buf_done(bp2); return(error); } @@ -690,22 +753,27 @@ out: * or a leaf in a node attribute list. */ STATIC int -xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) +xfs_attr_leaf_create( + xfs_da_args_t *args, + xfs_dablk_t blkno, + struct xfs_buf **bpp) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; xfs_inode_t *dp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; + trace_xfs_attr_leaf_create(args); + dp = args->dp; ASSERT(dp != NULL); error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, XFS_ATTR_FORK); if (error) return(error); - ASSERT(bp != NULL); - leaf = bp->data; + bp->b_ops = &xfs_attr_leaf_buf_ops; + leaf = bp->b_addr; memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); hdr = &leaf->hdr; hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC); @@ -719,7 +787,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) - sizeof(xfs_attr_leaf_hdr_t)); - xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); + xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); *bpp = bp; return(0); @@ -735,6 +803,8 @@ xfs_attr_leaf_split(xfs_da_state_t 
*state, xfs_da_state_blk_t *oldblk, xfs_dablk_t blkno; int error; + trace_xfs_attr_leaf_split(state->args); + /* * Allocate space for a new leaf node. */ @@ -764,10 +834,13 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * * Insert the "new" entry in the correct block. */ - if (state->inleaf) + if (state->inleaf) { + trace_xfs_attr_leaf_add_old(state->args); error = xfs_attr_leaf_add(oldblk->bp, state->args); - else + } else { + trace_xfs_attr_leaf_add_new(state->args); error = xfs_attr_leaf_add(newblk->bp, state->args); + } /* * Update last hashval in each block since we added the name. @@ -781,15 +854,19 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * Add a name to the leaf attribute list structure. */ int -xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_add( + struct xfs_buf *bp, + struct xfs_da_args *args) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; xfs_attr_leaf_map_t *map; int tablesize, entsize, sum, tmp, i; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + trace_xfs_attr_leaf_add(args); + + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(leaf->hdr.count))); hdr = &leaf->hdr; @@ -833,7 +910,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) * Compact the entries to coalesce free space. * This may change the hdr->count via dropping INCOMPLETE entries. */ - xfs_attr_leaf_compact(args->trans, bp); + xfs_attr_leaf_compact(args, bp); /* * After compaction, the block is guaranteed to have only one @@ -850,7 +927,10 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) * Add a name to a leaf attribute list structure. 
*/ STATIC int -xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) +xfs_attr_leaf_add_work( + struct xfs_buf *bp, + xfs_da_args_t *args, + int mapindex) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; @@ -861,8 +941,10 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) xfs_mount_t *mp; int tmp, i; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + trace_xfs_attr_leaf_add_work(args); + + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); @@ -875,7 +957,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) tmp = be16_to_cpu(hdr->count) - args->index; tmp *= sizeof(xfs_attr_leaf_entry_t); memmove((char *)(entry+1), (char *)entry, tmp); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); } be16_add_cpu(&hdr->count, 1); @@ -907,7 +989,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) args->index2++; } } - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); ASSERT((args->index == 0) || (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); @@ -915,8 +997,6 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); /* - * Copy the attribute name and value into the new space. - * * For "remote" attribute values, simply note that we need to * allocate space for the "remote" value. 
We can't actually * allocate the extents in this transaction, and we can't decide @@ -941,7 +1021,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) args->rmtblkno = 1; args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); } - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), xfs_attr_leaf_entsize(leaf, args->index))); @@ -965,7 +1045,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) } } be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); return(0); } @@ -974,24 +1054,28 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) * Garbage collect a leaf attribute list block by copying it to a new buffer. */ STATIC void -xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) +xfs_attr_leaf_compact( + struct xfs_da_args *args, + struct xfs_buf *bp) { - xfs_attr_leafblock_t *leaf_s, *leaf_d; - xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; - xfs_mount_t *mp; - char *tmpbuffer; + xfs_attr_leafblock_t *leaf_s, *leaf_d; + xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; + struct xfs_trans *trans = args->trans; + struct xfs_mount *mp = trans->t_mountp; + char *tmpbuffer; + + trace_xfs_attr_leaf_compact(args); - mp = trans->t_mountp; tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); ASSERT(tmpbuffer != NULL); - memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp)); - memset(bp->data, 0, XFS_LBSIZE(mp)); + memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); + memset(bp->b_addr, 0, XFS_LBSIZE(mp)); /* * Copy basic information */ leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; - leaf_d = bp->data; + leaf_d = bp->b_addr; hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; hdr_d->info = hdr_s->info; /* struct copy */ @@ -1014,7 +1098,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) */ xfs_attr_leaf_moveents(leaf_s, 0, 
leaf_d, 0, be16_to_cpu(hdr_s->count), mp); - xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); kmem_free(tmpbuffer); } @@ -1046,12 +1130,15 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, */ ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC); ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; + ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(leaf2->hdr.count == 0); args = state->args; + trace_xfs_attr_leaf_rebalance(args); + /* * Check ordering of blocks, reverse if it makes things simpler. * @@ -1063,8 +1150,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, tmp_blk = blk1; blk1 = blk2; blk2 = tmp_blk; - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; swap = 1; } hdr1 = &leaf1->hdr; @@ -1101,9 +1188,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, max = be16_to_cpu(hdr2->firstused) - sizeof(xfs_attr_leaf_hdr_t); max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t); - if (space > max) { - xfs_attr_leaf_compact(args->trans, blk2->bp); - } + if (space > max) + xfs_attr_leaf_compact(args, blk2->bp); /* * Move high entries from leaf1 to low end of leaf2. 
@@ -1111,13 +1197,14 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count, leaf2, 0, count, state->mp); - xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); - xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); } else if (count > be16_to_cpu(hdr1->count)) { /* * I assert that since all callers pass in an empty * second buffer, this code should never execute. */ + ASSERT(0); /* * Figure the total bytes to be added to the destination leaf. @@ -1133,9 +1220,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, max = be16_to_cpu(hdr1->firstused) - sizeof(xfs_attr_leaf_hdr_t); max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t); - if (space > max) { - xfs_attr_leaf_compact(args->trans, blk1->bp); - } + if (space > max) + xfs_attr_leaf_compact(args, blk1->bp); /* * Move low entries from leaf2 to high end of leaf1. @@ -1143,8 +1229,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_attr_leaf_moveents(leaf2, 0, leaf1, be16_to_cpu(hdr1->count), count, state->mp); - xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); - xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); } /* @@ -1179,10 +1265,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, args->index2 = 0; args->blkno2 = blk2->blkno; } else { + /* + * On a double leaf split, the original attr location + * is already stored in blkno2/index2, so don't + * overwrite it otherwise we corrupt the tree.
+ */ blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); - args->index = args->index2 = blk2->index; - args->blkno = args->blkno2 = blk2->blkno; + args->index = blk2->index; + args->blkno = blk2->blkno; + if (!state->extravalid) { + /* + * set the new attr location to match the old + * one and let the higher level split code + * decide where in the leaf to place it. + */ + args->index2 = blk2->index; + args->blkno2 = blk2->blkno; + } } } else { ASSERT(state->inleaf == 1); @@ -1213,8 +1313,8 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state, /* * Set up environment. */ - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; hdr1 = &leaf1->hdr; hdr2 = &leaf2->hdr; foundit = 0; @@ -1316,7 +1416,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) xfs_da_blkinfo_t *info; int count, bytes, forward, error, retval, i; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; + + trace_xfs_attr_leaf_toosmall(state->args); /* * Check for the degenerate case of the block being over 50% full. @@ -1324,8 +1426,8 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) * to coalesce with a sibling. 
*/ blk = &state->path.blk[ state->path.active-1 ]; - info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); + info = blk->bp->b_addr; + ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); leaf = (xfs_attr_leafblock_t *)info; count = be16_to_cpu(leaf->hdr.count); bytes = sizeof(xfs_attr_leaf_hdr_t) + @@ -1377,23 +1479,21 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) blkno = be32_to_cpu(info->back); if (blkno == 0) continue; - error = xfs_da_read_buf(state->args->trans, state->args->dp, - blkno, -1, &bp, XFS_ATTR_FORK); + error = xfs_attr_leaf_read(state->args->trans, state->args->dp, + blkno, -1, &bp); if (error) return(error); - ASSERT(bp != NULL); leaf = (xfs_attr_leafblock_t *)info; count = be16_to_cpu(leaf->hdr.count); bytes = state->blocksize - (state->blocksize>>2); bytes -= be16_to_cpu(leaf->hdr.usedbytes); - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf = bp->b_addr; count += be16_to_cpu(leaf->hdr.count); bytes -= be16_to_cpu(leaf->hdr.usedbytes); bytes -= count * sizeof(xfs_attr_leaf_entry_t); bytes -= sizeof(xfs_attr_leaf_hdr_t); - xfs_da_brelse(state->args->trans, bp); + xfs_trans_brelse(state->args->trans, bp); if (bytes >= 0) break; /* fits with at least 25% to spare */ } @@ -1431,7 +1531,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) * If two leaves are 37% full, when combined they will leave 25% free. 
*/ int -xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_remove( + struct xfs_buf *bp, + xfs_da_args_t *args) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_hdr_t *hdr; @@ -1441,8 +1543,10 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) int tablesize, tmp, i; xfs_mount_t *mp; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + trace_xfs_attr_leaf_remove(args); + + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr = &leaf->hdr; mp = args->trans->t_mountp; ASSERT((be16_to_cpu(hdr->count) > 0) @@ -1534,7 +1638,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) */ memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize); be16_add_cpu(&hdr->usedbytes, -entsize); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), entsize)); @@ -1542,7 +1646,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * sizeof(xfs_attr_leaf_entry_t); memmove((char *)entry, (char *)(entry+1), tmp); be16_add_cpu(&hdr->count, -1); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); entry = &leaf->entries[be16_to_cpu(hdr->count)]; memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t)); @@ -1572,7 +1676,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) } else { hdr->holes = 1; /* mark as needing compaction */ } - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); /* @@ -1597,16 +1701,18 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_mount_t *mp; char *tmpbuffer; + trace_xfs_attr_leaf_unbalance(state->args); + /* * Set up environment. 
*/ mp = state->mp; ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC); ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); - drop_leaf = drop_blk->bp->data; - save_leaf = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + drop_leaf = drop_blk->bp->b_addr; + save_leaf = save_blk->bp->b_addr; + ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); drop_hdr = &drop_leaf->hdr; save_hdr = &save_leaf->hdr; @@ -1669,7 +1775,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, kmem_free(tmpbuffer); } - xfs_da_log_buf(state->args->trans, save_blk->bp, 0, + xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, state->blocksize - 1); /* @@ -1697,7 +1803,9 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * Don't change the args->value unless we find the attribute. */ int -xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_lookup_int( + struct xfs_buf *bp, + xfs_da_args_t *args) { xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; @@ -1706,8 +1814,10 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) int probe, span; xfs_dahash_t hashval; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + trace_xfs_attr_leaf_lookup(args); + + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -1802,7 +1912,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) * list structure. 
*/ int -xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) +xfs_attr_leaf_getvalue( + struct xfs_buf *bp, + xfs_da_args_t *args) { int valuelen; xfs_attr_leafblock_t *leaf; @@ -1810,8 +1922,8 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_name_local_t *name_loc; xfs_attr_leaf_name_remote_t *name_rmt; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); @@ -1879,8 +1991,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Set up environment. */ - ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; ASSERT((be16_to_cpu(hdr_s->count) > 0) && @@ -2008,14 +2120,16 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, * Return 0 unless leaf2 should go before leaf1. 
*/ int -xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) +xfs_attr_leaf_order( + struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp) { xfs_attr_leafblock_t *leaf1, *leaf2; - leaf1 = leaf1_bp->data; - leaf2 = leaf2_bp->data; - ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && - (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); + leaf1 = leaf1_bp->b_addr; + leaf2 = leaf2_bp->b_addr; + ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) && + (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC))); if ((be16_to_cpu(leaf1->hdr.count) > 0) && (be16_to_cpu(leaf2->hdr.count) > 0) && ((be32_to_cpu(leaf2->entries[0].hashval) < @@ -2033,12 +2147,14 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) * Pick up the last hashvalue from a leaf block. */ xfs_dahash_t -xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) +xfs_attr_leaf_lasthash( + struct xfs_buf *bp, + int *count) { xfs_attr_leafblock_t *leaf; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -2057,7 +2173,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) xfs_attr_leaf_name_remote_t *name_rmt; int size; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr_leaf_name_local(leaf, index); size = xfs_attr_leaf_entsize_local(name_loc->namelen, @@ -2107,7 +2223,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_remote_t *name_rmt; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; #ifdef DEBUG xfs_attr_leaf_name_local_t *name_loc; @@ -2115,18 +2231,15 @@ xfs_attr_leaf_clearflag(xfs_da_args_t 
*args) char *name; #endif /* DEBUG */ + trace_xfs_attr_leaf_clearflag(args); /* * Set up the operation. */ - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, - XFS_ATTR_FORK); - if (error) { + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) return(error); - } - ASSERT(bp != NULL); - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf = bp->b_addr; ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2148,7 +2261,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) #endif /* DEBUG */ entry->flags &= ~XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); if (args->rmtblkno) { @@ -2156,10 +2269,9 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); name_rmt->valuelen = cpu_to_be32(args->valuelen); - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } - xfs_da_buf_done(bp); /* * Commit the flag value change and start the next trans in series. @@ -2176,37 +2288,34 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_remote_t *name_rmt; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; + trace_xfs_attr_leaf_setflag(args); + /* * Set up the operation. 
*/ - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, - XFS_ATTR_FORK); - if (error) { + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + if (error) return(error); - } - ASSERT(bp != NULL); - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf = bp->b_addr; ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); entry->flags |= XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); if ((entry->flags & XFS_ATTR_LOCAL) == 0) { name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->valueblk = 0; name_rmt->valuelen = 0; - xfs_da_log_buf(args->trans, bp, + xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } - xfs_da_buf_done(bp); /* * Commit the flag value change and start the next trans in series. 
@@ -2227,7 +2336,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) xfs_attr_leafblock_t *leaf1, *leaf2; xfs_attr_leaf_entry_t *entry1, *entry2; xfs_attr_leaf_name_remote_t *name_rmt; - xfs_dabuf_t *bp1, *bp2; + struct xfs_buf *bp1, *bp2; int error; #ifdef DEBUG xfs_attr_leaf_name_local_t *name_loc; @@ -2235,38 +2344,33 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) char *name1, *name2; #endif /* DEBUG */ + trace_xfs_attr_leaf_flipflags(args); + /* * Read the block containing the "old" attr */ - error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp1, - XFS_ATTR_FORK); - if (error) { - return(error); - } - ASSERT(bp1 != NULL); + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); + if (error) + return error; /* * Read the block containing the "new" attr, if it is different */ if (args->blkno2 != args->blkno) { - error = xfs_da_read_buf(args->trans, args->dp, args->blkno2, - -1, &bp2, XFS_ATTR_FORK); - if (error) { - return(error); - } - ASSERT(bp2 != NULL); + error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno2, + -1, &bp2); + if (error) + return error; } else { bp2 = bp1; } - leaf1 = bp1->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf1 = bp1->b_addr; ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; - leaf2 = bp2->data; - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + leaf2 = bp2->b_addr; ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); ASSERT(args->index2 >= 0); entry2 = &leaf2->entries[ args->index2 ]; @@ -2299,30 +2403,27 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0); entry1->flags &= ~XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp1, + xfs_trans_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); if (args->rmtblkno) { ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); name_rmt = 
xfs_attr_leaf_name_remote(leaf1, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); name_rmt->valuelen = cpu_to_be32(args->valuelen); - xfs_da_log_buf(args->trans, bp1, + xfs_trans_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); } entry2->flags |= XFS_ATTR_INCOMPLETE; - xfs_da_log_buf(args->trans, bp2, + xfs_trans_log_buf(args->trans, bp2, XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); name_rmt->valueblk = 0; name_rmt->valuelen = 0; - xfs_da_log_buf(args->trans, bp2, + xfs_trans_log_buf(args->trans, bp2, XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt))); } - xfs_da_buf_done(bp1); - if (bp1 != bp2) - xfs_da_buf_done(bp2); /* * Commit the flag value change and start the next trans in series. diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c index 5a626b03b..b328a0b89 100644 --- a/libxfs/xfs_bmap.c +++ b/libxfs/xfs_bmap.c @@ -18,10 +18,6 @@ #include -#ifdef DEBUG -STATIC void -xfs_bmap_check_leaf_extents(xfs_btree_cur_t *cur, xfs_inode_t *ip, int whichfork); -#endif kmem_zone_t *xfs_bmap_free_item_zone; @@ -29,6 +25,16 @@ kmem_zone_t *xfs_bmap_free_item_zone; * Prototypes for internal bmap routines. */ +#ifdef DEBUG +STATIC void +xfs_bmap_check_leaf_extents( + struct xfs_btree_cur *cur, + struct xfs_inode *ip, + int whichfork); +#else +#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) +#endif + /* * Called from xfs_bmap_add_attrfork to handle extents format files. @@ -52,75 +58,6 @@ xfs_bmap_add_attrfork_local( xfs_bmap_free_t *flist, /* blocks to free at commit */ int *flags); /* inode logging flags */ -/* - * Called by xfs_bmapi to update file extent records and the btree - * after allocating space (or doing a delayed allocation). 
- */ -STATIC int /* error */ -xfs_bmap_add_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd); /* OK to allocate reserved blocks */ - -/* - * Called by xfs_bmap_add_extent to handle cases converting a delayed - * allocation to a real allocation. - */ -STATIC int /* error */ -xfs_bmap_add_extent_delay_real( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int rsvd); /* OK to allocate reserved blocks */ - -/* - * Called by xfs_bmap_add_extent to handle cases converting a hole - * to a delayed allocation. - */ -STATIC int /* error */ -xfs_bmap_add_extent_hole_delay( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp,/* inode logging flags */ - int rsvd); /* OK to allocate reserved blocks */ - -/* - * Called by xfs_bmap_add_extent to handle cases converting a hole - * to a real allocation. 
- */ -STATIC int /* error */ -xfs_bmap_add_extent_hole_real( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - int whichfork); /* data or attr fork */ - -/* - * Called by xfs_bmap_add_extent to handle cases converting an unwritten - * allocation to a real allocation or vice versa. - */ -STATIC int /* error */ -xfs_bmap_add_extent_unwritten_real( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp); /* inode logging flags */ - /* * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. * It figures out where to ask the underlying allocator to put the new extent. @@ -143,22 +80,6 @@ xfs_bmap_btree_to_extents( int *logflagsp, /* inode logging flags */ int whichfork); /* data or attr fork */ -/* - * Called by xfs_bmapi to update file extent records and the btree - * after removing space (or undoing a delayed allocation). - */ -STATIC int /* error */ -xfs_bmap_del_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_trans_t *tp, /* current trans pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp,/* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd); /* OK to allocate reserved blocks */ - /* * Convert an extents-format file into a btree-format file. * The new file will have a root block (in the inode) and a single child block. 
@@ -188,19 +109,6 @@ xfs_bmap_local_to_extents( int *logflagsp, /* inode logging flags */ int whichfork); /* data or attr fork */ -/* - * Check the last inode extent to determine whether this allocation will result - * in blocks being allocated at the end of the file. When we allocate new data - * blocks at the end of the file which do not start at the previous data block, - * we will try to align the new blocks at stripe unit boundaries. - */ -STATIC int /* error */ -xfs_bmap_isaeof( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t off, /* file offset in fsblocks */ - int whichfork, /* data or attribute fork */ - char *aeof); /* return value */ - /* * Compute the worst-case number of indirect blocks that will be used * for ip's delayed extent of length "len". @@ -283,7 +191,27 @@ xfs_bmbt_lookup_ge( } /* -* Update the record referred to by cur to the value given + * Check if the inode needs to be converted to btree format. + */ +static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) +{ + return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) > + XFS_IFORK_MAXEXT(ip, whichfork); +} + +/* + * Check if the inode should be converted to extent format. + */ +static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) +{ + return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && + XFS_IFORK_NEXTENTS(ip, whichfork) <= + XFS_IFORK_MAXEXT(ip, whichfork); +} + +/* + * Update the record referred to by cur to the value given * by [off, bno, len, state]. * This either works (return 0) or gets an EFSCORRUPTED error. 
*/ @@ -388,7 +316,7 @@ xfs_bmap_add_attrfork_local( if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) return 0; - if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { + if (S_ISDIR(ip->i_d.di_mode)) { mp = ip->i_mount; memset(&dargs, 0, sizeof(dargs)); dargs.dp = ip; @@ -405,188 +333,13 @@ xfs_bmap_add_attrfork_local( } /* - * Called by xfs_bmapi to update file extent records and the btree - * after allocating space (or doing a delayed allocation). - */ -STATIC int /* error */ -xfs_bmap_add_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd) /* OK to use reserved data blocks */ -{ - xfs_btree_cur_t *cur; /* btree cursor or null */ - xfs_filblks_t da_new; /* new count del alloc blocks used */ - xfs_filblks_t da_old; /* old count del alloc blocks used */ - int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork ptr */ - int logflags; /* returned value */ - xfs_extnum_t nextents; /* number of extents in file now */ - - XFS_STATS_INC(xs_add_exlist); - cur = *curp; - ifp = XFS_IFORK_PTR(ip, whichfork); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(idx <= nextents); - da_old = da_new = 0; - error = 0; - /* - * This is the first extent added to a new/empty file. - * Special case this one, so other routines get to assume there are - * already extents in the list. - */ - if (nextents == 0) { - xfs_iext_insert(ip, 0, 1, new, - whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0); - - ASSERT(cur == NULL); - ifp->if_lastex = 0; - if (!isnullstartblock(new->br_startblock)) { - XFS_IFORK_NEXT_SET(ip, whichfork, 1); - logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); - } else - logflags = 0; - } - /* - * Any kind of new delayed allocation goes here. - */ - else if (isnullstartblock(new->br_startblock)) { - if (cur) - ASSERT((cur->bc_private.b.flags & - XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, - &logflags, rsvd))) - goto done; - } - /* - * Real allocation off the end of the file. - */ - else if (idx == nextents) { - if (cur) - ASSERT((cur->bc_private.b.flags & - XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, - &logflags, whichfork))) - goto done; - } else { - xfs_bmbt_irec_t prev; /* old extent at offset idx */ - - /* - * Get the record referred to by idx. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev); - /* - * If it's a real allocation record, and the new allocation ends - * after the start of the referred to record, then we're filling - * in a delayed or unwritten allocation with a real one, or - * converting real back to unwritten. 
- */ - if (!isnullstartblock(new->br_startblock) && - new->br_startoff + new->br_blockcount > prev.br_startoff) { - if (prev.br_state != XFS_EXT_UNWRITTEN && - isnullstartblock(prev.br_startblock)) { - da_old = startblockval(prev.br_startblock); - if (cur) - ASSERT(cur->bc_private.b.flags & - XFS_BTCUR_BPRV_WASDEL); - if ((error = xfs_bmap_add_extent_delay_real(ip, - idx, &cur, new, &da_new, first, flist, - &logflags, rsvd))) - goto done; - } else if (new->br_state == XFS_EXT_NORM) { - ASSERT(new->br_state == XFS_EXT_NORM); - if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) - goto done; - } else { - ASSERT(new->br_state == XFS_EXT_UNWRITTEN); - if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) - goto done; - } - ASSERT(*curp == cur || *curp == NULL); - } - /* - * Otherwise we're filling in a hole with an allocation. - */ - else { - if (cur) - ASSERT((cur->bc_private.b.flags & - XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, - new, &logflags, whichfork))) - goto done; - } - } - - ASSERT(*curp == cur || *curp == NULL); - /* - * Convert to a btree if necessary. - */ - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) { - int tmp_logflags; /* partial log flag return val */ - - ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, - flist, &cur, da_old > 0, &tmp_logflags, whichfork); - logflags |= tmp_logflags; - if (error) - goto done; - } - /* - * Adjust for changes in reserved delayed indirect blocks. - * Nothing to do for disk quotas here. 
- */ - if (da_old || da_new) { - xfs_filblks_t nblks; - - nblks = da_new; - if (cur) - nblks += cur->bc_private.b.allocated; - ASSERT(nblks <= da_old); - if (nblks < da_old) - xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - nblks), rsvd); - } - /* - * Clear out the allocated field, done with it now in any case. - */ - if (cur) { - cur->bc_private.b.allocated = 0; - *curp = cur; - } -done: -#ifdef DEBUG - if (!error) - xfs_bmap_check_leaf_extents(*curp, ip, whichfork); -#endif - *logflagsp = logflags; - return error; -} - -/* - * Called by xfs_bmap_add_extent to handle cases converting a delayed - * allocation to a real allocation. + * Convert a delayed allocation to a real allocation. */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int rsvd) /* OK to use reserved data block allocation */ + struct xfs_bmalloca *bma) { - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_bmbt_irec *new = &bma->got; int diff; /* temp value */ xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ int error; /* error return value */ @@ -597,10 +350,22 @@ xfs_bmap_add_extent_delay_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ - xfs_filblks_t temp=0; /* value for dnew calculations */ - xfs_filblks_t temp2=0;/* value for dnew calculations */ + xfs_filblks_t da_new; /* new count del alloc blocks used */ + xfs_filblks_t da_old; /* old count del alloc blocks used */ + xfs_filblks_t temp=0; /* value for 
da_new calculations */ + xfs_filblks_t temp2=0;/* value for da_new calculations */ int tmp_rval; /* partial logging flags */ + ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); + + ASSERT(bma->idx >= 0); + ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + ASSERT(!bma->cur || + (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + + XFS_STATS_INC(xs_add_exlist); + #define LEFT r[0] #define RIGHT r[1] #define PREV r[2] @@ -608,14 +373,15 @@ xfs_bmap_add_extent_delay_real( /* * Set up a bunch of variables to make the tests simpler. */ - cur = *curp; - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, bma->idx); xfs_bmbt_get_all(ep, &PREV); new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + da_old = startblockval(PREV.br_startblock); + da_new = 0; + /* * Set flags determining what part of the previous delayed allocation * extent is being replaced by a real allocation. @@ -629,9 +395,9 @@ xfs_bmap_add_extent_delay_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. */ - if (idx > 0) { + if (bma->idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -649,9 +415,9 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. 
*/ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; @@ -682,38 +448,41 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left and right neighbors are both contiguous with new. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + bma->idx--; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 2, state); - ip->i_df.if_lastex = idx - 1; - ip->i_d.di_nextents--; - if (cur == NULL) + xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); + bma->ip->i_d.di_nextents--; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) + RIGHT.br_blockcount, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) + error = xfs_btree_delete(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) + error = xfs_btree_decrement(bma->cur, 0, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + 
PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state))) + RIGHT.br_blockcount, LEFT.br_state); + if (error) goto done; } - *dnew = 0; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: @@ -721,29 +490,31 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left neighbor is contiguous, the right is not. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + bma->idx--; + + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx - 1; - xfs_iext_remove(ip, idx, 1, state); - if (cur == NULL) + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount, - &i))) + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + - PREV.br_blockcount, LEFT.br_state))) + PREV.br_blockcount, LEFT.br_state); + if (error) goto done; } - *dnew = 0; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -751,30 +522,30 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The right neighbor is contiguous, the left is not. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - xfs_iext_remove(ip, idx + 1, 1, state); - if (cur == NULL) + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) + RIGHT.br_blockcount, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, + error = xfs_bmbt_update(bma->cur, PREV.br_startoff, new->br_startblock, PREV.br_blockcount + - RIGHT.br_blockcount, PREV.br_state))) + RIGHT.br_blockcount, PREV.br_state); + if (error) goto done; } - *dnew = 0; break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: @@ -783,27 +554,27 @@ xfs_bmap_add_extent_delay_real( * Neither the left nor right neighbors are contiguous with * the new one. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - ip->i_d.di_nextents++; - if (cur == NULL) + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount, - &i))) + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_btree_insert(cur, &i))) + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - *dnew = 0; break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: @@ -811,38 +582,40 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - ip->i_df.if_lastex = idx - 1; - if (cur == NULL) + if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount, - &i))) + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, + error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + new->br_blockcount, - LEFT.br_state))) + LEFT.br_state); + if (error) goto done; } - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock)); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - *dnew = temp; + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx--; break; case BMAP_LEFT_FILLING: @@ -850,43 +623,43 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is not contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, new_endoff); temp = PREV.br_blockcount - new->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; - ip->i_d.di_nextents++; - if (cur == NULL) + xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount, - &i))) + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_btree_insert(cur, &i))) + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, - first, flist, &cur, 1, &tmp_rval, - XFS_DATA_FORK); + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, + &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; if (error) goto done; } - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (cur ? cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, idx + 1); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); - *dnew = temp; + (bma->cur ? 
bma->cur->bc_private.b.allocated : 0)); + ep = xfs_iext_get_ext(ifp, bma->idx + 1); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); break; case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -895,36 +668,39 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is contiguous with the new allocation. */ temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, RIGHT.br_state); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); - ip->i_df.if_lastex = idx + 1; - if (cur == NULL) + trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) + RIGHT.br_blockcount, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, + error = xfs_bmbt_update(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, - RIGHT.br_state))) + RIGHT.br_state); + if (error) goto done; } - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock)); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - *dnew = temp; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_startblock(ep, 
nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx++; break; case BMAP_RIGHT_FILLING: @@ -933,41 +709,43 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is not contiguous. */ temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(ip, idx + 1, 1, new, state); - ip->i_df.if_lastex = idx + 1; - ip->i_d.di_nextents++; - if (cur == NULL) + xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount, - &i))) + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_btree_insert(cur, &i))) + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, - first, flist, &cur, 1, &tmp_rval, - XFS_DATA_FORK); + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, 1, + &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; if (error) goto done; } - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (cur ? 
cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, idx); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - *dnew = temp; + (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + ep = xfs_iext_get_ext(ifp, bma->idx); + xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + + bma->idx++; break; case 0: @@ -975,82 +753,83 @@ xfs_bmap_add_extent_delay_real( * Filling in the middle part of a previous delayed allocation. * Contiguity is impossible here. * This case is avoided almost all the time. + * + * We start with a delayed allocation: + * + * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ + * PREV @ idx + * + * and we are allocating: + * +rrrrrrrrrrrrrrrrr+ + * new + * + * and we set it up for insertion as: + * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ + * new + * PREV @ idx LEFT RIGHT + * inserted at idx + 1 */ temp = new->br_startoff - PREV.br_startoff; - trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - r[0] = *new; - r[1].br_state = PREV.br_state; - r[1].br_startblock = 0; - r[1].br_startoff = new_endoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - r[1].br_blockcount = temp2; - xfs_iext_insert(ip, idx + 1, 2, &r[0], state); - ip->i_df.if_lastex = idx + 1; - ip->i_d.di_nextents++; - if (cur == NULL) + trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ + LEFT = *new; + RIGHT.br_state = PREV.br_state; + RIGHT.br_startblock = nullstartblock( + (int)xfs_bmap_worst_indlen(bma->ip, temp2)); + RIGHT.br_startoff = new_endoff; + RIGHT.br_blockcount = temp2; + /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ + xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); + bma->ip->i_d.di_nextents++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = 
XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount, - &i))) + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_btree_insert(cur, &i))) + bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; + error = xfs_btree_insert(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, - first, flist, &cur, 1, &tmp_rval, - XFS_DATA_FORK); + + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; if (error) goto done; } - temp = xfs_bmap_worst_indlen(ip, temp); - temp2 = xfs_bmap_worst_indlen(ip, temp2); + temp = xfs_bmap_worst_indlen(bma->ip, temp); + temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (cur ? cur->bc_private.b.allocated : 0)); - if (diff > 0 && - xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) { - /* - * Ick gross gag me with a spoon. - */ - ASSERT(0); /* want to see if this ever happens! */ - while (diff > 0) { - if (temp) { - temp--; - diff--; - if (!diff || - !xfs_icsb_modify_counters(ip->i_mount, - XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) - break; - } - if (temp2) { - temp2--; - diff--; - if (!diff || - !xfs_icsb_modify_counters(ip->i_mount, - XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) - break; - } - } + (bma->cur ? 
bma->cur->bc_private.b.allocated : 0)); + if (diff > 0) { + error = xfs_icsb_modify_counters(bma->ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), 0); + ASSERT(!error); + if (error) + goto done; } - ep = xfs_iext_get_ext(ifp, idx); + + ep = xfs_iext_get_ext(ifp, bma->idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), nullstartblock((int)temp2)); - trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); - *dnew = temp + temp2; + trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); + + bma->idx++; + da_new = temp + temp2; break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: @@ -1065,9 +844,39 @@ xfs_bmap_add_extent_delay_real( */ ASSERT(0); } - *curp = cur; + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(bma->cur == NULL); + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + da_old > 0, &tmp_logflags, XFS_DATA_FORK); + bma->logflags |= tmp_logflags; + if (error) + goto done; + } + + /* adjust for changes in reserved delayed indirect blocks */ + if (da_old || da_new) { + temp = da_new; + if (bma->cur) + temp += bma->cur->bc_private.b.allocated; + ASSERT(temp <= da_old); + if (temp < da_old) + xfs_icsb_modify_counters(bma->ip->i_mount, + XFS_SBS_FDBLOCKS, + (int64_t)(da_old - temp), 0); + } + + /* clear out the allocated field, done with it now in any case. 
*/ + if (bma->cur) + bma->cur->bc_private.b.allocated = 0; + + xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); done: - *logflagsp = rval; + bma->logflags |= rval; return error; #undef LEFT #undef RIGHT @@ -1075,15 +884,17 @@ done: } /* - * Called by xfs_bmap_add_extent to handle cases converting an unwritten - * allocation to a real allocation or vice versa. + * Convert an unwritten allocation to a real allocation or vice versa. */ STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( + struct xfs_trans *tp, xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ + xfs_fsblock_t *first, /* pointer to firstblock variable */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ @@ -1099,16 +910,26 @@ xfs_bmap_add_extent_unwritten_real( int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ + *logflagsp = 0; + + cur = *curp; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + + ASSERT(*idx >= 0); + ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + + XFS_STATS_INC(xs_add_exlist); + #define LEFT r[0] #define RIGHT r[1] #define PREV r[2] + /* * Set up a bunch of variables to make the tests simpler. */ error = 0; - cur = *curp; - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &PREV); newext = new->br_state; oldext = (newext == XFS_EXT_UNWRITTEN) ? @@ -1131,9 +952,9 @@ xfs_bmap_add_extent_unwritten_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. 
*/ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -1151,9 +972,9 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1182,14 +1003,15 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 2, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 2, state); ip->i_d.di_nextents -= 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1225,13 +1047,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx - 1; - xfs_iext_remove(ip, idx, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1261,13 +1084,12 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - xfs_iext_remove(ip, idx + 1, 1, state); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_remove(ip, *idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1298,11 +1120,10 @@ xfs_bmap_add_extent_unwritten_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1324,21 +1145,22 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + --*idx; - ip->i_df.if_lastex = idx - 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1356,10 +1178,11 @@ xfs_bmap_add_extent_unwritten_real( goto done; if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - if (xfs_bmbt_update(cur, LEFT.br_startoff, + error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + new->br_blockcount, - LEFT.br_state)) + LEFT.br_state); + if (error) goto done; } break; @@ -1369,17 +1192,16 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); xfs_bmbt_set_startoff(ep, new_endoff); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1408,17 +1230,19 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, newext); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx + 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1448,13 +1272,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, new, state); - xfs_iext_insert(ip, idx + 1, 1, new, state); - ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1488,10 +1313,10 @@ xfs_bmap_add_extent_unwritten_real( * newext. Contiguity is impossible here. * One extent becomes three extents. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, new->br_startoff - PREV.br_startoff); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); r[0] = *new; r[1].br_startoff = new_endoff; @@ -1499,8 +1324,10 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_startblock = new->br_startblock + new->br_blockcount; r[1].br_state = oldext; - xfs_iext_insert(ip, idx + 1, 2, &r[0], state); - ip->i_df.if_lastex = idx + 1; + + ++*idx; + xfs_iext_insert(ip, *idx, 2, &r[0], state); + ip->i_d.di_nextents += 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1553,9 +1380,28 @@ xfs_bmap_add_extent_unwritten_real( */ ASSERT(0); } - *curp = cur; + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, + 0, &tmp_logflags, XFS_DATA_FORK); + *logflagsp |= tmp_logflags; + if (error) + goto done; + } + + /* clear out the allocated field, done with it now in any case. 
*/ + if (cur) { + cur->bc_private.b.allocated = 0; + *curp = cur; + } + + xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); done: - *logflagsp = rval; + *logflagsp |= rval; return error; #undef LEFT #undef RIGHT @@ -1563,19 +1409,14 @@ done: } /* - * Called by xfs_bmap_add_extent to handle cases converting a hole - * to a delayed allocation. + * Convert a hole to a delayed allocation. */ -/*ARGSUSED*/ -STATIC int /* error */ +STATIC void xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - int rsvd) /* OK to allocate reserved blocks */ + xfs_extnum_t *idx, /* extent number to update/insert */ + xfs_bmbt_irec_t *new) /* new data to add to file extents */ { - xfs_bmbt_rec_host_t *ep; /* extent record for idx */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_filblks_t newlen=0; /* new indirect size */ @@ -1585,16 +1426,15 @@ xfs_bmap_add_extent_hole_delay( xfs_filblks_t temp=0; /* temp for indirect calculations */ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); state = 0; ASSERT(isnullstartblock(new->br_startblock)); /* * Check and set flags if this segment has a left neighbor */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -1604,9 +1444,9 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. 
*/ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(ep, &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; @@ -1639,21 +1479,21 @@ xfs_bmap_add_extent_hole_delay( * on the left and on the right. * Merge all three into a single extent record. */ + --*idx; temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 1, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 1, state); break; case BMAP_LEFT_CONTIG: @@ -1662,17 +1502,17 @@ xfs_bmap_add_extent_hole_delay( * on the left. * Merge the new allocation with the left neighbor. 
*/ + --*idx; temp = left.br_blockcount + new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); - - ip->i_df.if_lastex = idx - 1; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); break; case BMAP_RIGHT_CONTIG: @@ -1681,16 +1521,15 @@ xfs_bmap_add_extent_hole_delay( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_allf(ep, new->br_startoff, + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, nullstartblock((int)newlen), temp, right.br_state); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - - ip->i_df.if_lastex = idx; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); break; case 0: @@ -1700,36 +1539,28 @@ xfs_bmap_add_extent_hole_delay( * Insert a new entry. */ oldlen = newlen = 0; - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); break; } if (oldlen != newlen) { ASSERT(oldlen > newlen); xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(oldlen - newlen), rsvd); + (int64_t)(oldlen - newlen), 0); /* * Nothing to do for disk quota accounting here. 
*/ } - *logflagsp = 0; - return 0; } /* - * Called by xfs_bmap_add_extent to handle cases converting a hole - * to a real allocation. + * Convert a hole to a real allocation. */ STATIC int /* error */ xfs_bmap_add_extent_hole_real( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ + struct xfs_bmalloca *bma, + int whichfork) { - xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ + struct xfs_bmbt_irec *new = &bma->got; int error; /* error return value */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ @@ -1738,20 +1569,26 @@ xfs_bmap_add_extent_hole_real( int rval=0; /* return value (logging flags) */ int state; /* state bits, accessed thru macros */ - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); - ep = xfs_iext_get_ext(ifp, idx); - state = 0; + ifp = XFS_IFORK_PTR(bma->ip, whichfork); + + ASSERT(bma->idx >= 0); + ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(!isnullstartblock(new->br_startblock)); + ASSERT(!bma->cur || + !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + + XFS_STATS_INC(xs_add_exlist); + state = 0; if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; /* * Check and set flags if this segment has a left neighbor. */ - if (idx > 0) { + if (bma->idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -1760,9 +1597,9 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. 
*/ - if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(ep, &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1799,38 +1636,42 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --bma->idx; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), left.br_blockcount + new->br_blockcount + right.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 1, state); - ifp->if_lastex = idx - 1; - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - if (cur == NULL) { + xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + + XFS_IFORK_NEXT_SET(bma->ip, whichfork, + XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); + if (bma->cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, - right.br_startoff, - right.br_startblock, - right.br_blockcount, &i))) + error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, + right.br_startblock, right.br_blockcount, + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_delete(cur, &i))) + error = xfs_btree_delete(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_btree_decrement(cur, 0, &i))) + error = xfs_btree_decrement(bma->cur, 0, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, left.br_startoff, + error = xfs_bmbt_update(bma->cur, 
left.br_startoff, left.br_startblock, left.br_blockcount + new->br_blockcount + right.br_blockcount, - left.br_state))) + left.br_state); + if (error) goto done; } break; @@ -1841,27 +1682,28 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --bma->idx; + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - ifp->if_lastex = idx - 1; - if (cur == NULL) { + if (bma->cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, - left.br_startoff, - left.br_startblock, - left.br_blockcount, &i))) + error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, + left.br_startblock, left.br_blockcount, + &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, left.br_startoff, + error = xfs_bmbt_update(bma->cur, left.br_startoff, left.br_startblock, left.br_blockcount + new->br_blockcount, - left.br_state))) + left.br_state); + if (error) goto done; } break; @@ -1872,28 +1714,30 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), + new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - ifp->if_lastex = idx; - if (cur == NULL) { + if (bma->cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, + error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, right.br_startblock, - right.br_blockcount, &i))) + right.br_blockcount, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, + error = xfs_bmbt_update(bma->cur, new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, - right.br_state))) + right.br_state); + if (error) goto done; } break; @@ -1904,29 +1748,49 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. 
*/ - xfs_iext_insert(ip, idx, 1, new, state); - ifp->if_lastex = idx; - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); - if (cur == NULL) { + xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + XFS_IFORK_NEXT_SET(bma->ip, whichfork, + XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); + if (bma->cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, + error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, new->br_startblock, - new->br_blockcount, &i))) + new->br_blockcount, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); - cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_btree_insert(cur, &i))) + bma->cur->bc_rec.b.br_state = new->br_state; + error = xfs_btree_insert(bma->cur, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } break; } + + /* convert to a btree if necessary */ + if (xfs_bmap_needs_btree(bma->ip, whichfork)) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(bma->cur == NULL); + error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, + bma->firstblock, bma->flist, &bma->cur, + 0, &tmp_logflags, whichfork); + bma->logflags |= tmp_logflags; + if (error) + goto done; + } + + /* clear out the allocated field, done with it now in any case. */ + if (bma->cur) + bma->cur->bc_private.b.allocated = 0; + + xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); done: - *logflagsp = rval; + bma->logflags |= rval; return error; } @@ -2113,26 +1977,26 @@ xfs_bmap_adjacent( XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) mp = ap->ip->i_mount; - nullfb = ap->firstblock == NULLFSBLOCK; + nullfb = *ap->firstblock == NULLFSBLOCK; rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); + fb_agno = nullfb ? 
NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); /* * If allocating at eof, and there's a previous real block, * try to use its last block as our starting point. */ - if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF && - !isnullstartblock(ap->prevp->br_startblock) && - ISVALID(ap->prevp->br_startblock + ap->prevp->br_blockcount, - ap->prevp->br_startblock)) { - ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount; + if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && + !isnullstartblock(ap->prev.br_startblock) && + ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, + ap->prev.br_startblock)) { + ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; /* * Adjust for the gap between prevp and us. */ - adjust = ap->off - - (ap->prevp->br_startoff + ap->prevp->br_blockcount); + adjust = ap->offset - + (ap->prev.br_startoff + ap->prev.br_blockcount); if (adjust && - ISVALID(ap->rval + adjust, ap->prevp->br_startblock)) - ap->rval += adjust; + ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) + ap->blkno += adjust; } /* * If not at eof, then compare the two neighbor blocks. @@ -2149,17 +2013,17 @@ xfs_bmap_adjacent( * If there's a previous (left) block, select a requested * start block based on it. */ - if (ap->prevp->br_startoff != NULLFILEOFF && - !isnullstartblock(ap->prevp->br_startblock) && - (prevbno = ap->prevp->br_startblock + - ap->prevp->br_blockcount) && - ISVALID(prevbno, ap->prevp->br_startblock)) { + if (ap->prev.br_startoff != NULLFILEOFF && + !isnullstartblock(ap->prev.br_startblock) && + (prevbno = ap->prev.br_startblock + + ap->prev.br_blockcount) && + ISVALID(prevbno, ap->prev.br_startblock)) { /* * Calculate gap to end of previous block. */ - adjust = prevdiff = ap->off - - (ap->prevp->br_startoff + - ap->prevp->br_blockcount); + adjust = prevdiff = ap->offset - + (ap->prev.br_startoff + + ap->prev.br_blockcount); /* * Figure the startblock based on the previous block's * end and the gap size. 
@@ -2168,9 +2032,9 @@ xfs_bmap_adjacent( * allocating, or using it gives us an invalid block * number, then just use the end of the previous block. */ - if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen && + if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && ISVALID(prevbno + prevdiff, - ap->prevp->br_startblock)) + ap->prev.br_startblock)) prevbno += adjust; else prevdiff += adjust; @@ -2191,16 +2055,16 @@ xfs_bmap_adjacent( * If there's a following (right) block, select a requested * start block based on it. */ - if (!isnullstartblock(ap->gotp->br_startblock)) { + if (!isnullstartblock(ap->got.br_startblock)) { /* * Calculate gap to start of next block. */ - adjust = gotdiff = ap->gotp->br_startoff - ap->off; + adjust = gotdiff = ap->got.br_startoff - ap->offset; /* * Figure the startblock based on the next block's * start and the gap size. */ - gotbno = ap->gotp->br_startblock; + gotbno = ap->got.br_startblock; /* * Heuristic! * If the gap is large relative to the piece we're @@ -2208,12 +2072,12 @@ xfs_bmap_adjacent( * number, then just use the start of the next block * offset by our length. */ - if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen && + if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && ISVALID(gotbno - gotdiff, gotbno)) gotbno -= adjust; - else if (ISVALID(gotbno - ap->alen, gotbno)) { - gotbno -= ap->alen; - gotdiff += adjust - ap->alen; + else if (ISVALID(gotbno - ap->length, gotbno)) { + gotbno -= ap->length; + gotdiff += adjust - ap->length; } else gotdiff += adjust; /* @@ -2234,11 +2098,11 @@ xfs_bmap_adjacent( * one, else ap->rval is already set (to 0 or the inode block). */ if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) - ap->rval = prevdiff <= gotdiff ? prevbno : gotbno; + ap->blkno = prevdiff <= gotdiff ? 
prevbno : gotbno; else if (prevbno != NULLFSBLOCK) - ap->rval = prevbno; + ap->blkno = prevbno; else if (gotbno != NULLFSBLOCK) - ap->rval = gotbno; + ap->blkno = gotbno; } #undef ISVALID } @@ -2271,7 +2135,7 @@ xfs_bmap_btalloc_nullfb( startag = ag = 0; pag = xfs_perag_get(mp, ag); - while (*blen < ap->alen) { + while (*blen < args->maxlen) { if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, args->tp, ag, XFS_ALLOC_FLAG_TRYLOCK); @@ -2293,7 +2157,7 @@ xfs_bmap_btalloc_nullfb( notinit = 1; if (xfs_inode_is_filestream(ap->ip)) { - if (*blen >= ap->alen) + if (*blen >= args->maxlen) break; if (ap->userdata) { @@ -2339,21 +2203,21 @@ xfs_bmap_btalloc_nullfb( * If the best seen length is less than the request * length, use the best as the minimum. */ - else if (*blen < ap->alen) + else if (*blen < args->maxlen) args->minlen = *blen; /* - * Otherwise we've seen an extent as big as alen, + * Otherwise we've seen an extent as big as maxlen, * use that as the minimum. */ else - args->minlen = ap->alen; + args->minlen = args->maxlen; /* * set the failure fallback case to look in the selected * AG as the stream may have moved. */ if (xfs_inode_is_filestream(ap->ip)) - ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); + ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); return 0; } @@ -2375,53 +2239,58 @@ xfs_bmap_btalloc( int tryagain; int error; + ASSERT(ap->length); + mp = ap->ip->i_mount; align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; if (unlikely(align)) { - error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, + error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, - &ap->off, &ap->alen); + &ap->offset, &ap->length); ASSERT(!error); - ASSERT(ap->alen); + ASSERT(ap->length); } - nullfb = ap->firstblock == NULLFSBLOCK; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); + nullfb = *ap->firstblock == NULLFSBLOCK; + fb_agno = nullfb ? 
NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); if (nullfb) { if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { ag = xfs_filestream_lookup_ag(ap->ip); ag = (ag != NULLAGNUMBER) ? ag : 0; - ap->rval = XFS_AGB_TO_FSB(mp, ag, 0); + ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); } else { - ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); } } else - ap->rval = ap->firstblock; + ap->blkno = *ap->firstblock; xfs_bmap_adjacent(ap); /* - * If allowed, use ap->rval; otherwise must use firstblock since + * If allowed, use ap->blkno; otherwise must use firstblock since * it's in the right allocation group. */ - if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno) + if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) ; else - ap->rval = ap->firstblock; + ap->blkno = *ap->firstblock; /* * Normal allocation, done through xfs_alloc_vextent. */ tryagain = isaligned = 0; + memset(&args, 0, sizeof(args)); args.tp = ap->tp; args.mp = mp; - args.fsbno = ap->rval; - args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); - args.firstblock = ap->firstblock; + args.fsbno = ap->blkno; + + /* Trim the allocation back to the maximum an AG can fit. 
*/ + args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); + args.firstblock = *ap->firstblock; blen = 0; if (nullfb) { error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); if (error) return error; - } else if (ap->low) { + } else if (ap->flist->xbf_low) { if (xfs_inode_is_filestream(ap->ip)) args.type = XFS_ALLOCTYPE_FIRST_AG; else @@ -2435,14 +2304,14 @@ xfs_bmap_btalloc( /* apply extent size hints if obtained earlier */ if (unlikely(align)) { args.prod = align; - if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) + if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { args.prod = 1; args.mod = 0; } else { args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; - if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) + if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } /* @@ -2454,15 +2323,15 @@ xfs_bmap_btalloc( * is >= the stripe unit and the allocation offset is * at the end of file. */ - if (!ap->low && ap->aeof) { - if (!ap->off) { + if (!ap->flist->xbf_low && ap->aeof) { + if (!ap->offset) { args.alignment = mp->m_dalign; atype = args.type; isaligned = 1; /* * Adjust for alignment */ - if (blen > args.alignment && blen <= ap->alen) + if (blen > args.alignment && blen <= args.maxlen) args.minlen = blen - args.alignment; args.minalignslop = 0; } else { @@ -2481,7 +2350,7 @@ xfs_bmap_btalloc( * of minlen+alignment+slop doesn't go up * between the calls. */ - if (blen > mp->m_dalign && blen <= ap->alen) + if (blen > mp->m_dalign && blen <= args.maxlen) nextminlen = blen - mp->m_dalign; else nextminlen = args.minlen; @@ -2508,7 +2377,7 @@ xfs_bmap_btalloc( * turned on. */ args.type = atype; - args.fsbno = ap->rval; + args.fsbno = ap->blkno; args.alignment = mp->m_dalign; args.minlen = nextminlen; args.minalignslop = 0; @@ -2522,7 +2391,7 @@ xfs_bmap_btalloc( * try again. 
*/ args.type = atype; - args.fsbno = ap->rval; + args.fsbno = ap->blkno; args.alignment = 0; if ((error = xfs_alloc_vextent(&args))) return error; @@ -2531,7 +2400,7 @@ xfs_bmap_btalloc( args.minlen > ap->minlen) { args.minlen = ap->minlen; args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = ap->rval; + args.fsbno = ap->blkno; if ((error = xfs_alloc_vextent(&args))) return error; } @@ -2542,13 +2411,26 @@ xfs_bmap_btalloc( args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; - ap->low = 1; + ap->flist->xbf_low = 1; } if (args.fsbno != NULLFSBLOCK) { - ap->firstblock = ap->rval = args.fsbno; + /* + * check the allocation happened at the same or higher AG than + * the first block that was allocated. + */ + ASSERT(*ap->firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(mp, *ap->firstblock) == + XFS_FSB_TO_AGNO(mp, args.fsbno) || + (ap->flist->xbf_low && + XFS_FSB_TO_AGNO(mp, *ap->firstblock) < + XFS_FSB_TO_AGNO(mp, args.fsbno))); + + ap->blkno = args.fsbno; + if (*ap->firstblock == NULLFSBLOCK) + *ap->firstblock = args.fsbno; ASSERT(nullfb || fb_agno == args.agno || - (ap->low && fb_agno < args.agno)); - ap->alen = args.len; + (ap->flist->xbf_low && fb_agno < args.agno)); + ap->length = args.len; ap->ip->i_d.di_nblocks += args.len; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) @@ -2562,8 +2444,8 @@ xfs_bmap_btalloc( XFS_TRANS_DQ_BCOUNT, (long) args.len); } else { - ap->rval = NULLFSBLOCK; - ap->alen = 0; + ap->blkno = NULLFSBLOCK; + ap->length = 0; } return 0; } @@ -2620,8 +2502,9 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_check_lptr(cur, cbno, 1))) return error; #endif - if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, - XFS_BMAP_BTREE_REF))) + error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) return error; cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) @@ -2648,13 +2531,12 @@ STATIC int /* error */ 
xfs_bmap_del_extent( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ - xfs_extnum_t idx, /* extent number to update/delete */ + xfs_extnum_t *idx, /* extent number to update/delete */ xfs_bmap_free_t *flist, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd) /* OK to allocate reserved blocks */ + int whichfork) /* data or attr fork */ { xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ @@ -2685,10 +2567,10 @@ xfs_bmap_del_extent( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((idx >= 0) && (idx < ifp->if_bytes / + ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(del->br_blockcount > 0); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); ASSERT(got.br_startoff <= del->br_startoff); del_endoff = del->br_startoff + del->br_blockcount; @@ -2719,8 +2601,8 @@ xfs_bmap_del_extent( len = del->br_blockcount; do_div(bno, mp->m_sb.sb_rextsize); do_div(len, mp->m_sb.sb_rextsize); - if ((error = xfs_rtfree_extent(ip->i_transp, bno, - (xfs_extlen_t)len))) + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) goto done; do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; @@ -2762,11 +2644,12 @@ xfs_bmap_del_extent( /* * Matches the whole extent. Delete the entry. */ - xfs_iext_remove(ip, idx, 1, + xfs_iext_remove(ip, *idx, 1, whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); - ifp->if_lastex = idx; + --*idx; if (delay) break; + XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); flags |= XFS_ILOG_CORE; @@ -2783,21 +2666,20 @@ xfs_bmap_del_extent( /* * Deleting the first part of the extent. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, del_endoff); temp = got.br_blockcount - del->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); da_new = temp; break; } xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -2813,18 +2695,17 @@ xfs_bmap_del_extent( * Deleting the last part of the extent. */ temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); da_new = temp; break; } - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -2841,7 +2722,7 @@ xfs_bmap_del_extent( * Deleting the middle of the extent. 
*/ temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); new.br_startoff = del_endoff; temp2 = got_endoff - del_endoff; @@ -2928,9 +2809,9 @@ xfs_bmap_del_extent( } } } - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - xfs_iext_insert(ip, idx + 1, 1, &new, state); - ifp->if_lastex = idx + 1; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_insert(ip, *idx + 1, 1, &new, state); + ++*idx; break; } /* @@ -2957,7 +2838,7 @@ xfs_bmap_del_extent( ASSERT(da_old >= da_new); if (da_old > da_new) { xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - da_new), rsvd); + (int64_t)(da_old - da_new), 0); } done: *logflagsp = flags; @@ -3014,8 +2895,7 @@ xfs_bmap_extents_to_btree( ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); - ASSERT(ifp->if_ext_max == - XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); + /* * Make space in the inode incore. */ @@ -3044,6 +2924,7 @@ xfs_bmap_extents_to_btree( * Convert to a btree with two levels, one record in root. */ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); + memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = mp; args.firstblock = *firstblock; @@ -3058,8 +2939,6 @@ xfs_bmap_extents_to_btree( args.fsbno = *firstblock; } args.minlen = args.maxlen = args.prod = 1; - args.total = args.minleft = args.alignment = args.mod = args.isfl = - args.minalignslop = 0; args.wasdel = wasdel; *logflagsp = 0; if ((error = xfs_alloc_vextent(&args))) { @@ -3083,6 +2962,7 @@ xfs_bmap_extents_to_btree( /* * Fill in the child block. 
*/ + abp->b_ops = &xfs_bmbt_buf_ops; ablock = XFS_BUF_TO_BLOCK(abp); ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); ablock->bb_level = 0; @@ -3161,13 +3041,8 @@ xfs_bmap_forkoff_reset( ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; - if (dfl_forkoff > ip->i_d.di_forkoff) { + if (dfl_forkoff > ip->i_d.di_forkoff) ip->i_d.di_forkoff = dfl_forkoff; - ip->i_df.if_ext_max = - XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t); - ip->i_afp->if_ext_max = - XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t); - } } } @@ -3194,8 +3069,7 @@ xfs_bmap_local_to_extents( * We don't want to deal with the case of keeping inode data inline yet. * So sending the data fork of a regular inode is invalid. */ - ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG && - whichfork == XFS_DATA_FORK)); + ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); flags = 0; @@ -3205,6 +3079,7 @@ xfs_bmap_local_to_extents( xfs_buf_t *bp; /* buffer for extent block */ xfs_bmbt_rec_host_t *ep;/* extent record pointer */ + memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = ip->i_mount; args.firstblock = *firstblock; @@ -3222,8 +3097,6 @@ xfs_bmap_local_to_extents( args.type = XFS_ALLOCTYPE_NEAR_BNO; } args.total = total; - args.mod = args.minleft = args.alignment = args.wasdel = - args.isfl = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; if ((error = xfs_alloc_vextent(&args))) goto done; @@ -3234,8 +3107,8 @@ xfs_bmap_local_to_extents( ASSERT(args.len == 1); *firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data, - ifp->if_bytes); + bp->b_ops = &xfs_bmbt_buf_ops; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, 
whichfork); @@ -3341,7 +3214,7 @@ xfs_bmap_search_extents( if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { - xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, + xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " "blkcnt: %llx extent-state: %x lastx: %x\n", @@ -3409,8 +3282,6 @@ xfs_bmap_add_attrfork( int error; /* error return value */ ASSERT(XFS_IFORK_Q(ip) == 0); - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); mp = ip->i_mount; ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); @@ -3441,7 +3312,7 @@ xfs_bmap_add_attrfork( } ASSERT(ip->i_d.di_anextents == 0); - xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -3465,12 +3336,9 @@ xfs_bmap_add_attrfork( error = XFS_ERROR(EINVAL); goto error1; } - ip->i_df.if_ext_max = - XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(ip->i_afp == NULL); ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); - ip->i_afp->if_ext_max = - XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); ip->i_afp->if_flags = XFS_IFEXTENTS; logflags = 0; xfs_bmap_init(&flist, &firstblock); @@ -3514,20 +3382,17 @@ xfs_bmap_add_attrfork( } else spin_unlock(&mp->m_sb_lock); } - if ((error = xfs_bmap_finish(&tp, &flist, &committed))) + + error = xfs_bmap_finish(&tp, &flist, &committed); + if (error) goto error2; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); - return error; + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error2: xfs_bmap_cancel(&flist); error1: xfs_iunlock(ip, XFS_ILOCK_EXCL); error0: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / 
(uint)sizeof(xfs_bmbt_rec_t)); return error; } @@ -3753,42 +3618,122 @@ xfs_bmap_last_before( return 0; } +STATIC int +xfs_bmap_last_extent( + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *rec, + int *is_empty) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int error; + int nextents; + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *is_empty = 1; + return 0; + } + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); + *is_empty = 0; + return 0; +} + +/* + * Check the last inode extent to determine whether this allocation will result + * in blocks being allocated at the end of the file. When we allocate new data + * blocks at the end of the file which do not start at the previous data block, + * we will try to align the new blocks at stripe unit boundaries. + * + * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be + * at, or past the EOF. + */ +STATIC int +xfs_bmap_isaeof( + struct xfs_bmalloca *bma, + int whichfork) +{ + struct xfs_bmbt_irec rec; + int is_empty; + int error; + + bma->aeof = 0; + error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, + &is_empty); + if (error || is_empty) + return error; + + /* + * Check if we are allocation or past the last extent, or at least into + * the last delayed allocated extent. + */ + bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || + (bma->offset >= rec.br_startoff && + isnullstartblock(rec.br_startblock)); + return 0; +} + +/* + * Check if the endoff is outside the last extent. If so the caller will grow + * the allocation to a stripe unit boundary. All offsets are considered outside + * the end of file for an empty fork, so 1 is returned in *eof in that case. 
+ */ +int +xfs_bmap_eof( + struct xfs_inode *ip, + xfs_fileoff_t endoff, + int whichfork, + int *eof) +{ + struct xfs_bmbt_irec rec; + int error; + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); + if (error || *eof) + return error; + + *eof = endoff >= rec.br_startoff + rec.br_blockcount; + return 0; +} + /* * Returns the file-relative block number of the first block past eof in * the file. This is not based on i_size, it is based on the extent records. * Returns 0 for local files, as they do not have extent records. */ -int /* error */ +int xfs_bmap_last_offset( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_fileoff_t *last_block, /* last block */ - int whichfork) /* data or attr fork */ + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fileoff_t *last_block, + int whichfork) { - xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ - int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t nextents; /* number of extent entries */ + struct xfs_bmbt_irec rec; + int is_empty; + int error; + + *last_block = 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) + return 0; if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) return XFS_ERROR(EIO); - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - *last_block = 0; - return 0; - } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (!nextents) { - *last_block = 0; - return 0; - } - ep = xfs_iext_get_ext(ifp, nextents - 1); - *last_block = 
xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep); + + *last_block = rec.br_startoff + rec.br_blockcount; return 0; } @@ -3808,11 +3753,8 @@ xfs_bmap_one_block( xfs_bmbt_irec_t s; /* internal version of extent */ #ifndef DEBUG - if (whichfork == XFS_DATA_FORK) { - return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ? - (ip->i_size == ip->i_mount->m_sb.sb_blocksize) : - (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); - } + if (whichfork == XFS_DATA_FORK) + return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; #endif /* !DEBUG */ if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) return 0; @@ -3824,7 +3766,7 @@ xfs_bmap_one_block( xfs_bmbt_get_all(ep, &s); rval = s.br_startoff == 0 && s.br_blockcount == 1; if (rval && whichfork == XFS_DATA_FORK) - ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize); + ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); return rval; } @@ -3836,7 +3778,7 @@ xfs_bmap_sanity_check( { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || + if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || be16_to_cpu(block->bb_level) != level || be16_to_cpu(block->bb_numrecs) == 0 || be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) @@ -3890,8 +3832,9 @@ xfs_bmap_read_extents( * pointer (leftmost) at each level. 
*/ while (level-- > 0) { - if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF))) + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + if (error) return error; block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( @@ -3918,16 +3861,14 @@ xfs_bmap_read_extents( xfs_extnum_t num_recs; xfs_extnum_t start; - num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { ASSERT(i + num_recs <= room); - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "corrupt dinode %Lu, (btree extents).", (unsigned long long) ip->i_ino); - XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", - XFS_ERRLEVEL_LOW, - ip->i_mount); + XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", + XFS_ERRLEVEL_LOW, ip->i_mount, block); goto error0; } XFS_WANT_CORRUPTED_GOTO( @@ -3938,7 +3879,8 @@ xfs_bmap_read_extents( */ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); if (nextbno != NULLFSBLOCK) - xfs_btree_reada_bufl(mp, nextbno, 1); + xfs_btree_reada_bufl(mp, nextbno, 1, + &xfs_bmbt_buf_ops); /* * Copy records into the extent records. */ @@ -3970,8 +3912,9 @@ xfs_bmap_read_extents( */ if (bno == NULLFSBLOCK) break; - if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF))) + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); + if (error) return error; block = XFS_BUF_TO_BLOCK(bp); } @@ -4042,9 +3985,8 @@ xfs_bmap_validate_ret( ASSERT(i == 0 || mval[i - 1].br_startoff + mval[i - 1].br_blockcount == mval[i].br_startoff); - if ((flags & XFS_BMAPI_WRITE) && !(flags & XFS_BMAPI_DELAY)) - ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && - mval[i].br_startblock != HOLESTARTBLOCK); + ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && + mval[i].br_startblock != HOLESTARTBLOCK); ASSERT(mval[i].br_state == XFS_EXT_NORM || mval[i].br_state == XFS_EXT_UNWRITTEN); } @@ -4053,395 +3995,166 @@ xfs_bmap_validate_ret( /* - * Map file blocks to filesystem blocks. 
- * File range is given by the bno/len pair. - * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set) - * into a hole or past eof. - * Only allocates blocks from a single allocation group, - * to avoid locking problems. - * The returned value in "firstblock" from the first call in a transaction - * must be remembered and presented to subsequent calls in "firstblock". - * An upper bound for the number of blocks to be allocated is supplied to - * the first call in "total"; if no allocation group has that many free - * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). + * Trim the returned map to the required bounds */ -int /* error */ -xfs_bmapi( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting file offs. mapped */ - xfs_filblks_t len, /* length to map in file */ - int flags, /* XFS_BMAPI_... */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ - xfs_extlen_t total, /* total blocks needed */ - xfs_bmbt_irec_t *mval, /* output: map values */ - int *nmap, /* i/o: mval size/count */ - xfs_bmap_free_t *flist) /* i/o: list extents to free */ +STATIC void +xfs_bmapi_trim_map( + struct xfs_bmbt_irec *mval, + struct xfs_bmbt_irec *got, + xfs_fileoff_t *bno, + xfs_filblks_t len, + xfs_fileoff_t obno, + xfs_fileoff_t end, + int n, + int flags) { - xfs_fsblock_t abno; /* allocated block number */ - xfs_extlen_t alen; /* allocated extent length */ - xfs_fileoff_t aoff; /* allocated file offset */ - xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */ - xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_fileoff_t end; /* end of mapped file region */ - int eof; /* we've hit the end of extents */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - int error; /* error return */ - xfs_bmbt_irec_t got; /* current file extent record */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extlen_t indlen; /* indirect blocks length */ - xfs_extnum_t 
lastx; /* last useful extent number */ - int logflags; /* flags for transaction logging */ - xfs_extlen_t minleft; /* min blocks left after allocation */ - xfs_extlen_t minlen; /* min allocation size */ - xfs_mount_t *mp; /* xfs mount structure */ - int n; /* current extent index */ - int nallocs; /* number of extents alloc'd */ - xfs_extnum_t nextents; /* number of extents in file */ - xfs_fileoff_t obno; /* old block number (offset) */ - xfs_bmbt_irec_t prev; /* previous file extent record */ - int tmp_logflags; /* temp flags holder */ - int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ - char wr; /* this is a write request */ - char rt; /* this is a realtime file */ -#ifdef DEBUG - xfs_fileoff_t orig_bno; /* original block number value */ - int orig_flags; /* original flags arg value */ - xfs_filblks_t orig_len; /* original value of len arg */ - xfs_bmbt_irec_t *orig_mval; /* original value of mval */ - int orig_nmap; /* original value of *nmap */ - - orig_bno = bno; - orig_len = len; - orig_flags = flags; - orig_mval = mval; - orig_nmap = *nmap; -#endif - ASSERT(*nmap >= 1); - ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE)); - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
- XFS_ATTR_FORK : XFS_DATA_FORK; - mp = ip->i_mount; - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + if ((flags & XFS_BMAPI_ENTIRE) || + got->br_startoff + got->br_blockcount <= obno) { + *mval = *got; + if (isnullstartblock(got->br_startblock)) + mval->br_startblock = DELAYSTARTBLOCK; + return; } - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(ifp->if_ext_max == - XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); - if ((wr = (flags & XFS_BMAPI_WRITE)) != 0) - XFS_STATS_INC(xs_blk_mapw); + + if (obno > *bno) + *bno = obno; + ASSERT((*bno >= obno) || (n == 0)); + ASSERT(*bno < end); + mval->br_startoff = *bno; + if (isnullstartblock(got->br_startblock)) + mval->br_startblock = DELAYSTARTBLOCK; else - XFS_STATS_INC(xs_blk_mapr); + mval->br_startblock = got->br_startblock + + (*bno - got->br_startoff); /* - * IGSTATE flag is used to combine extents which - * differ only due to the state of the extents. - * This technique is used from xfs_getbmap() - * when the caller does not wish to see the - * separation (which is the default). - * - * This technique is also used when writing a - * buffer which has been partially written, - * (usually by being flushed during a chunkread), - * to ensure one write takes place. This also - * prevents a change in the xfs inode extents at - * this time, intentionally. This change occurs - * on completion of the write operation, in - * xfs_strat_comp(), where the xfs_bmapi() call - * is transactioned, and the extents combined. 
+ * Return the minimum of what we got and what we asked for for + * the length. We can use the len variable here because it is + * modified below and we could have been there before coming + * here if the first part of the allocation didn't overlap what + * was asked for. */ - if ((flags & XFS_BMAPI_IGSTATE) && wr) /* if writing unwritten space */ - wr = 0; /* no allocations are allowed */ - ASSERT(wr || !(flags & XFS_BMAPI_DELAY)); - logflags = 0; - nallocs = 0; - cur = NULL; - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - ASSERT(wr && tp); - if ((error = xfs_bmap_local_to_extents(tp, ip, - firstblock, total, &logflags, whichfork))) - goto error0; - } - if (wr && *firstblock == NULLFSBLOCK) { - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) - minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; - else - minleft = 1; - } else - minleft = 0; - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - goto error0; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - n = 0; - end = bno + len; - obno = bno; - bma.ip = NULL; + mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, + got->br_blockcount - (*bno - got->br_startoff)); + mval->br_state = got->br_state; + ASSERT(mval->br_blockcount <= len); + return; +} - while (bno < end && n < *nmap) { - /* - * Reading past eof, act as though there's a hole - * up to end. - */ - if (eof && !wr) - got.br_startoff = end; - inhole = eof || got.br_startoff > bno; - wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) && - isnullstartblock(got.br_startblock); - /* - * First, deal with the hole before the allocated space - * that we found, if any. - */ - if (wr && (inhole || wasdelay)) { - /* - * For the wasdelay case, we could also just - * allocate the stuff asked for in this bmap call - * but that wouldn't be as good. 
- */ - if (wasdelay) { - alen = (xfs_extlen_t)got.br_blockcount; - aoff = got.br_startoff; - if (lastx != NULLEXTNUM && lastx) { - ep = xfs_iext_get_ext(ifp, lastx - 1); - xfs_bmbt_get_all(ep, &prev); - } - } else { - alen = (xfs_extlen_t) - XFS_FILBLKS_MIN(len, MAXEXTLEN); - if (!eof) - alen = (xfs_extlen_t) - XFS_FILBLKS_MIN(alen, - got.br_startoff - bno); - aoff = bno; - } - minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1; - if (flags & XFS_BMAPI_DELAY) { - xfs_extlen_t extsz; - - /* Figure out the extent size, adjust alen */ - extsz = xfs_get_extsz_hint(ip); - if (extsz) { - error = xfs_bmap_extsize_align(mp, - &got, &prev, extsz, - rt, eof, - flags&XFS_BMAPI_DELAY, - flags&XFS_BMAPI_CONVERT, - &aoff, &alen); - ASSERT(!error); - } +/* + * Update and validate the extent map to return + */ +STATIC void +xfs_bmapi_update_map( + struct xfs_bmbt_irec **map, + xfs_fileoff_t *bno, + xfs_filblks_t *len, + xfs_fileoff_t obno, + xfs_fileoff_t end, + int *n, + int flags) +{ + xfs_bmbt_irec_t *mval = *map; + + ASSERT((flags & XFS_BMAPI_ENTIRE) || + ((mval->br_startoff + mval->br_blockcount) <= end)); + ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || + (mval->br_startoff < obno)); + + *bno = mval->br_startoff + mval->br_blockcount; + *len = end - *bno; + if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { + /* update previous map with new information */ + ASSERT(mval->br_startblock == mval[-1].br_startblock); + ASSERT(mval->br_blockcount > mval[-1].br_blockcount); + ASSERT(mval->br_state == mval[-1].br_state); + mval[-1].br_blockcount = mval->br_blockcount; + mval[-1].br_state = mval->br_state; + } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && + mval[-1].br_startblock != DELAYSTARTBLOCK && + mval[-1].br_startblock != HOLESTARTBLOCK && + mval->br_startblock == mval[-1].br_startblock + + mval[-1].br_blockcount && + ((flags & XFS_BMAPI_IGSTATE) || + mval[-1].br_state == mval->br_state)) { + ASSERT(mval->br_startoff == + 
mval[-1].br_startoff + mval[-1].br_blockcount); + mval[-1].br_blockcount += mval->br_blockcount; + } else if (*n > 0 && + mval->br_startblock == DELAYSTARTBLOCK && + mval[-1].br_startblock == DELAYSTARTBLOCK && + mval->br_startoff == + mval[-1].br_startoff + mval[-1].br_blockcount) { + mval[-1].br_blockcount += mval->br_blockcount; + mval[-1].br_state = mval->br_state; + } else if (!((*n == 0) && + ((mval->br_startoff + mval->br_blockcount) <= + obno))) { + mval++; + (*n)++; + } + *map = mval; +} - if (rt) - extsz = alen / mp->m_sb.sb_rextsize; +/* + * Map file blocks to filesystem blocks without allocation. + */ +int +xfs_bmapi_read( + struct xfs_inode *ip, + xfs_fileoff_t bno, + xfs_filblks_t len, + struct xfs_bmbt_irec *mval, + int *nmap, + int flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec prev; + xfs_fileoff_t obno; + xfs_fileoff_t end; + xfs_extnum_t lastx; + int error; + int eof; + int n = 0; + int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; - /* - * Make a transaction-less quota reservation for - * delayed allocation blocks. This number gets - * adjusted later. We return if we haven't - * allocated blocks already inside this loop. - */ - error = xfs_trans_reserve_quota_nblks( - NULL, ip, (long)alen, 0, - rt ? XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS); - if (error) { - if (n == 0) { - *nmap = 0; - ASSERT(cur == NULL); - return error; - } - break; - } + ASSERT(*nmap >= 1); + ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| + XFS_BMAPI_IGSTATE))); - /* - * Split changing sb for alen and indlen since - * they could be coming from different places. 
- */ - indlen = (xfs_extlen_t) - xfs_bmap_worst_indlen(ip, alen); - ASSERT(indlen > 0); - - if (rt) { - error = xfs_mod_incore_sb(mp, - XFS_SBS_FREXTENTS, - -((int64_t)extsz), (flags & - XFS_BMAPI_RSVBLOCKS)); - } else { - error = xfs_icsb_modify_counters(mp, - XFS_SBS_FDBLOCKS, - -((int64_t)alen), (flags & - XFS_BMAPI_RSVBLOCKS)); - } - if (!error) { - error = xfs_icsb_modify_counters(mp, - XFS_SBS_FDBLOCKS, - -((int64_t)indlen), (flags & - XFS_BMAPI_RSVBLOCKS)); - if (error && rt) - xfs_mod_incore_sb(mp, - XFS_SBS_FREXTENTS, - (int64_t)extsz, (flags & - XFS_BMAPI_RSVBLOCKS)); - else if (error) - xfs_icsb_modify_counters(mp, - XFS_SBS_FDBLOCKS, - (int64_t)alen, (flags & - XFS_BMAPI_RSVBLOCKS)); - } + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); + return XFS_ERROR(EFSCORRUPTED); + } - if (error) { - if (XFS_IS_QUOTA_ON(mp)) - /* unreserve the blocks now */ - (void) - xfs_trans_unreserve_quota_nblks( - NULL, ip, - (long)alen, 0, rt ? - XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS); - break; - } + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); - ip->i_delayed_blks += alen; - abno = nullstartblock(indlen); - } else { - /* - * If first time, allocate and fill in - * once-only bma fields. - */ - if (bma.ip == NULL) { - bma.tp = tp; - bma.ip = ip; - bma.prevp = &prev; - bma.gotp = &got; - bma.total = total; - bma.userdata = 0; - } - /* Indicate if this is the first user data - * in the file, or just any user data. - */ - if (!(flags & XFS_BMAPI_METADATA)) { - bma.userdata = (aoff == 0) ? - XFS_ALLOC_INITIAL_USER_DATA : - XFS_ALLOC_USERDATA; - } - /* - * Fill in changeable bma fields. 
- */ - bma.eof = eof; - bma.firstblock = *firstblock; - bma.alen = alen; - bma.off = aoff; - bma.conv = !!(flags & XFS_BMAPI_CONVERT); - bma.wasdel = wasdelay; - bma.minlen = minlen; - bma.low = flist->xbf_low; - bma.minleft = minleft; - /* - * Only want to do the alignment at the - * eof if it is userdata and allocation length - * is larger than a stripe unit. - */ - if (mp->m_dalign && alen >= mp->m_dalign && - (!(flags & XFS_BMAPI_METADATA)) && - (whichfork == XFS_DATA_FORK)) { - if ((error = xfs_bmap_isaeof(ip, aoff, - whichfork, &bma.aeof))) - goto error0; - } else - bma.aeof = 0; - /* - * Call allocator. - */ - if ((error = xfs_bmap_alloc(&bma))) - goto error0; - /* - * Copy out result fields. - */ - abno = bma.rval; - if ((flist->xbf_low = bma.low)) - minleft = 0; - alen = bma.alen; - aoff = bma.off; - ASSERT(*firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *firstblock) == - XFS_FSB_TO_AGNO(mp, bma.firstblock) || - (flist->xbf_low && - XFS_FSB_TO_AGNO(mp, *firstblock) < - XFS_FSB_TO_AGNO(mp, bma.firstblock))); - *firstblock = bma.firstblock; - if (cur) - cur->bc_private.b.firstblock = - *firstblock; - if (abno == NULLFSBLOCK) - break; - if ((ifp->if_flags & XFS_IFBROOT) && !cur) { - cur = xfs_bmbt_init_cursor(mp, tp, - ip, whichfork); - cur->bc_private.b.firstblock = - *firstblock; - cur->bc_private.b.flist = flist; - } - /* - * Bump the number of extents we've allocated - * in this call. - */ - nallocs++; - } - if (cur) - cur->bc_private.b.flags = - wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0; - got.br_startoff = aoff; - got.br_startblock = abno; - got.br_blockcount = alen; - got.br_state = XFS_EXT_NORM; /* assume normal */ - /* - * Determine state of extent, and the filesystem. - * A wasdelay extent has been initialized, so - * shouldn't be flagged as unwritten. 
- */ - if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) { - if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) - got.br_state = XFS_EXT_UNWRITTEN; - } - error = xfs_bmap_add_extent(ip, lastx, &cur, &got, - firstblock, flist, &tmp_logflags, - whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); - logflags |= tmp_logflags; - if (error) - goto error0; - lastx = ifp->if_lastex; - ep = xfs_iext_get_ext(ifp, lastx); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - xfs_bmbt_get_all(ep, &got); - ASSERT(got.br_startoff <= aoff); - ASSERT(got.br_startoff + got.br_blockcount >= - aoff + alen); -#ifdef DEBUG - if (flags & XFS_BMAPI_DELAY) { - ASSERT(isnullstartblock(got.br_startblock)); - ASSERT(startblockval(got.br_startblock) > 0); - } - ASSERT(got.br_state == XFS_EXT_NORM || - got.br_state == XFS_EXT_UNWRITTEN); -#endif - /* - * Fall down into the found allocated space case. - */ - } else if (inhole) { - /* - * Reading in a hole. - */ + XFS_STATS_INC(xs_blk_mapr); + + ifp = XFS_IFORK_PTR(ip, whichfork); + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, whichfork); + if (error) + return error; + } + + xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); + end = bno + len; + obno = bno; + + while (bno < end && n < *nmap) { + /* Reading past eof, act as though there's a hole up to end. */ + if (eof) + got.br_startoff = end; + if (got.br_startoff > bno) { + /* Reading in a hole. */ mval->br_startoff = bno; mval->br_startblock = HOLESTARTBLOCK; mval->br_blockcount = @@ -4453,193 +4166,624 @@ xfs_bmapi( n++; continue; } + + /* set up the extent map to return. */ + xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); + xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); + + /* If we're done, stop now. */ + if (bno >= end || n >= *nmap) + break; + + /* Else go on to the next record. 
*/ + if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); + else + eof = 1; + } + *nmap = n; + return 0; +} + +STATIC int +xfs_bmapi_reserve_delalloc( + struct xfs_inode *ip, + xfs_fileoff_t aoff, + xfs_filblks_t len, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *prev, + xfs_extnum_t *lastx, + int eof) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_extlen_t alen; + xfs_extlen_t indlen; + char rt = XFS_IS_REALTIME_INODE(ip); + xfs_extlen_t extsz; + int error; + + alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + if (!eof) + alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + + /* Figure out the extent size, adjust alen */ + extsz = xfs_get_extsz_hint(ip); + if (extsz) { /* - * Then deal with the allocated space we found. + * Make sure we don't exceed a single extent length when we + * align the extent by reducing length we are going to + * allocate by the maximum amount extent size aligment may + * require. */ - ASSERT(ep != NULL); - if (!(flags & XFS_BMAPI_ENTIRE) && - (got.br_startoff + got.br_blockcount > obno)) { - if (obno > bno) - bno = obno; - ASSERT((bno >= obno) || (n == 0)); - ASSERT(bno < end); - mval->br_startoff = bno; - if (isnullstartblock(got.br_startblock)) { - ASSERT(!wr || (flags & XFS_BMAPI_DELAY)); - mval->br_startblock = DELAYSTARTBLOCK; - } else - mval->br_startblock = - got.br_startblock + - (bno - got.br_startoff); - /* - * Return the minimum of what we got and what we - * asked for for the length. We can use the len - * variable here because it is modified below - * and we could have been there before coming - * here if the first part of the allocation - * didn't overlap what was asked for. 
- */ - mval->br_blockcount = - XFS_FILBLKS_MIN(end - bno, got.br_blockcount - - (bno - got.br_startoff)); - mval->br_state = got.br_state; - ASSERT(mval->br_blockcount <= len); - } else { - *mval = got; - if (isnullstartblock(mval->br_startblock)) { - ASSERT(!wr || (flags & XFS_BMAPI_DELAY)); - mval->br_startblock = DELAYSTARTBLOCK; + alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); + error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, + 1, 0, &aoff, &alen); + ASSERT(!error); + } + + if (rt) + extsz = alen / mp->m_sb.sb_rextsize; + + /* + * Make a transaction-less quota reservation for delayed allocation + * blocks. This number gets adjusted later. We return if we haven't + * allocated blocks already inside this loop. + */ + error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, + rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + if (error) + return error; + + /* + * Split changing sb for alen and indlen since they could be coming + * from different places. + */ + indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); + ASSERT(indlen > 0); + + if (rt) { + error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, + -((int64_t)extsz), 0); + } else { + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + -((int64_t)alen), 0); + } + + if (error) + goto out_unreserve_quota; + + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + -((int64_t)indlen), 0); + if (error) + goto out_unreserve_blocks; + + + ip->i_delayed_blks += alen; + + got->br_startoff = aoff; + got->br_startblock = nullstartblock(indlen); + got->br_blockcount = alen; + got->br_state = XFS_EXT_NORM; + xfs_bmap_add_extent_hole_delay(ip, lastx, got); + + /* + * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay + * might have merged it into one of the neighbouring ones. 
+ */ + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + + ASSERT(got->br_startoff <= aoff); + ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); + ASSERT(isnullstartblock(got->br_startblock)); + ASSERT(got->br_state == XFS_EXT_NORM); + return 0; + +out_unreserve_blocks: + if (rt) + xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); + else + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); +out_unreserve_quota: + if (XFS_IS_QUOTA_ON(mp)) + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? + XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + return error; +} + +/* + * Map file blocks to filesystem blocks, adding delayed allocations as needed. + */ +int +xfs_bmapi_delay( + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting file offs. mapped */ + xfs_filblks_t len, /* length to map in file */ + struct xfs_bmbt_irec *mval, /* output: map values */ + int *nmap, /* i/o: mval size/count */ + int flags) /* XFS_BMAPI_... */ +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + struct xfs_bmbt_irec got; /* current file extent record */ + struct xfs_bmbt_irec prev; /* previous file extent record */ + xfs_fileoff_t obno; /* old block number (offset) */ + xfs_fileoff_t end; /* end of mapped file region */ + xfs_extnum_t lastx; /* last useful extent number */ + int eof; /* we've hit the end of extents */ + int n = 0; /* current extent index */ + int error = 0; + + ASSERT(*nmap >= 1); + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); + ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); + return XFS_ERROR(EFSCORRUPTED); + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + XFS_STATS_INC(xs_blk_mapw); + + if 
(!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + return error; + } + + xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev); + end = bno + len; + obno = bno; + + while (bno < end && n < *nmap) { + if (eof || got.br_startoff > bno) { + error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got, + &prev, &lastx, eof); + if (error) { + if (n == 0) { + *nmap = 0; + return error; + } + break; } } + /* set up the extent map to return. */ + xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); + xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); + + /* If we're done, stop now. */ + if (bno >= end || n >= *nmap) + break; + + /* Else go on to the next record. */ + prev = got; + if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); + else + eof = 1; + } + + *nmap = n; + return 0; +} + + +STATIC int +xfs_bmapi_allocate( + struct xfs_bmalloca *bma) +{ + struct xfs_mount *mp = bma->ip->i_mount; + int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); + int tmp_logflags = 0; + int error; + + ASSERT(bma->length > 0); + + /* + * For the wasdelay case, we could also just allocate the stuff asked + * for in this bmap call but that wouldn't be as good. + */ + if (bma->wasdel) { + bma->length = (xfs_extlen_t)bma->got.br_blockcount; + bma->offset = bma->got.br_startoff; + if (bma->idx != NULLEXTNUM && bma->idx) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), + &bma->prev); + } + } else { + bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); + if (!bma->eof) + bma->length = XFS_FILBLKS_MIN(bma->length, + bma->got.br_startoff - bma->offset); + } + + /* + * Indicate if this is the first user data in the file, or just any + * user data. + */ + if (!(bma->flags & XFS_BMAPI_METADATA)) { + bma->userdata = (bma->offset == 0) ? 
+ XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; + } + + bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; + + /* + * Only want to do the alignment at the eof if it is userdata and + * allocation length is larger than a stripe unit. + */ + if (mp->m_dalign && bma->length >= mp->m_dalign && + !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { + error = xfs_bmap_isaeof(bma, whichfork); + if (error) + return error; + } + + if (bma->flags & XFS_BMAPI_STACK_SWITCH) + bma->stack_switch = 1; + + error = xfs_bmap_alloc(bma); + if (error) + return error; + + if (bma->flist->xbf_low) + bma->minleft = 0; + if (bma->cur) + bma->cur->bc_private.b.firstblock = *bma->firstblock; + if (bma->blkno == NULLFSBLOCK) + return 0; + if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { + bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); + bma->cur->bc_private.b.firstblock = *bma->firstblock; + bma->cur->bc_private.b.flist = bma->flist; + } + /* + * Bump the number of extents we've allocated + * in this call. + */ + bma->nallocs++; + + if (bma->cur) + bma->cur->bc_private.b.flags = + bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + + bma->got.br_startoff = bma->offset; + bma->got.br_startblock = bma->blkno; + bma->got.br_blockcount = bma->length; + bma->got.br_state = XFS_EXT_NORM; + + /* + * A wasdelay extent has been initialized, so shouldn't be flagged + * as unwritten. + */ + if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && + xfs_sb_version_hasextflgbit(&mp->m_sb)) + bma->got.br_state = XFS_EXT_UNWRITTEN; + + if (bma->wasdel) + error = xfs_bmap_add_extent_delay_real(bma); + else + error = xfs_bmap_add_extent_hole_real(bma, whichfork); + + bma->logflags |= tmp_logflags; + if (error) + return error; + + /* + * Update our extent pointer, given that xfs_bmap_add_extent_delay_real + * or xfs_bmap_add_extent_hole_real might have merged it into one of + * the neighbouring ones. 
+ */ + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); + + ASSERT(bma->got.br_startoff <= bma->offset); + ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= + bma->offset + bma->length); + ASSERT(bma->got.br_state == XFS_EXT_NORM || + bma->got.br_state == XFS_EXT_UNWRITTEN); + return 0; +} + + +STATIC int +xfs_bmapi_convert_unwritten( + struct xfs_bmalloca *bma, + struct xfs_bmbt_irec *mval, + xfs_filblks_t len, + int flags) +{ + int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); + int tmp_logflags = 0; + int error; + + /* check if we need to do unwritten->real conversion */ + if (mval->br_state == XFS_EXT_UNWRITTEN && + (flags & XFS_BMAPI_PREALLOC)) + return 0; + + /* check if we need to do real->unwritten conversion */ + if (mval->br_state == XFS_EXT_NORM && + (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != + (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) + return 0; + + /* + * Modify (by adding) the state flag, if writing. + */ + ASSERT(mval->br_blockcount <= len); + if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { + bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, + bma->ip, whichfork); + bma->cur->bc_private.b.firstblock = *bma->firstblock; + bma->cur->bc_private.b.flist = bma->flist; + } + mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) + ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, + &bma->cur, mval, bma->firstblock, bma->flist, + &tmp_logflags); + bma->logflags |= tmp_logflags; + if (error) + return error; + + /* + * Update our extent pointer, given that + * xfs_bmap_add_extent_unwritten_real might have merged it into one + * of the neighbouring ones. + */ + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); + + /* + * We may have combined previously unwritten space with written space, + * so generate another request. 
+ */ + if (mval->br_blockcount < len) + return EAGAIN; + return 0; +} + +/* + * Map file blocks to filesystem blocks, and allocate blocks or convert the + * extent state if necessary. Details behaviour is controlled by the flags + * parameter. Only allocates blocks from a single allocation group, to avoid + * locking problems. + * + * The returned value in "firstblock" from the first call in a transaction + * must be remembered and presented to subsequent calls in "firstblock". + * An upper bound for the number of blocks to be allocated is supplied to + * the first call in "total"; if no allocation group has that many free + * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). + */ +int +xfs_bmapi_write( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting file offs. mapped */ + xfs_filblks_t len, /* length to map in file */ + int flags, /* XFS_BMAPI_... */ + xfs_fsblock_t *firstblock, /* first allocated block + controls a.g. 
for allocs */ + xfs_extlen_t total, /* total blocks needed */ + struct xfs_bmbt_irec *mval, /* output: map values */ + int *nmap, /* i/o: mval size/count */ + struct xfs_bmap_free *flist) /* i/o: list extents to free */ +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + struct xfs_bmalloca bma = { 0 }; /* args for xfs_bmap_alloc */ + xfs_fileoff_t end; /* end of mapped file region */ + int eof; /* after the end of extents */ + int error; /* error return */ + int n; /* current extent index */ + xfs_fileoff_t obno; /* old block number (offset) */ + int whichfork; /* data or attr fork */ + char inhole; /* current location is hole in file */ + char wasdelay; /* old extent was delayed */ + +#ifdef DEBUG + xfs_fileoff_t orig_bno; /* original block number value */ + int orig_flags; /* original flags arg value */ + xfs_filblks_t orig_len; /* original value of len arg */ + struct xfs_bmbt_irec *orig_mval; /* original value of mval */ + int orig_nmap; /* original value of *nmap */ + + orig_bno = bno; + orig_len = len; + orig_flags = flags; + orig_mval = mval; + orig_nmap = *nmap; +#endif + + ASSERT(*nmap >= 1); + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); + ASSERT(!(flags & XFS_BMAPI_IGSTATE)); + ASSERT(tp != NULL); + ASSERT(len > 0); + + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
+ XFS_ATTR_FORK : XFS_DATA_FORK; + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); + return XFS_ERROR(EFSCORRUPTED); + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + ifp = XFS_IFORK_PTR(ip, whichfork); + + XFS_STATS_INC(xs_blk_mapw); + + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, + &bma.logflags, whichfork); + if (error) + goto error0; + } + + if (*firstblock == NULLFSBLOCK) { + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) + bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; + else + bma.minleft = 1; + } else { + bma.minleft = 0; + } + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + goto error0; + } + + xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, + &bma.prev); + n = 0; + end = bno + len; + obno = bno; + + bma.tp = tp; + bma.ip = ip; + bma.total = total; + bma.userdata = 0; + bma.flist = flist; + bma.firstblock = firstblock; + + while (bno < end && n < *nmap) { + inhole = eof || bma.got.br_startoff > bno; + wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + /* - * Check if writing previously allocated but - * unwritten extents. + * First, deal with the hole before the allocated space + * that we found, if any. 
*/ - if (wr && - ((mval->br_state == XFS_EXT_UNWRITTEN && - ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) || - (mval->br_state == XFS_EXT_NORM && - ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) == - (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) { + if (inhole || wasdelay) { + bma.eof = eof; + bma.conv = !!(flags & XFS_BMAPI_CONVERT); + bma.wasdel = wasdelay; + bma.offset = bno; + bma.flags = flags; + /* - * Modify (by adding) the state flag, if writing. + * There's a 32/64 bit type mismatch between the + * allocation length request (which can be 64 bits in + * length) and the bma length request, which is + * xfs_extlen_t and therefore 32 bits. Hence we have to + * check for 32-bit overflows and handle them here. */ - ASSERT(mval->br_blockcount <= len); - if ((ifp->if_flags & XFS_IFBROOT) && !cur) { - cur = xfs_bmbt_init_cursor(mp, - tp, ip, whichfork); - cur->bc_private.b.firstblock = - *firstblock; - cur->bc_private.b.flist = flist; - } - mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) - ? XFS_EXT_NORM - : XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, mval, - firstblock, flist, &tmp_logflags, - whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); - logflags |= tmp_logflags; + if (len > (xfs_filblks_t)MAXEXTLEN) + bma.length = MAXEXTLEN; + else + bma.length = len; + + ASSERT(len > 0); + ASSERT(bma.length > 0); + error = xfs_bmapi_allocate(&bma); if (error) goto error0; - lastx = ifp->if_lastex; - ep = xfs_iext_get_ext(ifp, lastx); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - xfs_bmbt_get_all(ep, &got); - /* - * We may have combined previously unwritten - * space with written space, so generate - * another request. 
- */ - if (mval->br_blockcount < len) - continue; + if (bma.blkno == NULLFSBLOCK) + break; } - ASSERT((flags & XFS_BMAPI_ENTIRE) || - ((mval->br_startoff + mval->br_blockcount) <= end)); - ASSERT((flags & XFS_BMAPI_ENTIRE) || - (mval->br_blockcount <= len) || - (mval->br_startoff < obno)); - bno = mval->br_startoff + mval->br_blockcount; - len = end - bno; - if (n > 0 && mval->br_startoff == mval[-1].br_startoff) { - ASSERT(mval->br_startblock == mval[-1].br_startblock); - ASSERT(mval->br_blockcount > mval[-1].br_blockcount); - ASSERT(mval->br_state == mval[-1].br_state); - mval[-1].br_blockcount = mval->br_blockcount; - mval[-1].br_state = mval->br_state; - } else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK && - mval[-1].br_startblock != DELAYSTARTBLOCK && - mval[-1].br_startblock != HOLESTARTBLOCK && - mval->br_startblock == - mval[-1].br_startblock + mval[-1].br_blockcount && - ((flags & XFS_BMAPI_IGSTATE) || - mval[-1].br_state == mval->br_state)) { - ASSERT(mval->br_startoff == - mval[-1].br_startoff + mval[-1].br_blockcount); - mval[-1].br_blockcount += mval->br_blockcount; - } else if (n > 0 && - mval->br_startblock == DELAYSTARTBLOCK && - mval[-1].br_startblock == DELAYSTARTBLOCK && - mval->br_startoff == - mval[-1].br_startoff + mval[-1].br_blockcount) { - mval[-1].br_blockcount += mval->br_blockcount; - mval[-1].br_state = mval->br_state; - } else if (!((n == 0) && - ((mval->br_startoff + mval->br_blockcount) <= - obno))) { - mval++; - n++; - } + /* Deal with the allocated space we found. */ + xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, + end, n, flags); + + /* Execute unwritten extent conversion if necessary */ + error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); + if (error == EAGAIN) + continue; + if (error) + goto error0; + + /* update the extent map to return */ + xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); + /* * If we're done, stop now. 
Stop when we've allocated * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise * the transaction may get too big. */ - if (bno >= end || n >= *nmap || nallocs >= *nmap) + if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) break; - /* - * Else go on to the next record. - */ - ep = xfs_iext_get_ext(ifp, ++lastx); - prev = got; - if (lastx >= nextents) + + /* Else go on to the next record. */ + bma.prev = bma.got; + if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), + &bma.got); + } else eof = 1; - else - xfs_bmbt_get_all(ep, &got); } - ifp->if_lastex = lastx; *nmap = n; + /* * Transform from btree to extents, give it cur. */ - if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && - XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { - ASSERT(wr && cur); - error = xfs_bmap_btree_to_extents(tp, ip, cur, + if (xfs_bmap_wants_extents(ip, whichfork)) { + int tmp_logflags = 0; + + ASSERT(bma.cur); + error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &tmp_logflags, whichfork); - logflags |= tmp_logflags; + bma.logflags |= tmp_logflags; if (error) goto error0; } - ASSERT(ifp->if_ext_max == - XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || - XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); + XFS_IFORK_NEXTENTS(ip, whichfork) > + XFS_IFORK_MAXEXT(ip, whichfork)); error = 0; error0: /* * Log everything. Do this after conversion, there's no point in * logging the extent records if we've converted to btree format. 
*/ - if ((logflags & xfs_ilog_fext(whichfork)) && + if ((bma.logflags & xfs_ilog_fext(whichfork)) && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - logflags &= ~xfs_ilog_fext(whichfork); - else if ((logflags & xfs_ilog_fbroot(whichfork)) && + bma.logflags &= ~xfs_ilog_fext(whichfork); + else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) - logflags &= ~xfs_ilog_fbroot(whichfork); + bma.logflags &= ~xfs_ilog_fbroot(whichfork); /* * Log whatever the flags say, even if error. Otherwise we might miss * detecting a case where the data is changed, there's an error, * and it's not logged so we don't shutdown when we should. */ - if (logflags) { - ASSERT(tp && wr); - xfs_trans_log_inode(tp, ip, logflags); - } - if (cur) { + if (bma.logflags) + xfs_trans_log_inode(tp, ip, bma.logflags); + + if (bma.cur) { if (!error) { ASSERT(*firstblock == NULLFSBLOCK || XFS_FSB_TO_AGNO(mp, *firstblock) == XFS_FSB_TO_AGNO(mp, - cur->bc_private.b.firstblock) || + bma.cur->bc_private.b.firstblock) || (flist->xbf_low && XFS_FSB_TO_AGNO(mp, *firstblock) < XFS_FSB_TO_AGNO(mp, - cur->bc_private.b.firstblock))); - *firstblock = cur->bc_private.b.firstblock; + bma.cur->bc_private.b.firstblock))); + *firstblock = bma.cur->bc_private.b.firstblock; } - xfs_btree_del_cursor(cur, + xfs_btree_del_cursor(bma.cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); } if (!error) @@ -4648,59 +4792,6 @@ error0: return error; } -/* - * Map file blocks to filesystem blocks, simple version. - * One block (extent) only, read-only. - * For flags, only the XFS_BMAPI_ATTRFORK flag is examined. - * For the other flag values, the effect is as if XFS_BMAPI_METADATA - * was set and all the others were clear. 
- */ -int /* error */ -xfs_bmapi_single( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - int whichfork, /* data or attr fork */ - xfs_fsblock_t *fsb, /* output: mapped block */ - xfs_fileoff_t bno) /* starting file offs. mapped */ -{ - int eof; /* we've hit the end of extents */ - int error; /* error return */ - xfs_bmbt_irec_t got; /* current file extent record */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t lastx; /* last useful extent number */ - xfs_bmbt_irec_t prev; /* previous file extent record */ - - ifp = XFS_IFORK_PTR(ip, whichfork); - if (unlikely( - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)) { - XFS_ERROR_REPORT("xfs_bmapi_single", XFS_ERRLEVEL_LOW, - ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); - } - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return XFS_ERROR(EIO); - XFS_STATS_INC(xs_blk_mapr); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - (void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - /* - * Reading past eof, act as though there's a hole - * up to end. - */ - if (eof || got.br_startoff > bno) { - *fsb = NULLFSBLOCK; - return 0; - } - ASSERT(!isnullstartblock(got.br_startblock)); - ASSERT(bno < got.br_startoff + got.br_blockcount); - *fsb = got.br_startblock + (bno - got.br_startoff); - ifp->if_lastex = lastx; - return 0; -} - /* * Unmap (remove) blocks from a file. 
* If nexts is nonzero then the number of extents to remove is limited to @@ -4739,7 +4830,6 @@ xfs_bunmapi( int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ - int rsvd; /* OK to allocate reserved blocks */ xfs_fsblock_t sum; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -4757,11 +4847,10 @@ xfs_bunmapi( mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; + ASSERT(len > 0); ASSERT(nexts >= 0); - ASSERT(ifp->if_ext_max == - XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); + if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; @@ -4795,6 +4884,15 @@ xfs_bunmapi( cur->bc_private.b.flags = 0; } else cur = NULL; + + if (isrt) { + /* + * Synchronize by locking the bitmap inode. + */ + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); + } + extno = 0; while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && (nexts == 0 || extno < nexts)) { @@ -4873,9 +4971,9 @@ xfs_bunmapi( del.br_blockcount = mod; } del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, &del, - firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + error = xfs_bmap_add_extent_unwritten_real(tp, ip, + &lastx, &cur, &del, firstblock, flist, + &logflags); if (error) goto error0; goto nodelete; @@ -4901,9 +4999,12 @@ xfs_bunmapi( */ ASSERT(bno >= del.br_blockcount); bno -= del.br_blockcount; - if (bno < got.br_startoff) { - if (--lastx >= 0) - xfs_bmbt_get_all(--ep, &got); + if (got.br_startoff > bno) { + if (--lastx >= 0) { + ep = xfs_iext_get_ext(ifp, + lastx); + xfs_bmbt_get_all(ep, &got); + } } continue; } else if (del.br_state == XFS_EXT_UNWRITTEN) { @@ -4927,18 +5028,19 @@ xfs_bunmapi( prev.br_startoff = start; } prev.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx - 1, &cur, - &prev, 
firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + lastx--; + error = xfs_bmap_add_extent_unwritten_real(tp, + ip, &lastx, &cur, &prev, + firstblock, flist, &logflags); if (error) goto error0; goto nodelete; } else { ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, - &del, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + error = xfs_bmap_add_extent_unwritten_real(tp, + ip, &lastx, &cur, &del, + firstblock, flist, &logflags); if (error) goto error0; goto nodelete; @@ -4953,13 +5055,13 @@ xfs_bunmapi( rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)rtexts, rsvd); + (int64_t)rtexts, 0); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)del.br_blockcount, rsvd); + (int64_t)del.br_blockcount, 0); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); @@ -4983,46 +5085,43 @@ xfs_bunmapi( */ if (!wasdel && xfs_trans_get_block_res(tp) == 0 && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max && + XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ + XFS_IFORK_MAXEXT(ip, whichfork) && del.br_startoff > got.br_startoff && del.br_startoff + del.br_blockcount < got.br_startoff + got.br_blockcount) { error = XFS_ERROR(ENOSPC); goto error0; } - error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, - &tmp_logflags, whichfork, rsvd); + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, + &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) goto error0; bno = del.br_startoff - 1; nodelete: - lastx = ifp->if_lastex; /* * If not done go on to the next (previous) record. - * Reset ep in case the extents array was re-alloced. 
*/ - ep = xfs_iext_get_ext(ifp, lastx); if (bno != (xfs_fileoff_t)-1 && bno >= start) { - if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || - xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, lastx); - } - if (lastx >= 0) + if (lastx >= 0) { + ep = xfs_iext_get_ext(ifp, lastx); + if (xfs_bmbt_get_startoff(ep) > bno) { + if (--lastx >= 0) + ep = xfs_iext_get_ext(ifp, + lastx); + } xfs_bmbt_get_all(ep, &got); + } extno++; } } - ifp->if_lastex = lastx; *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; - ASSERT(ifp->if_ext_max == - XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); + /* * Convert to a btree if necessary. */ - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) { + if (xfs_bmap_needs_btree(ip, whichfork)) { ASSERT(cur == NULL); error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, &tmp_logflags, whichfork); @@ -5033,8 +5132,7 @@ nodelete: /* * transform from btree to extents, give it cur */ - else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && - XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { + else if (xfs_bmap_wants_extents(ip, whichfork)) { ASSERT(cur != NULL); error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, whichfork); @@ -5045,8 +5143,6 @@ nodelete: /* * transform from extents to local? 
*/ - ASSERT(ifp->if_ext_max == - XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); error = 0; error0: /* @@ -5076,88 +5172,252 @@ error0: return error; } +#ifdef DEBUG +STATIC struct xfs_buf * +xfs_bmap_get_bp( + struct xfs_btree_cur *cur, + xfs_fsblock_t bno) +{ + struct xfs_log_item_desc *lidp; + int i; + + if (!cur) + return NULL; + + for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { + if (!cur->bc_bufs[i]) + break; + if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) + return cur->bc_bufs[i]; + } + + /* Chase down all the log items to see if the bp is there */ + list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { + struct xfs_buf_log_item *bip; + bip = (struct xfs_buf_log_item *)lidp->lid_item; + if (bip->bli_item.li_type == XFS_LI_BUF && + XFS_BUF_ADDR(bip->bli_buf) == bno) + return bip->bli_buf; + } + + return NULL; +} + +STATIC void +xfs_check_block( + struct xfs_btree_block *block, + xfs_mount_t *mp, + int root, + short sz) +{ + int i, j, dmxr; + __be64 *pp, *thispa; /* pointer to block address */ + xfs_bmbt_key_t *prevp, *keyp; + + ASSERT(be16_to_cpu(block->bb_level) > 0); + + prevp = NULL; + for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { + dmxr = mp->m_bmap_dmxr[0]; + keyp = XFS_BMBT_KEY_ADDR(mp, block, i); + + if (prevp) { + ASSERT(be64_to_cpu(prevp->br_startoff) < + be64_to_cpu(keyp->br_startoff)); + } + prevp = keyp; + + /* + * Compare the block numbers to see if there are dups. 
+ */ + if (root) + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); + else + pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + + for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { + if (root) + thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); + else + thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); + if (*thispa == *pp) { + xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", + __func__, j, i, + (unsigned long long)be64_to_cpu(*thispa)); + panic("%s: ptrs are equal in node\n", + __func__); + } + } + } +} + /* - * Check the last inode extent to determine whether this allocation will result - * in blocks being allocated at the end of the file. When we allocate new data - * blocks at the end of the file which do not start at the previous data block, - * we will try to align the new blocks at stripe unit boundaries. + * Check that the extents for the inode ip are in the right order in all + * btree leaves. */ -STATIC int /* error */ -xfs_bmap_isaeof( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t off, /* file offset in fsblocks */ - int whichfork, /* data or attribute fork */ - char *aeof) /* return value */ + +STATIC void +xfs_bmap_check_leaf_extents( + xfs_btree_cur_t *cur, /* btree cursor or null */ + xfs_inode_t *ip, /* incore inode pointer */ + int whichfork) /* data or attr fork */ { - int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_host_t *lastrec; /* extent record pointer */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_irec_t s; /* expanded extent record */ + struct xfs_btree_block *block; /* current btree block */ + xfs_fsblock_t bno; /* block # of "block" */ + xfs_buf_t *bp; /* buffer for "block" */ + int error; /* error return value */ + xfs_extnum_t i=0, j; /* index into the extents list */ + xfs_ifork_t *ifp; /* fork structure */ + int level; /* btree level, for checking */ + xfs_mount_t *mp; /* file system mount structure */ + __be64 *pp; /* pointer to block address */ + 
xfs_bmbt_rec_t *ep; /* pointer to current extent */ + xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ + xfs_bmbt_rec_t *nextp; /* pointer to next extent */ + int bp_release = 0; - ASSERT(whichfork == XFS_DATA_FORK); - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(NULL, ip, whichfork))) - return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *aeof = 1; - return 0; + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { + return; } + + bno = NULLFSBLOCK; + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + block = ifp->if_broot; /* - * Go to the last extent + * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. */ - lastrec = xfs_iext_get_ext(ifp, nextents - 1); - xfs_bmbt_get_all(lastrec, &s); + level = be16_to_cpu(block->bb_level); + ASSERT(level > 0); + xfs_check_block(block, mp, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + bno = be64_to_cpu(*pp); + + ASSERT(bno != NULLDFSBNO); + ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + /* - * Check we are allocating in the last extent (for delayed allocations) - * or past the last extent for non-delayed allocations. + * Go down the tree until leaf level is reached, following the first + * pointer (leftmost) at each level. 
*/ - *aeof = (off >= s.br_startoff && - off < s.br_startoff + s.br_blockcount && - isnullstartblock(s.br_startblock)) || - off >= s.br_startoff + s.br_blockcount; - return 0; -} + while (level-- > 0) { + /* See if buf is in cur first */ + bp_release = 0; + bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); + if (!bp) { + bp_release = 1; + error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + goto error_norelse; + } + block = XFS_BUF_TO_BLOCK(bp); + XFS_WANT_CORRUPTED_GOTO( + xfs_bmap_sanity_check(mp, bp, level), + error0); + if (level == 0) + break; -/* - * Check if the endoff is outside the last extent. If so the caller will grow - * the allocation to a stripe unit boundary. - */ -int /* error */ -xfs_bmap_eof( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t endoff, /* file offset in fsblocks */ - int whichfork, /* data or attribute fork */ - int *eof) /* result value */ -{ - xfs_fsblock_t blockcount; /* extent block count */ - int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_host_t *lastrec; /* extent record pointer */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_fileoff_t startoff; /* extent starting file offset */ + /* + * Check this block for basic sanity (increasing keys and + * no duplicate blocks). + */ - ASSERT(whichfork == XFS_DATA_FORK); - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(NULL, ip, whichfork))) - return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { - *eof = 1; - return 0; + xfs_check_block(block, mp, 0, 0); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + bno = be64_to_cpu(*pp); + XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } } + + /* + * Here with bp and block set to the leftmost leaf node in the tree. 
+ */ + i = 0; + /* - * Go to the last extent + * Loop over all leaf nodes checking that all extents are in the right order. */ - lastrec = xfs_iext_get_ext(ifp, nextents - 1); - startoff = xfs_bmbt_get_startoff(lastrec); - blockcount = xfs_bmbt_get_blockcount(lastrec); - *eof = endoff >= startoff + blockcount; - return 0; + for (;;) { + xfs_fsblock_t nextbno; + xfs_extnum_t num_recs; + + + num_recs = xfs_btree_get_numrecs(block); + + /* + * Read-ahead the next leaf block, if any. + */ + + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); + + /* + * Check all the extents to make sure they are OK. + * If we had a previous block, the last entry should + * conform with the first entry in this one. + */ + + ep = XFS_BMBT_REC_ADDR(mp, block, 1); + if (i) { + ASSERT(xfs_bmbt_disk_get_startoff(&last) + + xfs_bmbt_disk_get_blockcount(&last) <= + xfs_bmbt_disk_get_startoff(ep)); + } + for (j = 1; j < num_recs; j++) { + nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); + ASSERT(xfs_bmbt_disk_get_startoff(ep) + + xfs_bmbt_disk_get_blockcount(ep) <= + xfs_bmbt_disk_get_startoff(nextp)); + ep = nextp; + } + + last = *ep; + i += num_recs; + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } + bno = nextbno; + /* + * If we've reached the end, stop. + */ + if (bno == NULLFSBLOCK) + break; + + bp_release = 0; + bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); + if (!bp) { + bp_release = 1; + error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) + goto error_norelse; + } + block = XFS_BUF_TO_BLOCK(bp); + } + if (bp_release) { + bp_release = 0; + xfs_trans_brelse(NULL, bp); + } + return; + +error0: + xfs_warn(mp, "%s: at error0", __func__); + if (bp_release) + xfs_trans_brelse(NULL, bp); +error_norelse: + xfs_warn(mp, "%s: BAD after btree leaves for %d extents", + __func__, i); + panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); + return; } +#endif /* * Count fsblocks of the given fork. 
@@ -5229,7 +5489,9 @@ xfs_bmap_count_tree( struct xfs_btree_block *block, *nextblock; int numrecs; - if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) return error; *count += 1; block = XFS_BUF_TO_BLOCK(bp); @@ -5238,8 +5500,10 @@ xfs_bmap_count_tree( /* Not at node above leaves, count this level of nodes */ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); while (nextbno != NULLFSBLOCK) { - if ((error = xfs_btree_read_bufl(mp, tp, nextbno, - 0, &nbp, XFS_BMAP_BTREE_REF))) + error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) return error; *count += 1; nextblock = XFS_BUF_TO_BLOCK(nbp); @@ -5268,8 +5532,10 @@ xfs_bmap_count_tree( if (nextbno == NULLFSBLOCK) break; bno = nextbno; - if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, - XFS_BMAP_BTREE_REF))) + error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + XFS_BMAP_BTREE_REF, + &xfs_bmbt_buf_ops); + if (error) return error; *count += 1; block = XFS_BUF_TO_BLOCK(bp); @@ -5315,3 +5581,16 @@ xfs_bmap_disk_count_leaves( *count += xfs_bmbt_disk_get_blockcount(frp); } } + +/* + * Convert the given file system block to a disk block. We have to treat it + * differently based on whether the file is a real time file or not, because the + * bmap code does. + */ +xfs_daddr_t +xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) +{ + return (XFS_IS_REALTIME_INODE(ip) ? 
\ + (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ + XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); +} diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c index ff51fdda5..836f52f64 100644 --- a/libxfs/xfs_bmap_btree.c +++ b/libxfs/xfs_bmap_btree.c @@ -403,10 +403,10 @@ xfs_bmbt_to_bmdr( xfs_bmbt_key_t *tkp; __be64 *tpp; - ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); - ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); - ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); - ASSERT(be16_to_cpu(rblock->bb_level) > 0); + ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); + ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_level != 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); @@ -687,6 +687,67 @@ xfs_bmbt_key_diff( cur->bc_rec.b.br_startoff; } +static void +xfs_bmbt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + unsigned int level; + int lblock_ok; /* block passes checks */ + + /* magic number and level verification. + * + * We don't know what fork we belong to, so just verify that the level + * is less than the maximum of the two. Later checks will be more + * precise.
+ */ + level = be16_to_cpu(block->bb_level); + lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) && + level < MAX(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]); + + /* numrecs verification */ + lblock_ok = lblock_ok && + be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0]; + + /* sibling pointer verification */ + lblock_ok = lblock_ok && + block->bb_u.l.bb_leftsib && + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_leftsib))) && + block->bb_u.l.bb_rightsib && + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_rightsib))); + + if (!lblock_ok) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_bmbt_read_verify( + struct xfs_buf *bp) +{ + xfs_bmbt_verify(bp); +} + +static void +xfs_bmbt_write_verify( + struct xfs_buf *bp) +{ + xfs_bmbt_verify(bp); +} + +const struct xfs_buf_ops xfs_bmbt_buf_ops = { + .verify_read = xfs_bmbt_read_verify, + .verify_write = xfs_bmbt_write_verify, +}; + + #ifdef DEBUG STATIC int xfs_bmbt_keys_inorder( @@ -815,7 +876,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, - + .buf_ops = &xfs_bmbt_buf_ops, #ifdef DEBUG .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c index 02854dbb7..c35269b67 100644 --- a/libxfs/xfs_btree.c +++ b/libxfs/xfs_btree.c @@ -48,11 +48,11 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || XFS_FSB_SANITY_CHECK(mp, 
be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, @@ -87,10 +87,10 @@ xfs_btree_check_sblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && block->bb_u.s.bb_leftsib && - (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, @@ -250,16 +250,19 @@ xfs_btree_dup_cursor( for (i = 0; i < new->bc_nlevels; i++) { new->bc_ptrs[i] = cur->bc_ptrs[i]; new->bc_ra[i] = cur->bc_ra[i]; - if ((bp = cur->bc_bufs[i])) { - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) { + bp = cur->bc_bufs[i]; + if (bp) { + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_BUF_ADDR(bp), mp->m_bsize, + 0, &bp, + cur->bc_ops->buf_ops); + if (error) { xfs_btree_del_cursor(new, error); *ncur = NULL; return error; } new->bc_bufs[i] = bp; - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); } else new->bc_bufs[i] = NULL; } @@ -450,8 +453,7 @@ xfs_btree_get_bufl( ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); return bp; } @@ -474,8 +476,7 @@ xfs_btree_get_bufs( ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 
mp->m_bsize, lock); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); return bp; } @@ -493,9 +494,9 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); else - return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; + return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } /* @@ -596,69 +597,29 @@ xfs_btree_offsets( * Get a buffer for the block, return it read in. * Long-form addressing. */ -int /* error */ +int xfs_btree_read_bufl( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ - xfs_buf_t **bpp, /* buffer for fsbno */ - int refval) /* ref count value for buffer */ -{ - xfs_buf_t *bp; /* return value */ + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock, /* lock flags for read_buf */ + struct xfs_buf **bpp, /* buffer for fsbno */ + int refval, /* ref count value for buffer */ + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; /* return value */ xfs_daddr_t d; /* real disk block address */ - int error; + int error; ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp))) { - return error; - } - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (bp != NULL) { - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); - } - *bpp = bp; - return 0; -} - -/* - * Get a buffer for the block, return it read in. - * Short-form addressing. 
- */ -int /* error */ -xfs_btree_read_bufs( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock, /* lock flags for read_buf */ - xfs_buf_t **bpp, /* buffer for agno/agbno */ - int refval) /* ref count value for buffer */ -{ - xfs_buf_t *bp; /* return value */ - xfs_daddr_t d; /* real disk block address */ - int error; - - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp))) { + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, lock, &bp, ops); + if (error) return error; - } - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (bp != NULL) { - switch (refval) { - case XFS_ALLOC_BTREE_REF: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); - break; - case XFS_INO_BTREE_REF: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval); - break; - } - } + ASSERT(!xfs_buf_geterror(bp)); + if (bp) + xfs_buf_set_ref(bp, refval); *bpp = bp; return 0; } @@ -674,12 +635,14 @@ xfs_btree_readahead_lblock( xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, left, 1); + xfs_btree_reada_bufl(cur->bc_mp, left, 1, + cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, right, 1); + xfs_btree_reada_bufl(cur->bc_mp, right, 1, + cur->bc_ops->buf_ops); rval++; } @@ -699,13 +662,13 @@ xfs_btree_readahead_sblock( if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - left, 1); + left, 1, cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - right, 1); + right, 1, 
cur->bc_ops->buf_ops); rval++; } @@ -762,14 +725,14 @@ xfs_btree_setbuf( b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) + if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) + if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { - if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) + if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) + if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } @@ -780,9 +743,9 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return be64_to_cpu(ptr->l) == NULLDFSBNO; + return ptr->l == cpu_to_be64(NULLDFSBNO); else - return be32_to_cpu(ptr->s) == NULLAGBLOCK; + return ptr->s == cpu_to_be32(NULLAGBLOCK); } STATIC void @@ -843,18 +806,22 @@ xfs_btree_set_sibling( } } -STATIC void +void xfs_btree_init_block( - struct xfs_btree_cur *cur, - int level, - int numrecs, - struct xfs_btree_block *new) /* new block */ + struct xfs_mount *mp, + struct xfs_buf *bp, + __u32 magic, + __u16 level, + __u16 numrecs, + unsigned int flags) { - new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); + struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp); + + new->bb_magic = cpu_to_be32(magic); new->bb_level = cpu_to_be16(level); new->bb_numrecs = cpu_to_be16(numrecs); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (flags & XFS_BTREE_LONG_PTRS) { new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); } else { @@ -863,6 +830,17 @@ xfs_btree_init_block( } } +STATIC void +xfs_btree_init_block_cur( + struct xfs_btree_cur *cur, + int level, + int numrecs, + struct xfs_buf *bp) +{ + xfs_btree_init_block(cur->bc_mp, 
bp, xfs_magics[cur->bc_btnum], + level, numrecs, cur->bc_flags); +} + /* * Return true if ptr is the last record in the btree and * we need to track updateѕ to this record. The decision @@ -908,12 +886,12 @@ xfs_btree_ptr_to_daddr( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); + ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); } else { ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); + ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(ptr->s)); @@ -928,13 +906,13 @@ xfs_btree_set_refs( switch (cur->bc_btnum) { case XFS_BTNUM_BNO: case XFS_BTNUM_CNT: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); + xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); break; case XFS_BTNUM_INO: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); + xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); break; case XFS_BTNUM_BMAP: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); + xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); break; default: ASSERT(0); @@ -959,9 +937,10 @@ xfs_btree_get_buf_block( *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, flags); - ASSERT(*bpp); - ASSERT(!XFS_BUF_GETERROR(*bpp)); + if (!*bpp) + return ENOMEM; + (*bpp)->b_ops = cur->bc_ops->buf_ops; *block = XFS_BUF_TO_BLOCK(*bpp); return 0; } @@ -988,20 +967,15 @@ xfs_btree_read_buf_block( d = xfs_btree_ptr_to_daddr(cur, ptr); error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags, bpp); + mp->m_bsize, flags, bpp, + cur->bc_ops->buf_ops); if (error) return error; - ASSERT(*bpp != NULL); - ASSERT(!XFS_BUF_GETERROR(*bpp)); - + ASSERT(!xfs_buf_geterror(*bpp)); xfs_btree_set_refs(cur, *bpp); *block = XFS_BUF_TO_BLOCK(*bpp); - - error = xfs_btree_check_block(cur, *block, level, *bpp); - if (error) - xfs_trans_brelse(cur->bc_tp, *bpp); - 
return error; + return 0; } /* @@ -2174,7 +2148,7 @@ xfs_btree_split( goto error0; /* Fill in the btree header for the new right block. */ - xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right); + xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp); /* * Split the entries between the old and the new block evenly. @@ -2483,7 +2457,7 @@ xfs_btree_new_root( nptr = 2; } /* Fill in the new block's btree header and log it. */ - xfs_btree_init_block(cur, cur->bc_nlevels, 2, new); + xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp); xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && !xfs_btree_ptr_is_null(cur, &rptr)); diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c index 3eb34d65c..a31d35380 100644 --- a/libxfs/xfs_da_btree.c +++ b/libxfs/xfs_da_btree.c @@ -62,14 +62,92 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, /* * Utility routines. */ -STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); -STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); -STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); +STATIC uint xfs_da_node_lasthash(struct xfs_buf *bp, int *count); +STATIC int xfs_da_node_order(struct xfs_buf *node1_bp, + struct xfs_buf *node2_bp); STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk); STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); +static void +xfs_da_node_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_da_node_hdr *hdr = bp->b_addr; + int block_ok = 0; + + block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC); + block_ok = block_ok && + be16_to_cpu(hdr->level) > 0 && + be16_to_cpu(hdr->count) > 0 ; + if (!block_ok) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } + +} + +static void +xfs_da_node_write_verify( + struct xfs_buf 
*bp) +{ + xfs_da_node_verify(bp); +} + +/* + * leaf/node format detection on trees is sketchy, so a node read can be done on + * leaf level blocks when detection identifies the tree as a node format tree + * incorrectly. In this case, we need to swap the verifier to match the correct + * format of the block being read. + */ +static void +xfs_da_node_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA_NODE_MAGIC: + xfs_da_node_verify(bp); + break; + case XFS_ATTR_LEAF_MAGIC: + bp->b_ops = &xfs_attr_leaf_buf_ops; + bp->b_ops->verify_read(bp); + return; + case XFS_DIR2_LEAFN_MAGIC: + bp->b_ops = &xfs_dir2_leafn_buf_ops; + bp->b_ops->verify_read(bp); + return; + default: + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, + mp, info); + xfs_buf_ioerror(bp, EFSCORRUPTED); + break; + } +} + +const struct xfs_buf_ops xfs_da_node_buf_ops = { + .verify_read = xfs_da_node_read_verify, + .verify_write = xfs_da_node_write_verify, +}; + + +int +xfs_da_node_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp, + int which_fork) +{ + return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, + which_fork, &xfs_da_node_buf_ops); +} + /*======================================================================== * Routines used for growing the Btree. 
*========================================================================*/ @@ -79,19 +157,21 @@ STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); */ int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, - xfs_dabuf_t **bpp, int whichfork) + struct xfs_buf **bpp, int whichfork) { xfs_da_intnode_t *node; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; xfs_trans_t *tp; + trace_xfs_da_node_create(args); + tp = args->trans; error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); if (error) return(error); ASSERT(bp != NULL); - node = bp->data; + node = bp->b_addr; node->hdr.info.forw = 0; node->hdr.info.back = 0; node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC); @@ -99,9 +179,10 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, node->hdr.count = 0; node->hdr.level = cpu_to_be16(level); - xfs_da_log_buf(tp, bp, + xfs_trans_log_buf(tp, bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); + bp->b_ops = &xfs_da_node_buf_ops; *bpp = bp; return(0); } @@ -115,9 +196,11 @@ xfs_da_split(xfs_da_state_t *state) { xfs_da_state_blk_t *oldblk, *newblk, *addblk; xfs_da_intnode_t *node; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int max, action, error, i; + trace_xfs_da_split(state->args); + /* * Walk back up the tree splitting/inserting/adjusting as necessary. 
* If we need to insert and there isn't room, split the node, then @@ -156,10 +239,12 @@ xfs_da_split(xfs_da_state_t *state) state->extravalid = 1; if (state->inleaf) { state->extraafter = 0; /* before newblk */ + trace_xfs_attr_leaf_split_before(state->args); error = xfs_attr_leaf_split(state, oldblk, &state->extrablk); } else { state->extraafter = 1; /* after newblk */ + trace_xfs_attr_leaf_split_after(state->args); error = xfs_attr_leaf_split(state, newblk, &state->extrablk); } @@ -176,7 +261,6 @@ xfs_da_split(xfs_da_state_t *state) case XFS_DA_NODE_MAGIC: error = xfs_da_node_split(state, oldblk, newblk, addblk, max - i, &action); - xfs_da_buf_done(addblk->bp); addblk->bp = NULL; if (error) return(error); /* GROT: dir is inconsistent */ @@ -194,13 +278,6 @@ xfs_da_split(xfs_da_state_t *state) * Update the btree to show the new hashval for this child. */ xfs_da_fixhashpath(state, &state->path); - /* - * If we won't need this block again, it's getting dropped - * from the active path by the loop control, so we need - * to mark it done now. - */ - if (i > 0 || !addblk) - xfs_da_buf_done(oldblk->bp); } if (!addblk) return(0); @@ -212,8 +289,6 @@ xfs_da_split(xfs_da_state_t *state) oldblk = &state->path.blk[0]; error = xfs_da_root_split(state, oldblk, addblk); if (error) { - xfs_da_buf_done(oldblk->bp); - xfs_da_buf_done(addblk->bp); addblk->bp = NULL; return(error); /* GROT: dir is inconsistent */ } @@ -225,7 +300,7 @@ xfs_da_split(xfs_da_state_t *state) * and the original block 0 could be at any position in the list. 
*/ - node = oldblk->bp->data; + node = oldblk->bp->b_addr; if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { bp = addblk->bp; @@ -233,13 +308,13 @@ xfs_da_split(xfs_da_state_t *state) ASSERT(state->extravalid); bp = state->extrablk.bp; } - node = bp->data; + node = bp->b_addr; node->hdr.info.back = cpu_to_be32(oldblk->blkno); - xfs_da_log_buf(state->args->trans, bp, + xfs_trans_log_buf(state->args->trans, bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); } - node = oldblk->bp->data; + node = oldblk->bp->b_addr; if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { bp = addblk->bp; @@ -247,14 +322,12 @@ xfs_da_split(xfs_da_state_t *state) ASSERT(state->extravalid); bp = state->extrablk.bp; } - node = bp->data; + node = bp->b_addr; node->hdr.info.forw = cpu_to_be32(oldblk->blkno); - xfs_da_log_buf(state->args->trans, bp, + xfs_trans_log_buf(state->args->trans, bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); } - xfs_da_buf_done(oldblk->bp); - xfs_da_buf_done(addblk->bp); addblk->bp = NULL; return(0); } @@ -271,13 +344,15 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_intnode_t *node, *oldroot; xfs_da_args_t *args; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error, size; xfs_inode_t *dp; xfs_trans_t *tp; xfs_mount_t *mp; xfs_dir2_leaf_t *leaf; + trace_xfs_da_root_split(state->args); + /* * Copy the existing (incorrect) block from the root node position * to a free space somewhere. 
@@ -294,20 +369,21 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, if (error) return(error); ASSERT(bp != NULL); - node = bp->data; - oldroot = blk1->bp->data; - if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { + node = bp->b_addr; + oldroot = blk1->bp->b_addr; + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); } else { - ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); leaf = (xfs_dir2_leaf_t *)oldroot; size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - (char *)leaf); } memcpy(node, oldroot, size); - xfs_da_log_buf(tp, bp, 0, size - 1); - xfs_da_buf_done(blk1->bp); + xfs_trans_log_buf(tp, bp, 0, size - 1); + + bp->b_ops = blk1->bp->b_ops; blk1->bp = bp; blk1->blkno = blkno; @@ -319,7 +395,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); if (error) return(error); - node = bp->data; + node = bp->b_addr; node->btree[0].hashval = cpu_to_be32(blk1->hashval); node->btree[0].before = cpu_to_be32(blk1->blkno); node->btree[1].hashval = cpu_to_be32(blk2->hashval); @@ -327,7 +403,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node->hdr.count = cpu_to_be16(2); #ifdef DEBUG - if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { ASSERT(blk1->blkno >= mp->m_dirleafblk && blk1->blkno < mp->m_dirfreeblk); ASSERT(blk2->blkno >= mp->m_dirleafblk && @@ -336,10 +412,9 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, #endif /* Header is already logged by xfs_da_node_create */ - xfs_da_log_buf(tp, bp, + xfs_trans_log_buf(tp, bp, XFS_DA_LOGRANGE(node, node->btree, sizeof(xfs_da_node_entry_t) * 2)); - xfs_da_buf_done(bp); return(0); } 
@@ -358,8 +433,10 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int newcount, error; int useextra; - node = oldblk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + trace_xfs_da_node_split(state->args); + + node = oldblk->bp->b_addr; + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); /* * With V2 dirs the extra block is data or freespace. @@ -405,7 +482,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, * * If we had double-split op below us, then add the extra block too. */ - node = oldblk->bp->data; + node = oldblk->bp->b_addr; if (oldblk->index <= be16_to_cpu(node->hdr.count)) { oldblk->index++; xfs_da_node_add(state, oldblk, addblk); @@ -444,8 +521,10 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, int count, tmp; xfs_trans_t *tp; - node1 = blk1->bp->data; - node2 = blk2->bp->data; + trace_xfs_da_node_rebalance(state->args); + + node1 = blk1->bp->b_addr; + node2 = blk2->bp->b_addr; /* * Figure out how many entries need to move, and in which direction. * Swap the nodes around if that makes it simpler. 
@@ -458,8 +537,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node1 = node2; node2 = tmpnode; } - ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); - ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; if (count == 0) return; @@ -499,7 +578,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; memcpy(btree_d, btree_s, tmp); be16_add_cpu(&node1->hdr.count, count); - xfs_da_log_buf(tp, blk1->bp, + xfs_trans_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, btree_d, tmp)); /* @@ -516,9 +595,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, /* * Log header of node 1 and all current bits of node 2. */ - xfs_da_log_buf(tp, blk1->bp, + xfs_trans_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr))); - xfs_da_log_buf(tp, blk2->bp, + xfs_trans_log_buf(tp, blk2->bp, XFS_DA_LOGRANGE(node2, &node2->hdr, sizeof(node2->hdr) + sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count))); @@ -527,8 +606,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Record the last hashval from each block for upward propagation. 
* (note: don't use the swapped node pointers) */ - node1 = blk1->bp->data; - node2 = blk2->bp->data; + node1 = blk1->bp->b_addr; + node2 = blk2->bp->b_addr; blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval); blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval); @@ -552,8 +631,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, xfs_da_node_entry_t *btree; int tmp; - node = oldblk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + trace_xfs_da_node_add(state->args); + + node = oldblk->bp->b_addr; + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) @@ -571,10 +652,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, } btree->hashval = cpu_to_be32(newblk->hashval); btree->before = cpu_to_be32(newblk->blkno); - xfs_da_log_buf(state->args->trans, oldblk->bp, + xfs_trans_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); be16_add_cpu(&node->hdr.count, 1); - xfs_da_log_buf(state->args->trans, oldblk->bp, + xfs_trans_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); /* @@ -597,6 +678,8 @@ xfs_da_join(xfs_da_state_t *state) xfs_da_state_blk_t *drop_blk, *save_blk; int action, error; + trace_xfs_da_join(state->args); + action = 0; drop_blk = &state->path.blk[ state->path.active-1 ]; save_blk = &state->altpath.blk[ state->path.active-1 ]; @@ -670,6 +753,24 @@ xfs_da_join(xfs_da_state_t *state) return(error); } +#ifdef DEBUG +static void +xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) +{ + __be16 magic = blkinfo->magic; + + if (level == 1) { + ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + } else + ASSERT(magic == 
cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(!blkinfo->forw); + ASSERT(!blkinfo->back); +} +#else /* !DEBUG */ +#define xfs_da_blkinfo_onlychild_validate(blkinfo, level) +#endif /* !DEBUG */ + /* * We have only one entry in the root. Copy the only remaining child of * the old root to block 0 as the new root node. @@ -678,18 +779,18 @@ STATIC int xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) { xfs_da_intnode_t *oldroot; - /* REFERENCED */ - xfs_da_blkinfo_t *blkinfo; xfs_da_args_t *args; xfs_dablk_t child; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; + trace_xfs_da_root_join(state->args); + args = state->args; ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); - oldroot = root_blk->bp->data; - ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); + oldroot = root_blk->bp->b_addr; + ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT(!oldroot->hdr.info.forw); ASSERT(!oldroot->hdr.info.back); @@ -705,22 +806,23 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) */ child = be32_to_cpu(oldroot->btree[0].before); ASSERT(child != 0); - error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp, + error = xfs_da_node_read(args->trans, args->dp, child, -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); - blkinfo = bp->data; - if (be16_to_cpu(oldroot->hdr.level) == 1) { - ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || - be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); - } else { - ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); - } - ASSERT(!blkinfo->forw); - ASSERT(!blkinfo->back); - memcpy(root_blk->bp->data, bp->data, state->blocksize); - xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); + xfs_da_blkinfo_onlychild_validate(bp->b_addr, + be16_to_cpu(oldroot->hdr.level)); + + /* + * This could be copying a leaf back into the root block in the case of + * there only being a single leaf block left in the 
tree. Hence we have + * to update the b_ops pointer as well to match the buffer type change + * that could occur. + */ + memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); + root_blk->bp->b_ops = bp->b_ops; + xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); error = xfs_da_shrink_inode(args, child, bp); return(error); } @@ -742,7 +844,9 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) xfs_da_blkinfo_t *info; int count, forward, error, retval, i; xfs_dablk_t blkno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; + + trace_xfs_da_node_toosmall(state->args); /* * Check for the degenerate case of the block being over 50% full. @@ -750,8 +854,8 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) * to coalesce with a sibling. */ blk = &state->path.blk[ state->path.active-1 ]; - info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); + info = blk->bp->b_addr; + ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); node = (xfs_da_intnode_t *)info; count = be16_to_cpu(node->hdr.count); if (count > (state->node_ents >> 1)) { @@ -800,7 +904,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) blkno = be32_to_cpu(info->back); if (blkno == 0) continue; - error = xfs_da_read_buf(state->args->trans, state->args->dp, + error = xfs_da_node_read(state->args->trans, state->args->dp, blkno, -1, &bp, state->args->whichfork); if (error) return(error); @@ -810,10 +914,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) count = state->node_ents; count -= state->node_ents >> 2; count -= be16_to_cpu(node->hdr.count); - node = bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + node = bp->b_addr; + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); count -= be16_to_cpu(node->hdr.count); - xfs_da_brelse(state->args->trans, bp); + xfs_trans_brelse(state->args->trans, bp); if (count >= 0) break; /* fits with at least 25% to spare */ } @@ -865,6 +969,8 @@ 
xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) xfs_dahash_t lasthash=0; int level, count; + trace_xfs_da_fixhashpath(state->args); + level = path->active-1; blk = &path->blk[ level ]; switch (blk->magic) { @@ -885,14 +991,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) break; } for (blk--, level--; level >= 0; blk--, level--) { - node = blk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + node = blk->bp->b_addr; + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); btree = &node->btree[ blk->index ]; if (be32_to_cpu(btree->hashval) == lasthash) break; blk->hashval = lasthash; btree->hashval = cpu_to_be32(lasthash); - xfs_da_log_buf(state->args->trans, blk->bp, + xfs_trans_log_buf(state->args->trans, blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); @@ -909,7 +1015,9 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) xfs_da_node_entry_t *btree; int tmp; - node = drop_blk->bp->data; + trace_xfs_da_node_remove(state->args); + + node = drop_blk->bp->b_addr; ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); ASSERT(drop_blk->index >= 0); @@ -921,15 +1029,15 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(btree, btree + 1, tmp); - xfs_da_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, tmp)); btree = &node->btree[be16_to_cpu(node->hdr.count)-1]; } memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); - xfs_da_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); be16_add_cpu(&node->hdr.count, -1); - xfs_da_log_buf(state->args->trans, drop_blk->bp, + 
xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); /* @@ -952,10 +1060,12 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, int tmp; xfs_trans_t *tp; - drop_node = drop_blk->bp->data; - save_node = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); - ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + trace_xfs_da_node_unbalance(state->args); + + drop_node = drop_blk->bp->b_addr; + save_node = save_blk->bp->b_addr; + ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); tp = state->args->trans; /* @@ -970,13 +1080,13 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); memmove(btree, &save_node->btree[0], tmp); btree = &save_node->btree[0]; - xfs_da_log_buf(tp, save_blk->bp, + xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, btree, (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) * sizeof(xfs_da_node_entry_t))); } else { btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)]; - xfs_da_log_buf(tp, save_blk->bp, + xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, btree, be16_to_cpu(drop_node->hdr.count) * sizeof(xfs_da_node_entry_t))); @@ -989,7 +1099,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, memcpy(btree, &drop_node->btree[0], tmp); be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); - xfs_da_log_buf(tp, save_blk->bp, + xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, sizeof(save_node->hdr))); @@ -1040,14 +1150,14 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) * Read the next node down in the tree. 
*/ blk->blkno = blkno; - error = xfs_da_read_buf(args->trans, args->dp, blkno, + error = xfs_da_node_read(args->trans, args->dp, blkno, -1, &blk->bp, args->whichfork); if (error) { blk->blkno = 0; state->path.active--; return(error); } - curr = blk->bp->data; + curr = blk->bp->b_addr; blk->magic = be16_to_cpu(curr->magic); ASSERT(blk->magic == XFS_DA_NODE_MAGIC || blk->magic == XFS_DIR2_LEAFN_MAGIC || @@ -1057,7 +1167,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) * Search an intermediate node for a match. */ if (blk->magic == XFS_DA_NODE_MAGIC) { - node = blk->bp->data; + node = blk->bp->b_addr; max = be16_to_cpu(node->hdr.count); blk->hashval = be32_to_cpu(node->btree[max-1].hashval); @@ -1163,15 +1273,15 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, xfs_da_blkinfo_t *old_info, *new_info, *tmp_info; xfs_da_args_t *args; int before=0, error; - xfs_dabuf_t *bp; + struct xfs_buf *bp; /* * Set up environment. */ args = state->args; ASSERT(args != NULL); - old_info = old_blk->bp->data; - new_info = new_blk->bp->data; + old_info = old_blk->bp->b_addr; + new_info = new_blk->bp->b_addr; ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || old_blk->magic == XFS_DIR2_LEAFN_MAGIC || old_blk->magic == XFS_ATTR_LEAF_MAGIC); @@ -1198,48 +1308,48 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, /* * Link new block in before existing block. 
*/ + trace_xfs_da_link_before(args); new_info->forw = cpu_to_be32(old_blk->blkno); new_info->back = old_info->back; if (old_info->back) { - error = xfs_da_read_buf(args->trans, args->dp, + error = xfs_da_node_read(args->trans, args->dp, be32_to_cpu(old_info->back), -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic)); ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); tmp_info->forw = cpu_to_be32(new_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); - xfs_da_buf_done(bp); + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); } old_info->back = cpu_to_be32(new_blk->blkno); } else { /* * Link new block in after existing block. */ + trace_xfs_da_link_after(args); new_info->forw = old_info->forw; new_info->back = cpu_to_be32(old_blk->blkno); if (old_info->forw) { - error = xfs_da_read_buf(args->trans, args->dp, + error = xfs_da_node_read(args->trans, args->dp, be32_to_cpu(old_info->forw), -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(tmp_info->magic == old_info->magic); ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); tmp_info->back = cpu_to_be32(new_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); - xfs_da_buf_done(bp); + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); } old_info->forw = cpu_to_be32(new_blk->blkno); } - xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); - xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); + xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); + xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); return(0); } @@ -1247,14 +1357,16 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, * Compare two intermediate nodes for "order". 
*/ STATIC int -xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) +xfs_da_node_order( + struct xfs_buf *node1_bp, + struct xfs_buf *node2_bp) { xfs_da_intnode_t *node1, *node2; - node1 = node1_bp->data; - node2 = node2_bp->data; - ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && - (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); + node1 = node1_bp->b_addr; + node2 = node2_bp->b_addr; + ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) && + node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || @@ -1269,12 +1381,14 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) * Pick up the last hashvalue from an intermediate node. */ STATIC uint -xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) +xfs_da_node_lasthash( + struct xfs_buf *bp, + int *count) { xfs_da_intnode_t *node; - node = bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + node = bp->b_addr; + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if (count) *count = be16_to_cpu(node->hdr.count); if (!node->hdr.count) @@ -1291,7 +1405,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, { xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info; xfs_da_args_t *args; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int error; /* @@ -1299,8 +1413,8 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, */ args = state->args; ASSERT(args != NULL); - save_info = save_blk->bp->data; - drop_info = drop_blk->bp->data; + save_info = save_blk->bp->b_addr; + drop_info = drop_blk->bp->b_addr; ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || save_blk->magic == XFS_DIR2_LEAFN_MAGIC || save_blk->magic == XFS_ATTR_LEAF_MAGIC); @@ -1316,42 +1430,42 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, * Unlink 
the leaf block from the doubly linked chain of leaves. */ if (be32_to_cpu(save_info->back) == drop_blk->blkno) { + trace_xfs_da_unlink_back(args); save_info->back = drop_info->back; if (drop_info->back) { - error = xfs_da_read_buf(args->trans, args->dp, + error = xfs_da_node_read(args->trans, args->dp, be32_to_cpu(drop_info->back), -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); tmp_info->forw = cpu_to_be32(save_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info) - 1); - xfs_da_buf_done(bp); } } else { + trace_xfs_da_unlink_forward(args); save_info->forw = drop_info->forw; if (drop_info->forw) { - error = xfs_da_read_buf(args->trans, args->dp, + error = xfs_da_node_read(args->trans, args->dp, be32_to_cpu(drop_info->forw), -1, &bp, args->whichfork); if (error) return(error); ASSERT(bp != NULL); - tmp_info = bp->data; + tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); tmp_info->back = cpu_to_be32(save_blk->blkno); - xfs_da_log_buf(args->trans, bp, 0, + xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info) - 1); - xfs_da_buf_done(bp); } } - xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); + xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); return(0); } @@ -1374,6 +1488,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, xfs_dablk_t blkno=0; int level, error; + trace_xfs_da_path_shift(state->args); + /* * Roll up the Btree looking for the first block where our * current index is not at the edge of the block. 
Note that @@ -1386,8 +1502,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, level = (path->active-1) - 1; /* skip bottom layer in path */ for (blk = &path->blk[level]; level >= 0; blk--, level--) { ASSERT(blk->bp != NULL); - node = blk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + node = blk->bp->b_addr; + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { blk->index++; blkno = be32_to_cpu(node->btree[blk->index].before); @@ -1414,21 +1530,21 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, * (if it's dirty, trans won't actually let go) */ if (release) - xfs_da_brelse(args->trans, blk->bp); + xfs_trans_brelse(args->trans, blk->bp); /* * Read the next child block. */ blk->blkno = blkno; - error = xfs_da_read_buf(args->trans, args->dp, blkno, -1, - &blk->bp, args->whichfork); + error = xfs_da_node_read(args->trans, args->dp, blkno, -1, + &blk->bp, args->whichfork); if (error) return(error); ASSERT(blk->bp != NULL); - info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || - be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || - be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); + info = blk->bp->b_addr; + ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || + info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; @@ -1521,79 +1637,60 @@ const struct xfs_nameops xfs_default_nameops = { .compname = xfs_da_compname }; -/* - * Add a block to the btree ahead of the file. - * Return the new block number to the caller. 
- */ int -xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) +xfs_da_grow_inode_int( + struct xfs_da_args *args, + xfs_fileoff_t *bno, + int count) { - xfs_fileoff_t bno, b; - xfs_bmbt_irec_t map; - xfs_bmbt_irec_t *mapp; - xfs_inode_t *dp; - int nmap, error, w, count, c, got, i, mapi; - xfs_trans_t *tp; - xfs_mount_t *mp; - xfs_drfsbno_t nblks; - - dp = args->dp; - mp = dp->i_mount; - w = args->whichfork; - tp = args->trans; - nblks = dp->i_d.di_nblocks; + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + int w = args->whichfork; + xfs_drfsbno_t nblks = dp->i_d.di_nblocks; + struct xfs_bmbt_irec map, *mapp; + int nmap, error, got, i, mapi; - /* - * For new directories adjust the file offset and block count. - */ - if (w == XFS_DATA_FORK) { - bno = mp->m_dirleafblk; - count = mp->m_dirblkfsbs; - } else { - bno = 0; - count = 1; - } /* * Find a spot in the file space to put the new block. */ - if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) + error = xfs_bmap_first_unused(tp, dp, count, bno, w); + if (error) return error; - if (w == XFS_DATA_FORK) - ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); + /* * Try mapping it in one filesystem block. */ nmap = 1; ASSERT(args->firstblock != NULL); - if ((error = xfs_bmapi(tp, dp, bno, count, - xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| - XFS_BMAPI_CONTIG, + error = xfs_bmapi_write(tp, dp, *bno, count, + xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist))) { + args->flist); + if (error) return error; - } + ASSERT(nmap <= 1); if (nmap == 1) { mapp = &map; mapi = 1; - } - /* - * If we didn't get it and the block might work if fragmented, - * try without the CONTIG flag. Loop until we get it all. 
- */ - else if (nmap == 0 && count > 1) { + } else if (nmap == 0 && count > 1) { + xfs_fileoff_t b; + int c; + + /* + * If we didn't get it and the block might work if fragmented, + * try without the CONTIG flag. Loop until we get it all. + */ mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - for (b = bno, mapi = 0; b < bno + count; ) { + for (b = *bno, mapi = 0; b < *bno + count; ) { nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(bno + count - b); - if ((error = xfs_bmapi(tp, dp, b, c, - xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| - XFS_BMAPI_METADATA, + c = (int)(*bno + count - b); + error = xfs_bmapi_write(tp, dp, b, c, + xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist))) { - kmem_free(mapp); - return error; - } + &mapp[mapi], &nmap, args->flist); + if (error) + goto out_free_map; if (nmap < 1) break; mapi += nmap; @@ -1604,24 +1701,55 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) mapi = 0; mapp = NULL; } + /* * Count the blocks we got, make sure it matches the total. */ for (i = 0, got = 0; i < mapi; i++) got += mapp[i].br_blockcount; - if (got != count || mapp[0].br_startoff != bno || + if (got != count || mapp[0].br_startoff != *bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - bno + count) { - if (mapp != &map) - kmem_free(mapp); - return XFS_ERROR(ENOSPC); + *bno + count) { + error = XFS_ERROR(ENOSPC); + goto out_free_map; } - if (mapp != &map) - kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ args->total -= dp->i_d.di_nblocks - nblks; - *new_blkno = (xfs_dablk_t)bno; - return 0; + +out_free_map: + if (mapp != &map) + kmem_free(mapp); + return error; +} + +/* + * Add a block to the btree ahead of the file. + * Return the new block number to the caller. 
+ */ +int +xfs_da_grow_inode( + struct xfs_da_args *args, + xfs_dablk_t *new_blkno) +{ + xfs_fileoff_t bno; + int count; + int error; + + trace_xfs_da_grow_inode(args); + + if (args->whichfork == XFS_DATA_FORK) { + bno = args->dp->i_mount->m_dirleafblk; + count = args->dp->i_mount->m_dirblkfsbs; + } else { + bno = 0; + count = 1; + } + + error = xfs_da_grow_inode_int(args, &bno, count); + if (!error) + *new_blkno = (xfs_dablk_t)bno; + return error; } /* @@ -1633,11 +1761,13 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) * a bmap btree split to do that. */ STATIC int -xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, - xfs_dabuf_t **dead_bufp) +xfs_da_swap_lastblock( + xfs_da_args_t *args, + xfs_dablk_t *dead_blknop, + struct xfs_buf **dead_bufp) { xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno; - xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf; + struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf; xfs_fileoff_t lastoff; xfs_inode_t *ip; xfs_trans_t *tp; @@ -1648,6 +1778,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, xfs_dir2_leaf_t *dead_leaf2; xfs_dahash_t dead_hash; + trace_xfs_da_swap_lastblock(args); + dead_buf = *dead_bufp; dead_blkno = *dead_blknop; tp = args->trans; @@ -1668,23 +1800,24 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, * Read the last block in the btree space. */ last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; - if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w))) + error = xfs_da_node_read(tp, ip, last_blkno, -1, &last_buf, w); + if (error) return error; /* * Copy the last block into the dead buffer and log it. 
*/ - memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize); - xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); - dead_info = dead_buf->data; + memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize); + xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); + dead_info = dead_buf->b_addr; /* * Get values from the moved block. */ - if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { + if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); } else { - ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); + ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); dead_node = (xfs_da_intnode_t *)dead_info; dead_level = be16_to_cpu(dead_node->hdr.level); dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); @@ -1694,9 +1827,10 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, * If the moved block has a left sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->back))) { - if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) + error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); + if (error) goto done; - sib_info = sib_buf->data; + sib_info = sib_buf->b_addr; if (unlikely( be32_to_cpu(sib_info->forw) != last_blkno || sib_info->magic != dead_info->magic)) { @@ -1706,19 +1840,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, goto done; } sib_info->forw = cpu_to_be32(dead_blkno); - xfs_da_log_buf(tp, sib_buf, + xfs_trans_log_buf(tp, sib_buf, XFS_DA_LOGRANGE(sib_info, &sib_info->forw, sizeof(sib_info->forw))); - xfs_da_buf_done(sib_buf); sib_buf = NULL; } /* * If the moved block has a right sibling, fix up the pointers. 
*/ if ((sib_blkno = be32_to_cpu(dead_info->forw))) { - if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) + error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); + if (error) goto done; - sib_info = sib_buf->data; + sib_info = sib_buf->b_addr; if (unlikely( be32_to_cpu(sib_info->back) != last_blkno || sib_info->magic != dead_info->magic)) { @@ -1728,10 +1862,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, goto done; } sib_info->back = cpu_to_be32(dead_blkno); - xfs_da_log_buf(tp, sib_buf, + xfs_trans_log_buf(tp, sib_buf, XFS_DA_LOGRANGE(sib_info, &sib_info->back, sizeof(sib_info->back))); - xfs_da_buf_done(sib_buf); sib_buf = NULL; } par_blkno = mp->m_dirleafblk; @@ -1740,11 +1873,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, * Walk down the tree looking for the parent of the moved block. */ for (;;) { - if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) + error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); + if (error) goto done; - par_node = par_buf->data; - if (unlikely( - be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || + par_node = par_buf->b_addr; + if (unlikely(par_node->hdr.info.magic != + cpu_to_be16(XFS_DA_NODE_MAGIC) || (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -1766,7 +1900,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_blkno = be32_to_cpu(par_node->btree[entno].before); if (level == dead_level + 1) break; - xfs_da_brelse(tp, par_buf); + xfs_trans_brelse(tp, par_buf); par_buf = NULL; } /* @@ -1782,7 +1916,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if (entno < be16_to_cpu(par_node->hdr.count)) break; par_blkno = be32_to_cpu(par_node->hdr.info.forw); - xfs_da_brelse(tp, par_buf); + xfs_trans_brelse(tp, par_buf); par_buf = NULL; if (unlikely(par_blkno == 0)) { 
XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", @@ -1790,12 +1924,13 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, error = XFS_ERROR(EFSCORRUPTED); goto done; } - if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) + error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); + if (error) goto done; - par_node = par_buf->data; + par_node = par_buf->b_addr; if (unlikely( be16_to_cpu(par_node->hdr.level) != level || - be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { + par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); @@ -1807,20 +1942,18 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, * Update the parent entry pointing to the moved block. */ par_node->btree[entno].before = cpu_to_be32(dead_blkno); - xfs_da_log_buf(tp, par_buf, + xfs_trans_log_buf(tp, par_buf, XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, sizeof(par_node->btree[entno].before))); - xfs_da_buf_done(par_buf); - xfs_da_buf_done(dead_buf); *dead_blknop = last_blkno; *dead_bufp = last_buf; return 0; done: if (par_buf) - xfs_da_brelse(tp, par_buf); + xfs_trans_brelse(tp, par_buf); if (sib_buf) - xfs_da_brelse(tp, sib_buf); - xfs_da_brelse(tp, last_buf); + xfs_trans_brelse(tp, sib_buf); + xfs_trans_brelse(tp, last_buf); return error; } @@ -1828,14 +1961,18 @@ done: * Remove a btree block from a directory or attribute. 
*/ int -xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, - xfs_dabuf_t *dead_buf) +xfs_da_shrink_inode( + xfs_da_args_t *args, + xfs_dablk_t dead_blkno, + struct xfs_buf *dead_buf) { xfs_inode_t *dp; int done, error, w, count; xfs_trans_t *tp; xfs_mount_t *mp; + trace_xfs_da_shrink_inode(args); + dp = args->dp; w = args->whichfork; tp = args->trans; @@ -1862,7 +1999,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, break; } } - xfs_da_binval(tp, dead_buf); + xfs_trans_binval(tp, dead_buf); return error; } @@ -1894,36 +2031,75 @@ xfs_da_map_covers_blocks( } /* - * Make a dabuf. - * Used for get_buf, read_buf, read_bufr, and reada_buf. + * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. + * + * For the single map case, it is assumed that the caller has provided a pointer + * to a valid xfs_buf_map. For the multiple map case, this function will + * allocate the xfs_buf_map to hold all the maps and replace the caller's single + * map pointer with the allocated map. 
*/ -int -xfs_da_do_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t *mappedbnop, - xfs_dabuf_t **bpp, - int whichfork, - int caller, - inst_t *ra) +static int +xfs_buf_map_from_irec( + struct xfs_mount *mp, + struct xfs_buf_map **mapp, + unsigned int *nmaps, + struct xfs_bmbt_irec *irecs, + unsigned int nirecs) { - xfs_buf_t *bp = NULL; - xfs_buf_t **bplist; - int error=0; - int i; - xfs_bmbt_irec_t map; - xfs_bmbt_irec_t *mapp; - xfs_daddr_t mappedbno; - xfs_mount_t *mp; - int nbplist=0; - int nfsb; - int nmap; - xfs_dabuf_t *rbp; + struct xfs_buf_map *map; + int i; + + ASSERT(*nmaps == 1); + ASSERT(nirecs >= 1); + + if (nirecs > 1) { + map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP); + if (!map) + return ENOMEM; + *mapp = map; + } + + *nmaps = nirecs; + map = *mapp; + for (i = 0; i < *nmaps; i++) { + ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && + irecs[i].br_startblock != HOLESTARTBLOCK); + map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); + map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); + } + return 0; +} + +/* + * Map the block we are given ready for reading. There are three possible return + * values: + * -1 - will be returned if we land in a hole and mappedbno == -2 so the + * caller knows not to execute a subsequent read. + * 0 - if we mapped the block successfully + * >0 - positive error number if there was an error. + */ +static int +xfs_dabuf_map( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + int whichfork, + struct xfs_buf_map **map, + int *nmaps) +{ + struct xfs_mount *mp = dp->i_mount; + int nfsb; + int error = 0; + struct xfs_bmbt_irec irec; + struct xfs_bmbt_irec *irecs = &irec; + int nirecs; + + ASSERT(map && *map); + ASSERT(*nmaps == 1); - mp = dp->i_mount; nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; - mappedbno = *mappedbnop; + /* * Caller doesn't have a mapping. 
-2 means don't complain * if we land in a hole. @@ -1932,139 +2108,154 @@ xfs_da_do_buf( /* * Optimize the one-block case. */ - if (nfsb == 1) { - xfs_fsblock_t fsb; + if (nfsb != 1) + irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP); - if ((error = - xfs_bmapi_single(trans, dp, whichfork, &fsb, - (xfs_fileoff_t)bno))) { - return error; - } - mapp = &map; - if (fsb == NULLFSBLOCK) { - nmap = 0; - } else { - map.br_startblock = fsb; - map.br_startoff = (xfs_fileoff_t)bno; - map.br_blockcount = 1; - nmap = 1; - } - } else { - mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP); - nmap = nfsb; - if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno, - nfsb, - XFS_BMAPI_METADATA | - xfs_bmapi_aflag(whichfork), - NULL, 0, mapp, &nmap, NULL))) - goto exit0; - } + nirecs = nfsb; + error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, + &nirecs, xfs_bmapi_aflag(whichfork)); + if (error) + goto out; } else { - map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); - map.br_startoff = (xfs_fileoff_t)bno; - map.br_blockcount = nfsb; - mapp = &map; - nmap = 1; + irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); + irecs->br_startoff = (xfs_fileoff_t)bno; + irecs->br_blockcount = nfsb; + irecs->br_state = 0; + nirecs = 1; } - if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) { - error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); + + if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { + error = mappedbno == -2 ? 
-1 : XFS_ERROR(EFSCORRUPTED); if (unlikely(error == EFSCORRUPTED)) { if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", - (long long)bno); - cmn_err(CE_ALERT, "dir: inode %lld\n", + int i; + xfs_alert(mp, "%s: bno %lld dir: inode %lld", + __func__, (long long)bno, (long long)dp->i_ino); - for (i = 0; i < nmap; i++) { - cmn_err(CE_ALERT, - "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", + for (i = 0; i < *nmaps; i++) { + xfs_alert(mp, +"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", i, - (long long)mapp[i].br_startoff, - (long long)mapp[i].br_startblock, - (long long)mapp[i].br_blockcount, - mapp[i].br_state); + (long long)irecs[i].br_startoff, + (long long)irecs[i].br_startblock, + (long long)irecs[i].br_blockcount, + irecs[i].br_state); } } XFS_ERROR_REPORT("xfs_da_do_buf(1)", XFS_ERRLEVEL_LOW, mp); } - goto exit0; + goto out; } - if (caller != 3 && nmap > 1) { - bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP); - nbplist = 0; - } else - bplist = NULL; - /* - * Turn the mapping(s) into buffer(s). - */ - for (i = 0; i < nmap; i++) { - int nmapped; - - mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock); - if (i == 0) - *mappedbnop = mappedbno; - nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount); - switch (caller) { - case 0: - bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, - mappedbno, nmapped, 0); - error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO); - break; - case 1: - case 2: - bp = NULL; - error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp, - mappedbno, nmapped, 0, &bp); - break; - case 3: - xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped); + error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); +out: + if (irecs != &irec) + kmem_free(irecs); + return error; +} + +/* + * Get a buffer for the dir/attr block. 
+ */ +int +xfs_da_get_buf( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp, + int whichfork) +{ + struct xfs_buf *bp; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + *bpp = NULL; + mapp = &map; + nmap = 1; + error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) error = 0; - bp = NULL; - break; - } - if (error) { - if (bp) - xfs_trans_brelse(trans, bp); - goto exit1; - } - if (!bp) - continue; - if (caller == 1) { - if (whichfork == XFS_ATTR_FORK) { - XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE, - XFS_ATTR_BTREE_REF); - } else { - XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE, - XFS_DIR_BTREE_REF); - } - } - if (bplist) { - bplist[nbplist++] = bp; - } + goto out_free; } - /* - * Build a dabuf structure. - */ - if (bplist) { - rbp = xfs_da_buf_make(nbplist, bplist, ra); - } else if (bp) - rbp = xfs_da_buf_make(1, &bp, ra); + + bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, + mapp, nmap, 0); + error = bp ? bp->b_error : XFS_ERROR(EIO); + if (error) { + xfs_trans_brelse(trans, bp); + goto out_free; + } + + *bpp = bp; + +out_free: + if (mapp != &map) + kmem_free(mapp); + + return error; +} + +/* + * Get a buffer for the dir/attr block, fill in the contents. 
+ */ +int +xfs_da_read_buf( + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp, + int whichfork, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; + + *bpp = NULL; + mapp = &map; + nmap = 1; + error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } + + error = xfs_trans_read_buf_map(dp->i_mount, trans, + dp->i_mount->m_ddev_targp, + mapp, nmap, 0, &bp, ops); + if (error) + goto out_free; + + if (whichfork == XFS_ATTR_FORK) + xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); else - rbp = NULL; + xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); + /* - * For read_buf, check the magic number. + * This verification code will be moved to a CRC verification callback + * function so just leave it here unchanged until then. 
*/ - if (caller == 1) { - xfs_dir2_data_t *data; - xfs_dir2_free_t *free; - xfs_da_blkinfo_t *info; + { + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + xfs_dir2_free_t *free = bp->b_addr; + xfs_da_blkinfo_t *info = bp->b_addr; uint magic, magic1; + struct xfs_mount *mp = dp->i_mount; - info = rbp->data; - data = rbp->data; - free = rbp->data; magic = be16_to_cpu(info->magic); - magic1 = be32_to_cpu(data->hdr.magic); + magic1 = be32_to_cpu(hdr->magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && (magic != XFS_ATTR_LEAF_MAGIC) && @@ -2072,71 +2263,23 @@ xfs_da_do_buf( (magic != XFS_DIR2_LEAFN_MAGIC) && (magic1 != XFS_DIR2_BLOCK_MAGIC) && (magic1 != XFS_DIR2_DATA_MAGIC) && - (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), + (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { - trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); + trace_xfs_da_btree_corrupt(bp, _RET_IP_); XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", XFS_ERRLEVEL_LOW, mp, info); error = XFS_ERROR(EFSCORRUPTED); - xfs_da_brelse(trans, rbp); - nbplist = 0; - goto exit1; + xfs_trans_brelse(trans, bp); + goto out_free; } } - if (bplist) { - kmem_free(bplist); - } - if (mapp != &map) { - kmem_free(mapp); - } - if (bpp) - *bpp = rbp; - return 0; -exit1: - if (bplist) { - for (i = 0; i < nbplist; i++) - xfs_trans_brelse(trans, bplist[i]); - kmem_free(bplist); - } -exit0: + *bpp = bp; +out_free: if (mapp != &map) kmem_free(mapp); - if (bpp) - *bpp = NULL; - return error; -} -/* - * Get a buffer for the dir/attr block. - */ -int -xfs_da_get_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, - int whichfork) -{ - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, - (inst_t *)__return_address); -} - -/* - * Get a buffer for the dir/attr block, fill in the contents. 
- */ -int -xfs_da_read_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - xfs_dabuf_t **bpp, - int whichfork) -{ - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, - (inst_t *)__return_address); + return error; } /* @@ -2144,23 +2287,42 @@ xfs_da_read_buf( */ xfs_daddr_t xfs_da_reada_buf( - xfs_trans_t *trans, - xfs_inode_t *dp, - xfs_dablk_t bno, - int whichfork) + struct xfs_trans *trans, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + int whichfork, + const struct xfs_buf_ops *ops) { - xfs_daddr_t rval; + struct xfs_buf_map map; + struct xfs_buf_map *mapp; + int nmap; + int error; - rval = -1; - if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, - (inst_t *)__return_address)) + mapp = &map; + nmap = 1; + error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + &mapp, &nmap); + if (error) { + /* mapping a hole is not an error, but we don't continue */ + if (error == -1) + error = 0; + goto out_free; + } + + mappedbno = mapp[0].bm_bn; + xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); + +out_free: + if (mapp != &map) + kmem_free(mapp); + + if (error) return -1; - else - return rval; + return mappedbno; } kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ -kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ /* * Allocate a dir-state structure. 
@@ -2180,13 +2342,8 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state) { int i; - for (i = 0; i < state->altpath.active; i++) { - if (state->altpath.blk[i].bp) { - if (state->altpath.blk[i].bp != state->path.blk[i].bp) - xfs_da_buf_done(state->altpath.blk[i].bp); - state->altpath.blk[i].bp = NULL; - } - } + for (i = 0; i < state->altpath.active; i++) + state->altpath.blk[i].bp = NULL; state->altpath.active = 0; } @@ -2196,244 +2353,9 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state) void xfs_da_state_free(xfs_da_state_t *state) { - int i; - xfs_da_state_kill_altpath(state); - for (i = 0; i < state->path.active; i++) { - if (state->path.blk[i].bp) - xfs_da_buf_done(state->path.blk[i].bp); - } - if (state->extravalid && state->extrablk.bp) - xfs_da_buf_done(state->extrablk.bp); #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ kmem_zone_free(xfs_da_state_zone, state); } - -#ifdef XFS_DABUF_DEBUG -xfs_dabuf_t *xfs_dabuf_global_list; -static DEFINE_SPINLOCK(xfs_dabuf_global_lock); -#endif - -/* - * Create a dabuf. 
- */ -/* ARGSUSED */ -STATIC xfs_dabuf_t * -xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) -{ - xfs_buf_t *bp; - xfs_dabuf_t *dabuf; - int i; - int off; - - if (nbuf == 1) - dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); - else - dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); - dabuf->dirty = 0; -#ifdef XFS_DABUF_DEBUG - dabuf->ra = ra; - dabuf->target = XFS_BUF_TARGET(bps[0]); - dabuf->blkno = XFS_BUF_ADDR(bps[0]); -#endif - if (nbuf == 1) { - dabuf->nbuf = 1; - bp = bps[0]; - dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); - dabuf->data = XFS_BUF_PTR(bp); - dabuf->bps[0] = bp; - } else { - dabuf->nbuf = nbuf; - for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) { - dabuf->bps[i] = bp = bps[i]; - dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp)); - } - dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); - for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { - bp = bps[i]; - memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp), - XFS_BUF_COUNT(bp)); - } - } -#ifdef XFS_DABUF_DEBUG - { - xfs_dabuf_t *p; - - spin_lock(&xfs_dabuf_global_lock); - for (p = xfs_dabuf_global_list; p; p = p->next) { - ASSERT(p->blkno != dabuf->blkno || - p->target != dabuf->target); - } - dabuf->prev = NULL; - if (xfs_dabuf_global_list) - xfs_dabuf_global_list->prev = dabuf; - dabuf->next = xfs_dabuf_global_list; - xfs_dabuf_global_list = dabuf; - spin_unlock(&xfs_dabuf_global_lock); - } -#endif - return dabuf; -} - -/* - * Un-dirty a dabuf. - */ -STATIC void -xfs_da_buf_clean(xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - int i; - int off; - - if (dabuf->dirty) { - ASSERT(dabuf->nbuf > 1); - dabuf->dirty = 0; - for (i = off = 0; i < dabuf->nbuf; - i++, off += XFS_BUF_COUNT(bp)) { - bp = dabuf->bps[i]; - memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off, - XFS_BUF_COUNT(bp)); - } - } -} - -/* - * Release a dabuf. 
- */ -void -xfs_da_buf_done(xfs_dabuf_t *dabuf) -{ - ASSERT(dabuf); - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if (dabuf->dirty) - xfs_da_buf_clean(dabuf); - if (dabuf->nbuf > 1) - kmem_free(dabuf->data); -#ifdef XFS_DABUF_DEBUG - { - spin_lock(&xfs_dabuf_global_lock); - if (dabuf->prev) - dabuf->prev->next = dabuf->next; - else - xfs_dabuf_global_list = dabuf->next; - if (dabuf->next) - dabuf->next->prev = dabuf->prev; - spin_unlock(&xfs_dabuf_global_lock); - } - memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); -#endif - if (dabuf->nbuf == 1) - kmem_zone_free(xfs_dabuf_zone, dabuf); - else - kmem_free(dabuf); -} - -/* - * Log transaction from a dabuf. - */ -void -xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) -{ - xfs_buf_t *bp; - uint f; - int i; - uint l; - int off; - - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if (dabuf->nbuf == 1) { - ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0])); - xfs_trans_log_buf(tp, dabuf->bps[0], first, last); - return; - } - dabuf->dirty = 1; - ASSERT(first <= last); - for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { - bp = dabuf->bps[i]; - f = off; - l = f + XFS_BUF_COUNT(bp) - 1; - if (f < first) - f = first; - if (l > last) - l = last; - if (f <= l) - xfs_trans_log_buf(tp, bp, f - off, l - off); - /* - * B_DONE is set by xfs_trans_log buf. - * If we don't set it on a new buffer (get not read) - * then if we don't put anything in the buffer it won't - * be set, and at commit it it released into the cache, - * and then a read will fail. - */ - else if (!(XFS_BUF_ISDONE(bp))) - XFS_BUF_DONE(bp); - } - ASSERT(last < off); -} - -/* - * Release dabuf from a transaction. - * Have to free up the dabuf before the buffers are released, - * since the synchronization on the dabuf is really the lock on the buffer. 
- */ -void -xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - xfs_buf_t **bplist; - int i; - int nbuf; - - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if ((nbuf = dabuf->nbuf) == 1) { - bplist = &bp; - bp = dabuf->bps[0]; - } else { - bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP); - memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist)); - } - xfs_da_buf_done(dabuf); - for (i = 0; i < nbuf; i++) - xfs_trans_brelse(tp, bplist[i]); - if (bplist != &bp) - kmem_free(bplist); -} - -/* - * Invalidate dabuf from a transaction. - */ -void -xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - xfs_buf_t **bplist; - int i; - int nbuf; - - ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); - if ((nbuf = dabuf->nbuf) == 1) { - bplist = &bp; - bp = dabuf->bps[0]; - } else { - bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP); - memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist)); - } - xfs_da_buf_done(dabuf); - for (i = 0; i < nbuf; i++) - xfs_trans_binval(tp, bplist[i]); - if (bplist != &bp) - kmem_free(bplist); -} - -/* - * Get the first daddr from a dabuf. - */ -xfs_daddr_t -xfs_da_blkno(xfs_dabuf_t *dabuf) -{ - ASSERT(dabuf->nbuf); - ASSERT(dabuf->data); - return XFS_BUF_ADDR(dabuf->bps[0]); -} diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c index d475118b5..6a4027f6b 100644 --- a/libxfs/xfs_dir2.c +++ b/libxfs/xfs_dir2.c @@ -98,15 +98,15 @@ int xfs_dir_isempty( xfs_inode_t *dp) { - xfs_dir2_sf_t *sfp; + xfs_dir2_sf_hdr_t *sfp; - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); + ASSERT(S_ISDIR(dp->i_d.di_mode)); if (dp->i_d.di_size == 0) /* might happen during shutdown. 
*/ return 1; if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) return 0; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - return !sfp->hdr.count; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + return !sfp->count; } /* @@ -135,7 +135,7 @@ xfs_dir_ino_validate( XFS_AGINO_TO_INO(mp, agno, agino) == ino; if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, XFS_RANDOM_DIR_INO_VALIDATE))) { - xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", + xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); @@ -158,7 +158,7 @@ xfs_dir_init( memset((char *)&args, 0, sizeof(args)); args.dp = dp; args.trans = tp; - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); + ASSERT(S_ISDIR(dp->i_d.di_mode)); if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) return error; return xfs_dir2_sf_create(&args, pdp->i_ino); @@ -181,7 +181,7 @@ xfs_dir_createname( int rval; int v; /* type-checking value */ - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); + ASSERT(S_ISDIR(dp->i_d.di_mode)); if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) return rval; XFS_STATS_INC(xs_dir_create); @@ -257,7 +257,7 @@ xfs_dir_lookup( int rval; int v; /* type-checking value */ - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); + ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_lookup); memset(&args, 0, sizeof(xfs_da_args_t)); @@ -312,7 +312,7 @@ xfs_dir_removename( int rval; int v; /* type-checking value */ - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); + ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_remove); memset(&args, 0, sizeof(xfs_da_args_t)); @@ -359,7 +359,7 @@ xfs_dir_replace( int rval; int v; /* type-checking value */ - ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); + ASSERT(S_ISDIR(dp->i_d.di_mode)); if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) return rval; @@ -397,129 +397,34 @@ xfs_dir_replace( /* * Add a block to the directory. 
- * This routine is for data and free blocks, not leaf/node blocks - * which are handled by xfs_da_grow_inode. + * + * This routine is for data and free blocks, not leaf/node blocks which are + * handled by xfs_da_grow_inode. */ int xfs_dir2_grow_inode( - xfs_da_args_t *args, - int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ - xfs_dir2_db_t *dbp) /* out: block number added */ + struct xfs_da_args *args, + int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ + xfs_dir2_db_t *dbp) /* out: block number added */ { - xfs_fileoff_t bno; /* directory offset of new block */ - int count; /* count of filesystem blocks */ - xfs_inode_t *dp; /* incore directory inode */ - int error; - int got; /* blocks actually mapped */ - int i; - xfs_bmbt_irec_t map; /* single structure for bmap */ - int mapi; /* mapping index */ - xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ - xfs_mount_t *mp; - int nmap; /* number of bmap entries */ - xfs_trans_t *tp; - xfs_drfsbno_t nblks; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + xfs_fileoff_t bno; /* directory offset of new block */ + int count; /* count of filesystem blocks */ + int error; trace_xfs_dir2_grow_inode(args, space); - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); count = mp->m_dirblkfsbs; - /* - * Find the first hole for our block. - */ - if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) - return error; - nmap = 1; - ASSERT(args->firstblock != NULL); - /* - * Try mapping the new block contiguously (one extent). - */ - if ((error = xfs_bmapi(tp, dp, bno, count, - XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, - args->firstblock, args->total, &map, &nmap, - args->flist))) - return error; - ASSERT(nmap <= 1); - if (nmap == 1) { - mapp = &map; - mapi = 1; - } - /* - * Didn't work and this is a multiple-fsb directory block. 
- * Try again with contiguous flag turned on. - */ - else if (nmap == 0 && count > 1) { - xfs_fileoff_t b; /* current file offset */ - /* - * Space for maximum number of mappings. - */ - mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - /* - * Iterate until we get to the end of our block. - */ - for (b = bno, mapi = 0; b < bno + count; ) { - int c; /* current fsb count */ - - /* - * Can't map more than MAX_NMAP at once. - */ - nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(bno + count - b); - if ((error = xfs_bmapi(tp, dp, b, c, - XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, - args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist))) { - kmem_free(mapp); - return error; - } - if (nmap < 1) - break; - /* - * Add this bunch into our table, go to the next offset. - */ - mapi += nmap; - b = mapp[mapi - 1].br_startoff + - mapp[mapi - 1].br_blockcount; - } - } - /* - * Didn't work. - */ - else { - mapi = 0; - mapp = NULL; - } - /* - * See how many fsb's we got. - */ - for (i = 0, got = 0; i < mapi; i++) - got += mapp[i].br_blockcount; - /* - * Didn't get enough fsb's, or the first/last block's are wrong. - */ - if (got != count || mapp[0].br_startoff != bno || - mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - bno + count) { - if (mapp != &map) - kmem_free(mapp); - return XFS_ERROR(ENOSPC); - } - /* - * Done with the temporary mapping table. 
- */ - if (mapp != &map) - kmem_free(mapp); + error = xfs_da_grow_inode_int(args, &bno, count); + if (error) + return error; - /* account for newly allocated blocks in reserved blocks total */ - args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* @@ -531,7 +436,7 @@ xfs_dir2_grow_inode( size = XFS_FSB_TO_B(mp, bno + count); if (size > dp->i_d.di_size) { dp->i_d.di_size = size; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); } } return 0; @@ -588,7 +493,7 @@ int xfs_dir2_shrink_inode( xfs_da_args_t *args, xfs_dir2_db_t db, - xfs_dabuf_t *bp) + struct xfs_buf *bp) { xfs_fileoff_t bno; /* directory file offset */ xfs_dablk_t da; /* directory file offset */ @@ -630,7 +535,7 @@ xfs_dir2_shrink_inode( /* * Invalidate the buffer from the transaction. */ - xfs_da_binval(tp, bp); + xfs_trans_binval(tp, bp); /* * If it's not a data block, we're done. */ diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c index b614ea694..7397faa8e 100644 --- a/libxfs/xfs_dir2_block.c +++ b/libxfs/xfs_dir2_block.c @@ -21,10 +21,10 @@ /* * Local function prototypes. 
*/ -static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, xfs_dabuf_t *bp, int first, - int last); -static void xfs_dir2_block_log_tail(xfs_trans_t *tp, xfs_dabuf_t *bp); -static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp, +static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp, + int first, int last); +static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp); +static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp, int *entno); static int xfs_dir2_block_sort(const void *a, const void *b); @@ -40,6 +40,214 @@ xfs_dir_startup(void) xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } +static void +xfs_dir2_block_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + int block_ok = 0; + + block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; + + if (!block_ok) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_dir2_block_read_verify( + struct xfs_buf *bp) +{ + xfs_dir2_block_verify(bp); +} + +static void +xfs_dir2_block_write_verify( + struct xfs_buf *bp) +{ + xfs_dir2_block_verify(bp); +} + +const struct xfs_buf_ops xfs_dir2_block_buf_ops = { + .verify_read = xfs_dir2_block_read_verify, + .verify_write = xfs_dir2_block_write_verify, +}; + +static int +xfs_dir2_block_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = dp->i_mount; + + return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, + XFS_DATA_FORK, &xfs_dir2_block_buf_ops); +} + +static void +xfs_dir2_block_need_space( + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_block_tail *btp, + struct xfs_dir2_leaf_entry *blp, + __be16 **tagpp, + struct xfs_dir2_data_unused **dupp, + struct xfs_dir2_data_unused **enddupp, + int *compact, + int len) +{ 
+ struct xfs_dir2_data_free *bf; + __be16 *tagp = NULL; + struct xfs_dir2_data_unused *dup = NULL; + struct xfs_dir2_data_unused *enddup = NULL; + + *compact = 0; + bf = hdr->bestfree; + + /* + * If there are stale entries we'll use one for the leaf. + */ + if (btp->stale) { + if (be16_to_cpu(bf[0].length) >= len) { + /* + * The biggest entry enough to avoid compaction. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[0].offset)); + goto out; + } + + /* + * Will need to compact to make this work. + * Tag just before the first leaf entry. + */ + *compact = 1; + tagp = (__be16 *)blp - 1; + + /* Data object just before the first leaf entry. */ + dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + + /* + * If it's not free then the data will go where the + * leaf data starts now, if it works at all. + */ + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * + (uint)sizeof(*blp) < len) + dup = NULL; + } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) + dup = NULL; + else + dup = (xfs_dir2_data_unused_t *)blp; + goto out; + } + + /* + * no stale entries, so just use free space. + * Tag just before the first leaf entry. + */ + tagp = (__be16 *)blp - 1; + + /* Data object just before the first leaf entry. */ + enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); + + /* + * If it's not free then can't do this add without cleaning up: + * the space before the first leaf entry needs to be free so it + * can be expanded to hold the pointer to the new entry. + */ + if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + /* + * Check out the biggest freespace and see if it's the same one. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[0].offset)); + if (dup != enddup) { + /* + * Not the same free entry, just check its length. 
+ */ + if (be16_to_cpu(dup->length) < len) + dup = NULL; + goto out; + } + + /* + * It is the biggest freespace, can it hold the leaf too? + */ + if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { + /* + * Yes, use the second-largest entry instead if it works. + */ + if (be16_to_cpu(bf[1].length) >= len) + dup = (xfs_dir2_data_unused_t *) + ((char *)hdr + be16_to_cpu(bf[1].offset)); + else + dup = NULL; + } + } +out: + *tagpp = tagp; + *dupp = dup; + *enddupp = enddup; +} + +/* + * compact the leaf entries. + * Leave the highest-numbered stale entry stale. + * XXX should be the one closest to mid but mid is not yet computed. + */ +static void +xfs_dir2_block_compact( + struct xfs_trans *tp, + struct xfs_buf *bp, + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_block_tail *btp, + struct xfs_dir2_leaf_entry *blp, + int *needlog, + int *lfloghigh, + int *lfloglow) +{ + int fromidx; /* source leaf index */ + int toidx; /* target leaf index */ + int needscan = 0; + int highstale; /* high stale index */ + + fromidx = toidx = be32_to_cpu(btp->count) - 1; + highstale = *lfloghigh = -1; + for (; fromidx >= 0; fromidx--) { + if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { + if (highstale == -1) + highstale = toidx; + else { + if (*lfloghigh == -1) + *lfloghigh = toidx; + continue; + } + } + if (fromidx < toidx) + blp[toidx] = blp[fromidx]; + toidx--; + } + *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); + *lfloghigh -= be32_to_cpu(btp->stale) - 1; + be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); + xfs_dir2_data_make_free(tp, bp, + (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), + needlog, &needscan); + blp += be32_to_cpu(btp->stale) - 1; + btp->stale = cpu_to_be32(1); + /* + * If we now need to rebuild the bestfree map, do so. + * This needs to happen before the next call to use_free. 
+ */ + if (needscan) + xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog); +} + /* * Add an entry to a block directory. */ @@ -47,10 +255,9 @@ int /* error */ xfs_dir2_block_addname( xfs_da_args_t *args) /* directory op arguments */ { - xfs_dir2_data_free_t *bf; /* bestfree table in block */ - xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* buffer for block */ + struct xfs_buf *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ int compact; /* need to compact leaf ents */ xfs_dir2_data_entry_t *dep; /* block data entry */ @@ -78,203 +285,72 @@ xfs_dir2_block_addname( dp = args->dp; tp = args->trans; mp = dp->i_mount; - /* - * Read the (one and only) directory block into dabuf bp. - */ - if ((error = - xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) { + + /* Read the (one and only) directory block into bp. */ + error = xfs_dir2_block_read(tp, dp, &bp); + if (error) return error; - } - ASSERT(bp != NULL); - block = bp->data; - /* - * Check the magic number, corrupted if wrong. - */ - if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { - XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", - XFS_ERRLEVEL_LOW, mp, block); - xfs_da_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); - } + len = xfs_dir2_data_entsize(args->namelen); + /* * Set up pointers to parts of the block. */ - bf = block->hdr.bestfree; - btp = xfs_dir2_block_tail_p(mp, block); + hdr = bp->b_addr; + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); + /* - * No stale entries? Need space for entry and new leaf. - */ - if (!btp->stale) { - /* - * Tag just before the first leaf entry. - */ - tagp = (__be16 *)blp - 1; - /* - * Data object just before the first leaf entry. 
- */ - enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); - /* - * If it's not free then can't do this add without cleaning up: - * the space before the first leaf entry needs to be free so it - * can be expanded to hold the pointer to the new entry. - */ - if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG) - dup = enddup = NULL; - /* - * Check out the biggest freespace and see if it's the same one. - */ - else { - dup = (xfs_dir2_data_unused_t *) - ((char *)block + be16_to_cpu(bf[0].offset)); - if (dup == enddup) { - /* - * It is the biggest freespace, is it too small - * to hold the new leaf too? - */ - if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { - /* - * Yes, we use the second-largest - * entry instead if it works. - */ - if (be16_to_cpu(bf[1].length) >= len) - dup = (xfs_dir2_data_unused_t *) - ((char *)block + - be16_to_cpu(bf[1].offset)); - else - dup = NULL; - } - } else { - /* - * Not the same free entry, - * just check its length. - */ - if (be16_to_cpu(dup->length) < len) { - dup = NULL; - } - } - } - compact = 0; - } - /* - * If there are stale entries we'll use one for the leaf. - * Is the biggest entry enough to avoid compaction? + * Find out if we can reuse stale entries or whether we need extra + * space for entry and new leaf. */ - else if (be16_to_cpu(bf[0].length) >= len) { - dup = (xfs_dir2_data_unused_t *) - ((char *)block + be16_to_cpu(bf[0].offset)); - compact = 0; - } + xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup, + &enddup, &compact, len); + /* - * Will need to compact to make this work. + * Done everything we need for a space check now. */ - else { - /* - * Tag just before the first leaf entry. - */ - tagp = (__be16 *)blp - 1; - /* - * Data object just before the first leaf entry. - */ - dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); - /* - * If it's not free then the data will go where the - * leaf data starts now, if it works at all. 
- */ - if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * - (uint)sizeof(*blp) < len) - dup = NULL; - } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) - dup = NULL; - else - dup = (xfs_dir2_data_unused_t *)blp; - compact = 1; + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { + xfs_trans_brelse(tp, bp); + if (!dup) + return XFS_ERROR(ENOSPC); + return 0; } - /* - * If this isn't a real add, we're done with the buffer. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - xfs_da_brelse(tp, bp); + /* * If we don't have space for the new entry & leaf ... */ if (!dup) { - /* - * Not trying to actually do anything, or don't have - * a space reservation: return no-space. - */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) + /* Don't have a space reservation: return no-space. */ + if (args->total == 0) return XFS_ERROR(ENOSPC); /* * Convert to the next larger format. * Then add the new entry in that format. */ error = xfs_dir2_block_to_leaf(args, bp); - xfs_da_buf_done(bp); if (error) return error; return xfs_dir2_leaf_addname(args); } - /* - * Just checking, and it would work, so say so. - */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - return 0; + needlog = needscan = 0; + /* * If need to compact the leaf entries, do it now. - * Leave the highest-numbered stale entry stale. - * XXX should be the one closest to mid but mid is not yet computed. 
*/ - if (compact) { - int fromidx; /* source leaf index */ - int toidx; /* target leaf index */ - - for (fromidx = toidx = be32_to_cpu(btp->count) - 1, - highstale = lfloghigh = -1; - fromidx >= 0; - fromidx--) { - if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { - if (highstale == -1) - highstale = toidx; - else { - if (lfloghigh == -1) - lfloghigh = toidx; - continue; - } - } - if (fromidx < toidx) - blp[toidx] = blp[fromidx]; - toidx--; - } - lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); - lfloghigh -= be32_to_cpu(btp->stale) - 1; - be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); - xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), - (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), - &needlog, &needscan); - blp += be32_to_cpu(btp->stale) - 1; - btp->stale = cpu_to_be32(1); + if (compact) + xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, + &lfloghigh, &lfloglow); + else if (btp->stale) { /* - * If we now need to rebuild the bestfree map, do so. - * This needs to happen before the next call to use_free. + * Set leaf logging boundaries to impossible state. + * For the no-stale case they're set explicitly. */ - if (needscan) { - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); - needscan = 0; - } - } - /* - * Set leaf logging boundaries to impossible state. - * For the no-stale case they're set explicitly. - */ - else if (btp->stale) { lfloglow = be32_to_cpu(btp->count); lfloghigh = -1; } + /* * Find the slot that's first lower than our hash value, -1 if none. 
*/ @@ -299,7 +375,7 @@ xfs_dir2_block_addname( */ xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - + ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - sizeof(*blp)), (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, &needscan); @@ -312,8 +388,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, - &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); needscan = 0; } /* @@ -334,12 +409,14 @@ xfs_dir2_block_addname( else { for (lowstale = mid; lowstale >= 0 && - be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; + blp[lowstale].address != + cpu_to_be32(XFS_DIR2_NULL_DATAPTR); lowstale--) continue; for (highstale = mid + 1; highstale < be32_to_cpu(btp->count) && - be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && + blp[highstale].address != + cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) continue; @@ -378,13 +455,13 @@ xfs_dir2_block_addname( */ blp[mid].hashval = cpu_to_be32(args->hashval); blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); + (char *)dep - (char *)hdr)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* * Create the new data entry. @@ -393,18 +470,17 @@ xfs_dir2_block_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Clean up the bestfree array and log the header, tail, and entry. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); xfs_dir2_data_log_entry(tp, bp, dep); xfs_dir2_data_check(dp, bp); - xfs_da_buf_done(bp); return 0; } @@ -414,21 +490,18 @@ xfs_dir2_block_addname( static void xfs_dir2_block_log_leaf( xfs_trans_t *tp, /* transaction structure */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_buf *bp, /* block buffer */ int first, /* index of first logged leaf */ int last) /* index of last logged leaf */ { - xfs_dir2_block_t *block; /* directory block structure */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + xfs_dir2_leaf_entry_t *blp; + xfs_dir2_block_tail_t *btp; - mp = tp->t_mountp; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); blp = xfs_dir2_block_leaf_p(btp); - xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), - (uint)((char *)&blp[last + 1] - (char *)block - 1)); + xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), + (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); } /* @@ -437,17 +510,14 @@ xfs_dir2_block_log_leaf( static void xfs_dir2_block_log_tail( xfs_trans_t *tp, /* transaction structure */ - xfs_dabuf_t *bp) /* block buffer */ + struct xfs_buf *bp) /* block buffer */ { - xfs_dir2_block_t *block; /* directory block structure */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + xfs_dir2_block_tail_t *btp; - mp = tp->t_mountp; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); - xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), - (uint)((char *)(btp + 1) - (char *)block - 1)); + btp = 
xfs_dir2_block_tail_p(tp->t_mountp, hdr); + xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), + (uint)((char *)(btp + 1) - (char *)hdr - 1)); } /* @@ -458,9 +528,9 @@ int /* error */ xfs_dir2_block_lookup( xfs_da_args_t *args) /* dir lookup arguments */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -478,21 +548,21 @@ xfs_dir2_block_lookup( return error; dp = args->dp; mp = dp->i_mount; - block = bp->data; + hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ - dep = (xfs_dir2_data_entry_t *)((char *)block + + dep = (xfs_dir2_data_entry_t *)((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Fill in inode number, CI name if appropriate, release the block. 
*/ args->inumber = be64_to_cpu(dep->inumber); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - xfs_da_brelse(args->trans, bp); + xfs_trans_brelse(args->trans, bp); return XFS_ERROR(error); } @@ -502,13 +572,13 @@ xfs_dir2_block_lookup( static int /* error */ xfs_dir2_block_lookup_int( xfs_da_args_t *args, /* dir lookup arguments */ - xfs_dabuf_t **bpp, /* returned block buffer */ + struct xfs_buf **bpp, /* returned block buffer */ int *entno) /* returned entry number */ { xfs_dir2_dataptr_t addr; /* data entry address */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -524,17 +594,14 @@ xfs_dir2_block_lookup_int( dp = args->dp; tp = args->trans; mp = dp->i_mount; - /* - * Read the buffer, return error if we can't get it. - */ - if ((error = - xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) { + + error = xfs_dir2_block_read(tp, dp, &bp); + if (error) return error; - } - ASSERT(bp != NULL); - block = bp->data; + + hdr = bp->b_addr; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. @@ -551,7 +618,7 @@ xfs_dir2_block_lookup_int( high = mid - 1; if (low > high) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return XFS_ERROR(ENOENT); } } @@ -572,7 +639,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. 
*/ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); /* * Compare name and if it's an exact match, return the index * and buffer. If it's the first case-insensitive match, store @@ -599,7 +666,7 @@ xfs_dir2_block_lookup_int( /* * No match, release the buffer and return ENOENT. */ - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return XFS_ERROR(ENOENT); } @@ -611,9 +678,9 @@ int /* error */ xfs_dir2_block_removename( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -638,20 +705,20 @@ xfs_dir2_block_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); + hdr = bp->b_addr; + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ needlog = needscan = 0; xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * Fix up the block tail. @@ -667,18 +734,17 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_data_check(dp, bp); /* * See if the size as a shortform is good enough. */ - if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > - XFS_IFORK_DSIZE(dp)) { - xfs_da_buf_done(bp); + size = xfs_dir2_block_sfsize(dp, hdr, &sfh); + if (size > XFS_IFORK_DSIZE(dp)) return 0; - } + /* * If it works, do the conversion. */ @@ -693,9 +759,9 @@ int /* error */ xfs_dir2_block_replace( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* incore inode */ @@ -714,14 +780,14 @@ xfs_dir2_block_replace( } dp = args->dp; mp = dp->i_mount; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); + hdr = bp->b_addr; + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. 
@@ -729,7 +795,6 @@ xfs_dir2_block_replace( dep->inumber = cpu_to_be64(args->inumber); xfs_dir2_data_log_entry(args->trans, bp, dep); xfs_dir2_data_check(dp, bp); - xfs_da_buf_done(bp); return 0; } @@ -756,11 +821,11 @@ xfs_dir2_block_sort( int /* error */ xfs_dir2_leaf_to_block( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp, /* leaf buffer */ - xfs_dabuf_t *dbp) /* data buffer */ + struct xfs_buf *lbp, /* leaf buffer */ + struct xfs_buf *dbp) /* data buffer */ { __be16 *bestsp; /* leaf bests table */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data entry */ @@ -783,8 +848,8 @@ xfs_dir2_leaf_to_block( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = lbp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + leaf = lbp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * If there are data blocks other than the first one, take this @@ -795,48 +860,46 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > mp->m_dirblksize) { bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - mp->m_dirblksize - (uint)sizeof(block->hdr)) { + mp->m_dirblksize - (uint)sizeof(*hdr)) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) - goto out; - } else { - error = 0; - goto out; - } + return error; + } else + return 0; } /* * Read the data block if we don't already have it, give up if it fails. 
*/ - if (dbp == NULL && - (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp, - XFS_DATA_FORK))) { - goto out; + if (!dbp) { + error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); + if (error) + return error; } - block = dbp->data; - ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); + hdr = dbp->b_addr; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); /* * Size of the "leaf" area in the block. */ - size = (uint)sizeof(block->tail) + + size = (uint)sizeof(xfs_dir2_block_tail_t) + (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); /* * Look at the last data entry. */ - tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; - dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. */ if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG || - be16_to_cpu(dup->length) < size) { - error = 0; - goto out; - } + be16_to_cpu(dup->length) < size) + return 0; + /* * Start converting it to block form. */ - block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + dbp->b_ops = &xfs_dir2_block_buf_ops; + hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); needlog = 1; needscan = 0; /* @@ -847,7 +910,7 @@ xfs_dir2_leaf_to_block( /* * Initialize the block tail. 
*/ - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); @@ -856,7 +919,8 @@ xfs_dir2_leaf_to_block( */ lep = xfs_dir2_block_leaf_p(btp); for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { - if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; lep[to++] = leaf->ents[from]; } @@ -866,32 +930,24 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* * Pitch the old leaf block. */ error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp); - lbp = NULL; - if (error) { - goto out; - } + if (error) + return error; + /* * Now see if the resulting block can be shrunken to shortform. 
*/ - if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > - XFS_IFORK_DSIZE(dp)) { - error = 0; - goto out; - } + size = xfs_dir2_block_sfsize(dp, hdr, &sfh); + if (size > XFS_IFORK_DSIZE(dp)) + return 0; + return xfs_dir2_block_to_sf(args, dbp, size, &sfh); -out: - if (lbp) - xfs_da_buf_done(lbp); - if (dbp) - xfs_da_buf_done(dbp); - return error; } /* @@ -902,12 +958,10 @@ xfs_dir2_sf_to_block( xfs_da_args_t *args) /* operation arguments */ { xfs_dir2_db_t blkno; /* dir-relative block # (0) */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dabuf_t *bp; /* block buffer */ + struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ - char *buf; /* sf buffer */ - int buf_len; xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ int dummy; /* trash */ @@ -921,7 +975,8 @@ xfs_dir2_sf_to_block( int newoffset; /* offset from current entry */ int offset; /* target block offset */ xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ + xfs_dir2_sf_hdr_t *sfp; /* shortform header */ __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; @@ -939,32 +994,30 @@ xfs_dir2_sf_to_block( ASSERT(XFS_FORCED_SHUTDOWN(mp)); return XFS_ERROR(EIO); } + + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); + /* - * Copy the directory into the stack buffer. + * Copy the directory into a temporary buffer. * Then pitch the incore inode data so we can make extents. 
*/ + sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); + memcpy(sfp, oldsfp, dp->i_df.if_bytes); - buf_len = dp->i_df.if_bytes; - buf = kmem_alloc(buf_len, KM_SLEEP); - - memcpy(buf, sfp, buf_len); - xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK); + xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); dp->i_d.di_size = 0; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - /* - * Reset pointer - old sfp is gone. - */ - sfp = (xfs_dir2_sf_t *)buf; + /* * Add block 0 to the inode. */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); if (error) { - kmem_free(buf); + kmem_free(sfp); return error; } /* @@ -972,21 +1025,22 @@ xfs_dir2_sf_to_block( */ error = xfs_dir2_data_init(args, blkno, &bp); if (error) { - kmem_free(buf); + kmem_free(sfp); return error; } - block = bp->data; - block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + bp->b_ops = &xfs_dir2_block_buf_ops; + hdr = bp->b_addr; + hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* * Compute size of block "tail" area. */ i = (uint)sizeof(*btp) + - (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); /* * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = (xfs_dir2_data_unused_t *)block->u; + dup = (xfs_dir2_data_unused_t *)(hdr + 1); needlog = needscan = 0; xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, &needscan); @@ -994,50 +1048,51 @@ xfs_dir2_sf_to_block( /* * Fill in the tail. */ - btp = xfs_dir2_block_tail_p(mp, block); - btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ + btp = xfs_dir2_block_tail_p(mp, hdr); + btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ btp->stale = 0; blp = xfs_dir2_block_leaf_p(btp); - endoffset = (uint)((char *)blp - (char *)block); + endoffset = (uint)((char *)blp - (char *)hdr); /* * Remove the freespace, we'll manage it. 
*/ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); + ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); + (char *)dep - (char *)hdr)); /* * Create entry for .. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); + ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); + (char *)dep - (char *)hdr)); offset = XFS_DIR2_DATA_FIRST_OFFSET; /* * Loop over existing entries, stuff them in. */ - if ((i = 0) == sfp->hdr.count) + i = 0; + if (!sfp->count) sfep = NULL; else sfep = xfs_dir2_sf_firstentry(sfp); @@ -1057,43 +1112,40 @@ xfs_dir2_sf_to_block( * There should be a hole here, make one. 
*/ if (offset < newoffset) { - dup = (xfs_dir2_data_unused_t *) - ((char *)block + offset); + dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( - ((char *)dup - (char *)block)); + ((char *)dup - (char *)hdr)); xfs_dir2_data_log_unused(tp, bp, dup); - (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, - dup, &dummy); + xfs_dir2_data_freeinsert(hdr, dup, &dummy); offset += be16_to_cpu(dup->length); continue; } /* * Copy a real entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep))); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); + dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep)); dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> hashname(&name)); blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); - offset = (int)((char *)(tagp + 1) - (char *)block); - if (++i == sfp->hdr.count) + (char *)dep - (char *)hdr)); + offset = (int)((char *)(tagp + 1) - (char *)hdr); + if (++i == sfp->count) sfep = NULL; else sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ - kmem_free(buf); + kmem_free(sfp); /* * Sort the leaf entries by hash value. 
*/ @@ -1106,6 +1158,5 @@ xfs_dir2_sf_to_block( xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); xfs_dir2_block_log_tail(tp, bp); xfs_dir2_data_check(dp, bp); - xfs_da_buf_done(bp); return 0; } diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c index d89b5b1f0..eb8673999 100644 --- a/libxfs/xfs_dir2_data.c +++ b/libxfs/xfs_dir2_data.c @@ -18,23 +18,21 @@ #include - -#ifdef DEBUG /* * Check the consistency of the data block. * The input can also be a block-format directory. - * Pop an assert if we find anything bad. + * Return 0 if the buffer is good, otherwise an error. */ -void -xfs_dir2_data_check( - xfs_inode_t *dp, /* incore inode pointer */ - xfs_dabuf_t *bp) /* data block's buffer */ +int +__xfs_dir2_data_check( + struct xfs_inode *dp, /* incore inode pointer */ + struct xfs_buf *bp) /* data block's buffer */ { xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ xfs_dir2_data_free_t *bf; /* bestfree table */ xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ @@ -49,36 +47,46 @@ xfs_dir2_data_check( int stale; /* count of stale leaves */ struct xfs_name name; - mp = dp->i_mount; - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - bf = d->hdr.bestfree; - p = (char *)d->u; - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + mp = bp->b_target->bt_mount; + hdr = bp->b_addr; + bf = hdr->bestfree; + p = (char *)(hdr + 1); + + switch (be32_to_cpu(hdr->magic)) { + case XFS_DIR2_BLOCK_MAGIC: + btp = xfs_dir2_block_tail_p(mp, hdr); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; - } else - endp = (char *)d +
mp->m_dirblksize; + break; + case XFS_DIR2_DATA_MAGIC: + endp = (char *)hdr + mp->m_dirblksize; + break; + default: + XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); + return EFSCORRUPTED; + } + count = lastfree = freeseen = 0; /* * Account for zero bestfree entries. */ if (!bf[0].length) { - ASSERT(!bf[0].offset); + XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); freeseen |= 1 << 0; } if (!bf[1].length) { - ASSERT(!bf[1].offset); + XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); freeseen |= 1 << 1; } if (!bf[2].length) { - ASSERT(!bf[2].offset); + XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); freeseen |= 1 << 2; } - ASSERT(be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length)); - ASSERT(be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length)); + + XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= + be16_to_cpu(bf[1].length)); + XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= + be16_to_cpu(bf[2].length)); /* * Loop over the data/unused entries. */ @@ -90,17 +98,20 @@ xfs_dir2_data_check( * doesn't need to be there. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT(lastfree == 0); - ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)d); - dfp = xfs_dir2_data_freefind(d, dup); + XFS_WANT_CORRUPTED_RETURN(lastfree == 0); + XFS_WANT_CORRUPTED_RETURN( + be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == + (char *)dup - (char *)hdr); + dfp = xfs_dir2_data_freefind(hdr, dup); if (dfp) { i = (int)(dfp - bf); - ASSERT((freeseen & (1 << i)) == 0); + XFS_WANT_CORRUPTED_RETURN( + (freeseen & (1 << i)) == 0); freeseen |= 1 << i; } else { - ASSERT(be16_to_cpu(dup->length) <= - be16_to_cpu(bf[2].length)); + XFS_WANT_CORRUPTED_RETURN( + be16_to_cpu(dup->length) <= + be16_to_cpu(bf[2].length)); } p += be16_to_cpu(dup->length); lastfree = 1; @@ -113,16 +124,18 @@ xfs_dir2_data_check( * The linear search is crude but this is DEBUG code. 
*/ dep = (xfs_dir2_data_entry_t *)p; - ASSERT(dep->namelen != 0); - ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); - ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == - (char *)dep - (char *)d); + XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); + XFS_WANT_CORRUPTED_RETURN( + !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); + XFS_WANT_CORRUPTED_RETURN( + be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == + (char *)dep - (char *)hdr); count++; lastfree = 0; - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)d)); + ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; hash = mp->m_dirnameops->hashname(&name); @@ -131,26 +144,122 @@ xfs_dir2_data_check( be32_to_cpu(lep[i].hashval) == hash) break; } - ASSERT(i < be32_to_cpu(btp->count)); + XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); } p += xfs_dir2_data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. 
*/ - ASSERT(freeseen == 7); - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + XFS_WANT_CORRUPTED_RETURN(freeseen == 7); + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { - if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) + if (lep[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; if (i > 0) - ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); + XFS_WANT_CORRUPTED_RETURN( + be32_to_cpu(lep[i].hashval) >= + be32_to_cpu(lep[i - 1].hashval)); } - ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - ASSERT(stale == be32_to_cpu(btp->stale)); + XFS_WANT_CORRUPTED_RETURN(count == + be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); + XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); + } + return 0; +} + +static void +xfs_dir2_data_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + int block_ok = 0; + + block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC); + block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; + + if (!block_ok) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +/* + * Readahead of the first block of the directory when it is opened is completely + * oblivious to the format of the directory. Hence we can either get a block + * format buffer or a data format buffer on readahead. 
+ */ +static void +xfs_dir2_data_reada_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + + switch (be32_to_cpu(hdr->magic)) { + case XFS_DIR2_BLOCK_MAGIC: + bp->b_ops = &xfs_dir2_block_buf_ops; + bp->b_ops->verify_read(bp); + return; + case XFS_DIR2_DATA_MAGIC: + xfs_dir2_data_verify(bp); + return; + default: + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + break; } } -#endif + +static void +xfs_dir2_data_read_verify( + struct xfs_buf *bp) +{ + xfs_dir2_data_verify(bp); +} + +static void +xfs_dir2_data_write_verify( + struct xfs_buf *bp) +{ + xfs_dir2_data_verify(bp); +} + +const struct xfs_buf_ops xfs_dir2_data_buf_ops = { + .verify_read = xfs_dir2_data_read_verify, + .verify_write = xfs_dir2_data_write_verify, +}; + +static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = { + .verify_read = xfs_dir2_data_reada_verify, + .verify_write = xfs_dir2_data_write_verify, +}; + + +int +xfs_dir2_data_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mapped_bno, + struct xfs_buf **bpp) +{ + return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, + XFS_DATA_FORK, &xfs_dir2_data_buf_ops); +} + +int +xfs_dir2_data_readahead( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t bno, + xfs_daddr_t mapped_bno) +{ + return xfs_da_reada_buf(tp, dp, bno, mapped_bno, + XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops); +} /* * Given a data block and an unused entry from that block, @@ -158,7 +267,7 @@ xfs_dir2_data_check( */ xfs_dir2_data_free_t * xfs_dir2_data_freefind( - xfs_dir2_data_t *d, /* data block */ + xfs_dir2_data_hdr_t *hdr, /* data block */ xfs_dir2_data_unused_t *dup) /* data unused entry */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ @@ -168,17 +277,17 @@ xfs_dir2_data_freefind( int seenzero; /* saw a 0 bestfree entry */ #endif - off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); + off = 
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); #if defined(DEBUG) && defined(__KERNEL__) /* * Validate some consistency in the bestfree table. * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. */ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; - dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + for (dfp = &hdr->bestfree[0], seenzero = matched = 0; + dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) { ASSERT(!dfp->length); @@ -194,7 +303,7 @@ xfs_dir2_data_freefind( else ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); - if (dfp > &d->hdr.bestfree[0]) + if (dfp > &hdr->bestfree[0]) ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); } #endif @@ -203,13 +312,13 @@ xfs_dir2_data_freefind( * it can't be there since they're sorted. */ if (be16_to_cpu(dup->length) < - be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) + be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. 
*/ - for (dfp = &d->hdr.bestfree[0]; - dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + for (dfp = &hdr->bestfree[0]; + dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) return NULL; @@ -227,7 +336,7 @@ xfs_dir2_data_freefind( */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( - xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block pointer */ xfs_dir2_data_unused_t *dup, /* unused space */ int *loghead) /* log the data header (out) */ { @@ -235,12 +344,13 @@ xfs_dir2_data_freeinsert( xfs_dir2_data_free_t new; /* new bestfree entry */ #ifdef __KERNEL__ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); #endif - dfp = d->hdr.bestfree; + dfp = hdr->bestfree; new.length = dup->length; - new.offset = cpu_to_be16((char *)dup - (char *)d); + new.offset = cpu_to_be16((char *)dup - (char *)hdr); + /* * Insert at position 0, 1, or 2; or not at all. */ @@ -270,36 +380,36 @@ xfs_dir2_data_freeinsert( */ STATIC void xfs_dir2_data_freeremove( - xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block header */ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { #ifdef __KERNEL__ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); #endif /* * It's the first entry, slide the next 2 up. */ - if (dfp == &d->hdr.bestfree[0]) { - d->hdr.bestfree[0] = d->hdr.bestfree[1]; - d->hdr.bestfree[1] = d->hdr.bestfree[2]; + if (dfp == &hdr->bestfree[0]) { + hdr->bestfree[0] = hdr->bestfree[1]; + hdr->bestfree[1] = hdr->bestfree[2]; } /* * It's the second entry, slide the 3rd entry up. 
*/ - else if (dfp == &d->hdr.bestfree[1]) - d->hdr.bestfree[1] = d->hdr.bestfree[2]; + else if (dfp == &hdr->bestfree[1]) + hdr->bestfree[1] = hdr->bestfree[2]; /* * Must be the last entry. */ else - ASSERT(dfp == &d->hdr.bestfree[2]); + ASSERT(dfp == &hdr->bestfree[2]); /* * Clear the 3rd entry, must be zero now. */ - d->hdr.bestfree[2].length = 0; - d->hdr.bestfree[2].offset = 0; + hdr->bestfree[2].length = 0; + hdr->bestfree[2].offset = 0; *loghead = 1; } @@ -309,7 +419,7 @@ xfs_dir2_data_freeremove( void xfs_dir2_data_freescan( xfs_mount_t *mp, /* filesystem mount point */ - xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block header */ int *loghead) /* out: log data header */ { xfs_dir2_block_tail_t *btp; /* block tail */ @@ -319,23 +429,23 @@ xfs_dir2_data_freescan( char *p; /* current entry pointer */ #ifdef __KERNEL__ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); #endif /* * Start by clearing the table. */ - memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree)); + memset(hdr->bestfree, 0, sizeof(hdr->bestfree)); *loghead = 1; /* * Set up pointers. */ - p = (char *)d->u; - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + p = (char *)(hdr + 1); + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + btp = xfs_dir2_block_tail_p(mp, hdr); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)d + mp->m_dirblksize; + endp = (char *)hdr + mp->m_dirblksize; /* * Loop over the block's entries. */ @@ -345,9 +455,9 @@ xfs_dir2_data_freescan( * If it's a free entry, insert it. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT((char *)dup - (char *)d == + ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(d, dup, loghead); + xfs_dir2_data_freeinsert(hdr, dup, loghead); p += be16_to_cpu(dup->length); } /* @@ -355,7 +465,7 @@ xfs_dir2_data_freescan( */ else { dep = (xfs_dir2_data_entry_t *)p; - ASSERT((char *)dep - (char *)d == + ASSERT((char *)dep - (char *)hdr == be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); p += xfs_dir2_data_entsize(dep->namelen); } @@ -370,10 +480,10 @@ int /* error */ xfs_dir2_data_init( xfs_da_args_t *args, /* directory operation args */ xfs_dir2_db_t blkno, /* logical dir block number */ - xfs_dabuf_t **bpp) /* output block buffer */ + struct xfs_buf **bpp) /* output block buffer */ { - xfs_dabuf_t *bp; /* block buffer */ - xfs_dir2_data_t *d; /* pointer to block */ + struct xfs_buf *bp; /* block buffer */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused entry pointer */ int error; /* error return value */ @@ -390,30 +500,31 @@ xfs_dir2_data_init( */ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, XFS_DATA_FORK); - if (error) { + if (error) return error; - } - ASSERT(bp != NULL); + bp->b_ops = &xfs_dir2_data_buf_ops; + /* * Initialize the header. */ - d = bp->data; - d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); + hdr = bp->b_addr; + hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { - d->hdr.bestfree[i].length = 0; - d->hdr.bestfree[i].offset = 0; + hdr->bestfree[i].length = 0; + hdr->bestfree[i].offset = 0; } + /* * Set up an unused entry for the block's body. 
*/ - dup = &d->u[0].unused; + dup = (xfs_dir2_data_unused_t *)(hdr + 1); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t=mp->m_dirblksize - (uint)sizeof(d->hdr); - d->hdr.bestfree[0].length = cpu_to_be16(t); + t = mp->m_dirblksize - (uint)sizeof(*hdr); + hdr->bestfree[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ @@ -428,18 +539,18 @@ xfs_dir2_data_init( */ void xfs_dir2_data_log_entry( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), + xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - - (char *)d - 1)); + (char *)hdr - 1)); } /* @@ -447,16 +558,15 @@ xfs_dir2_data_log_entry( */ void xfs_dir2_data_log_header( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), - (uint)(sizeof(d->hdr) - 1)); + xfs_trans_log_buf(tp, bp, 0, 
sizeof(*hdr) - 1); } /* @@ -464,27 +574,27 @@ xfs_dir2_data_log_header( */ void xfs_dir2_data_log_unused( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_unused_t *dup) /* data unused pointer */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr = bp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); /* * Log the first part of the unused entry. */ - xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), + xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - - 1 - (char *)d)); + 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ - xfs_da_log_buf(tp, bp, - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + + xfs_trans_log_buf(tp, bp, + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); } @@ -494,14 +604,14 @@ xfs_dir2_data_log_unused( */ void xfs_dir2_data_make_free( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ int *needlogp, /* out: log header */ int *needscanp) /* out: regen bestfree */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ char *endptr; /* end of data area */ xfs_mount_t *mp; /* filesystem mount point */ @@ -511,28 +621,29 @@ xfs_dir2_data_make_free( xfs_dir2_data_unused_t *prevdup; /* unused 
entry before us */ mp = tp->t_mountp; - d = bp->data; + hdr = bp->b_addr; + /* * Figure out where the end of the data area is. */ - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) - endptr = (char *)d + mp->m_dirblksize; + if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)) + endptr = (char *)hdr + mp->m_dirblksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + btp = xfs_dir2_block_tail_p(mp, hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > sizeof(d->hdr)) { + if (offset > sizeof(*hdr)) { __be16 *tagp; /* tag just before us */ - tagp = (__be16 *)((char *)d + offset) - 1; - prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)hdr + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else @@ -541,9 +652,9 @@ xfs_dir2_data_make_free( * If this isn't the end of the block, see if the entry after * us is free. */ - if ((char *)d + offset + len < endptr) { + if ((char *)hdr + offset + len < endptr) { postdup = - (xfs_dir2_data_unused_t *)((char *)d + offset + len); + (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else @@ -560,21 +671,21 @@ xfs_dir2_data_make_free( /* * See if prevdup and/or postdup are in bestfree table. */ - dfp = xfs_dir2_data_freefind(d, prevdup); - dfp2 = xfs_dir2_data_freefind(d, postdup); + dfp = xfs_dir2_data_freefind(hdr, prevdup); + dfp2 = xfs_dir2_data_freefind(hdr, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. 
Then we know what's happening, otherwise * since the third bestfree is there, there might be more * entries. */ - needscan = (d->hdr.bestfree[2].length != 0); + needscan = (hdr->bestfree[2].length != 0); /* * Fix up the new big freespace. */ be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)d); + cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, prevdup); if (!needscan) { /* @@ -584,18 +695,18 @@ xfs_dir2_data_make_free( * Remove entry 1 first then entry 0. */ ASSERT(dfp && dfp2); - if (dfp == &d->hdr.bestfree[1]) { - dfp = &d->hdr.bestfree[0]; + if (dfp == &hdr->bestfree[1]) { + dfp = &hdr->bestfree[0]; ASSERT(dfp2 == dfp); - dfp2 = &d->hdr.bestfree[1]; + dfp2 = &hdr->bestfree[1]; } - xfs_dir2_data_freeremove(d, dfp2, needlogp); - xfs_dir2_data_freeremove(d, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, dfp2, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); /* * Now insert the new entry. */ - dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); - ASSERT(dfp == &d->hdr.bestfree[0]); + dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); + ASSERT(dfp == &hdr->bestfree[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); @@ -605,10 +716,10 @@ xfs_dir2_data_make_free( * The entry before us is free, merge with it. */ else if (prevdup) { - dfp = xfs_dir2_data_freefind(d, prevdup); + dfp = xfs_dir2_data_freefind(hdr, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)d); + cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, prevdup); /* * If the previous entry was in the table, the new entry @@ -616,27 +727,27 @@ xfs_dir2_data_make_free( * the old one and add the new one. 
*/ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(prevdup->length) > - be16_to_cpu(d->hdr.bestfree[2].length); + be16_to_cpu(hdr->bestfree[2].length); } } /* * The following entry is free, merge with it. */ else if (postdup) { - dfp = xfs_dir2_data_freefind(d, postdup); - newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); + dfp = xfs_dir2_data_freefind(hdr, postdup); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If the following entry was in the table, the new entry @@ -644,28 +755,28 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(newdup->length) > - be16_to_cpu(d->hdr.bestfree[2].length); + be16_to_cpu(hdr->bestfree[2].length); } } /* * Neither neighbor is free. Make a new entry. 
*/ else { - newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); - (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); } *needscanp = needscan; } @@ -675,15 +786,15 @@ xfs_dir2_data_make_free( */ void xfs_dir2_data_use_free( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* data block buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ xfs_dir2_data_aoff_t len, /* length to use */ int *needlogp, /* out: need to log header */ int *needscanp) /* out: need regen bestfree */ { - xfs_dir2_data_t *d; /* data block */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ @@ -692,24 +803,24 @@ xfs_dir2_data_use_free( xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + hdr = bp->b_addr; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)d); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); - ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + ASSERT(offset >= (char *)dup - (char *)hdr); + ASSERT(offset + len <= (char *)dup + 
be16_to_cpu(dup->length) - (char *)hdr); + ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); /* * Look up the entry in the bestfree table. */ - dfp = xfs_dir2_data_freefind(d, dup); + dfp = xfs_dir2_data_freefind(hdr, dup); oldlen = be16_to_cpu(dup->length); - ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); + ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length)); /* * Check for alignment with front and back of the entry. */ - matchfront = (char *)dup - (char *)d == offset; - matchback = (char *)dup + oldlen - (char *)d == offset + len; + matchfront = (char *)dup - (char *)hdr == offset; + matchback = (char *)dup + oldlen - (char *)hdr == offset + len; ASSERT(*needscanp == 0); needscan = 0; /* @@ -718,9 +829,9 @@ xfs_dir2_data_use_free( */ if (matchfront && matchback) { if (dfp) { - needscan = (d->hdr.bestfree[2].offset != 0); + needscan = (hdr->bestfree[2].offset != 0); if (!needscan) - xfs_dir2_data_freeremove(d, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); } } /* @@ -728,27 +839,27 @@ xfs_dir2_data_use_free( * Make a new entry with the remaining freespace. */ else if (matchfront) { - newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. 
*/ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ - needscan = dfp == &d->hdr.bestfree[2]; + needscan = dfp == &hdr->bestfree[2]; } } /* @@ -757,25 +868,25 @@ xfs_dir2_data_use_free( */ else if (matchback) { newdup = dup; - newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. 
*/ - needscan = dfp == &d->hdr.bestfree[2]; + needscan = dfp == &hdr->bestfree[2]; } } /* @@ -784,15 +895,15 @@ xfs_dir2_data_use_free( */ else { newdup = dup; - newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); - newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); + newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = - cpu_to_be16((char *)newdup2 - (char *)d); + cpu_to_be16((char *)newdup2 - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup2); /* * If the old entry was in the table, we need to scan @@ -803,13 +914,12 @@ xfs_dir2_data_use_free( * the 2 new will work. */ if (dfp) { - needscan = (d->hdr.bestfree[2].length != 0); + needscan = (hdr->bestfree[2].length != 0); if (!needscan) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - (void)xfs_dir2_data_freeinsert(d, newdup, - needlogp); - (void)xfs_dir2_data_freeinsert(d, newdup2, - needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup2, + needlogp); } } } diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c index 9ce2320f4..d303813c2 100644 --- a/libxfs/xfs_dir2_leaf.c +++ b/libxfs/xfs_dir2_leaf.c @@ -22,16 +22,93 @@ * Local function declarations. 
*/ #ifdef DEBUG -static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp); +static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir2_leaf_check(dp, bp) #endif -static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp, - int *indexp, xfs_dabuf_t **dbpp); -static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, +static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, + int *indexp, struct xfs_buf **dbpp); +static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, int first, int last); -static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); +static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); +static void +xfs_dir2_leaf_verify( + struct xfs_buf *bp, + __be16 magic) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_leaf_hdr *hdr = bp->b_addr; + int block_ok = 0; + + block_ok = hdr->info.magic == magic; + if (!block_ok) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_dir2_leaf1_read_verify( + struct xfs_buf *bp) +{ + xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); +} + +static void +xfs_dir2_leaf1_write_verify( + struct xfs_buf *bp) +{ + xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); +} + +void +xfs_dir2_leafn_read_verify( + struct xfs_buf *bp) +{ + xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); +} + +void +xfs_dir2_leafn_write_verify( + struct xfs_buf *bp) +{ + xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); +} + +static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = { + .verify_read = xfs_dir2_leaf1_read_verify, + .verify_write = xfs_dir2_leaf1_write_verify, +}; + +const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = { + .verify_read = xfs_dir2_leafn_read_verify, + .verify_write = xfs_dir2_leafn_write_verify, +}; + +static int 
+xfs_dir2_leaf_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops); +} + +int +xfs_dir2_leafn_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + xfs_daddr_t mappedbno, + struct xfs_buf **bpp) +{ + return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops); +} /* * Convert a block form directory to a leaf form directory. @@ -39,16 +116,16 @@ static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); int /* error */ xfs_dir2_block_to_leaf( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *dbp) /* input block's buffer */ + struct xfs_buf *dbp) /* input block's buffer */ { __be16 *bestsp; /* leaf's bestsp entries */ xfs_dablk_t blkno; /* leaf block's bno */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ xfs_dir2_block_tail_t *btp; /* block's tail */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ - xfs_dabuf_t *lbp; /* leaf block's buffer */ + struct xfs_buf *lbp; /* leaf block's buffer */ xfs_dir2_db_t ldb; /* leaf block's bno */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ @@ -79,10 +156,10 @@ xfs_dir2_block_to_leaf( return error; } ASSERT(lbp != NULL); - leaf = lbp->data; - block = dbp->data; + leaf = lbp->b_addr; + hdr = dbp->b_addr; xfs_dir2_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Set the counts in the leaf header. @@ -102,23 +179,24 @@ xfs_dir2_block_to_leaf( * tail be free. 
*/ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), - (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize - + (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - (char *)blp), &needlog, &needscan); /* * Fix up the block header, make it a data block. */ - block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + dbp->b_ops = &xfs_dir2_data_buf_ops; + hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Set up leaf tail and bests table. */ ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = cpu_to_be32(1); bestsp = xfs_dir2_leaf_bests_p(ltp); - bestsp[0] = block->hdr.bestfree[0].length; + bestsp[0] = hdr->bestfree[0].length; /* * Log the data header and leaf bests table. */ @@ -127,10 +205,134 @@ xfs_dir2_block_to_leaf( xfs_dir2_leaf_check(dp, lbp); xfs_dir2_data_check(dp, dbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, 0); - xfs_da_buf_done(lbp); return 0; } +STATIC void +xfs_dir2_leaf_find_stale( + struct xfs_dir2_leaf *leaf, + int index, + int *lowstale, + int *highstale) +{ + /* + * Find the first stale entry before our index, if any. + */ + for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { + if (leaf->ents[*lowstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + break; + } + + /* + * Find the first stale entry at or after our index, if any. + * Stop if the result would require moving more entries than using + * lowstale. 
+ */ + for (*highstale = index; + *highstale < be16_to_cpu(leaf->hdr.count); + ++*highstale) { + if (leaf->ents[*highstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + break; + if (*lowstale >= 0 && index - *lowstale <= *highstale - index) + break; + } +} + +struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_find_entry( + xfs_dir2_leaf_t *leaf, /* leaf structure */ + int index, /* leaf table position */ + int compact, /* need to compact leaves */ + int lowstale, /* index of prev stale leaf */ + int highstale, /* index of next stale leaf */ + int *lfloglow, /* low leaf logging index */ + int *lfloghigh) /* high leaf logging index */ +{ + if (!leaf->hdr.stale) { + xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ + + /* + * Now we need to make room to insert the leaf entry. + * + * If there are no stale entries, just insert a hole at index. + */ + lep = &leaf->ents[index]; + if (index < be16_to_cpu(leaf->hdr.count)) + memmove(lep + 1, lep, + (be16_to_cpu(leaf->hdr.count) - index) * + sizeof(*lep)); + + /* + * Record low and high logging indices for the leaf. + */ + *lfloglow = index; + *lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add_cpu(&leaf->hdr.count, 1); + return lep; + } + + /* + * There are stale entries. + * + * We will use one of them for the new entry. It's probably not at + * the right location, so we'll have to shift some up or down first. + * + * If we didn't compact before, we need to find the nearest stale + * entries before and after our insertion point. + */ + if (compact == 0) + xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); + + /* + * If the low one is better, use it. + */ + if (lowstale >= 0 && + (highstale == be16_to_cpu(leaf->hdr.count) || + index - lowstale - 1 < highstale - index)) { + ASSERT(index - lowstale - 1 >= 0); + ASSERT(leaf->ents[lowstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); + + /* + * Copy entries up to cover the stale entry and make room + * for the new entry. 
+ */ + if (index - lowstale - 1 > 0) { + memmove(&leaf->ents[lowstale], + &leaf->ents[lowstale + 1], + (index - lowstale - 1) * + sizeof(xfs_dir2_leaf_entry_t)); + } + *lfloglow = MIN(lowstale, *lfloglow); + *lfloghigh = MAX(index - 1, *lfloghigh); + be16_add_cpu(&leaf->hdr.stale, -1); + return &leaf->ents[index - 1]; + } + + /* + * The high one is better, so use that one. + */ + ASSERT(highstale - index >= 0); + ASSERT(leaf->ents[highstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); + + /* + * Copy entries down to cover the stale entry and make room for the + * new entry. + */ + if (highstale - index > 0) { + memmove(&leaf->ents[index + 1], + &leaf->ents[index], + (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); + } + *lfloglow = MIN(index, *lfloglow); + *lfloghigh = MAX(highstale, *lfloghigh); + be16_add_cpu(&leaf->hdr.stale, -1); + return &leaf->ents[index]; +} + /* * Add an entry to a leaf form directory. */ @@ -140,8 +342,8 @@ xfs_dir2_leaf_addname( { __be16 *bestsp; /* freespace table in leaf */ int compact; /* need to compact leaves */ - xfs_dir2_data_t *data; /* data block structure */ - xfs_dabuf_t *dbp; /* data block buffer */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* data unused entry */ @@ -150,7 +352,7 @@ xfs_dir2_leaf_addname( int highstale; /* index of next stale leaf */ int i; /* temporary, index */ int index; /* leaf table position */ - xfs_dabuf_t *lbp; /* leaf's buffer */ + struct xfs_buf *lbp; /* leaf's buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ int length; /* length of new entry */ xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ @@ -171,15 +373,11 @@ xfs_dir2_leaf_addname( dp = args->dp; tp = args->trans; mp = dp->i_mount; - /* - * Read the leaf block. 
- */ - error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, - XFS_DATA_FORK); - if (error) { + + error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); + if (error) return error; - } - ASSERT(lbp != NULL); + /* * Look up the entry by hash value and name. * We know it's not there, our caller has already done a lookup. @@ -187,7 +385,7 @@ xfs_dir2_leaf_addname( * But if there are dup hash values the index is of the first of those. */ index = xfs_dir2_leaf_search_hash(args, lbp); - leaf = lbp->data; + leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); length = xfs_dir2_data_entsize(args->namelen); @@ -204,7 +402,7 @@ xfs_dir2_leaf_addname( continue; i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); - ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); + ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; break; @@ -218,7 +416,8 @@ xfs_dir2_leaf_addname( /* * Remember a block we see that's missing. */ - if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) + if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && + use_block == -1) use_block = i; else if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; @@ -229,14 +428,17 @@ xfs_dir2_leaf_addname( /* * How many bytes do we need in the leaf block? */ - needbytes = - (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) + - (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); + needbytes = 0; + if (!leaf->hdr.stale) + needbytes += sizeof(xfs_dir2_leaf_entry_t); + if (use_block == -1) + needbytes += sizeof(xfs_dir2_data_off_t); + /* * Now kill use_block if it refers to a missing block, so we * can use it as an indication of allocation needed. 
*/ - if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) + if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) use_block = -1; /* * If we don't have enough free bytes but we can make enough @@ -257,14 +459,13 @@ xfs_dir2_leaf_addname( */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(ENOSPC); } /* * Convert to node form. */ error = xfs_dir2_leaf_to_node(args, lbp); - xfs_da_buf_done(lbp); if (error) return error; /* @@ -282,7 +483,7 @@ xfs_dir2_leaf_addname( * a new data block. */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; } /* @@ -290,7 +491,7 @@ xfs_dir2_leaf_addname( * changed anything. */ if (args->total == 0 && use_block == -1) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(ENOSPC); } /* @@ -321,14 +522,14 @@ xfs_dir2_leaf_addname( */ if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &use_block))) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } /* * Initialize the block. */ if ((error = xfs_dir2_data_init(args, use_block, &dbp))) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } /* @@ -348,37 +549,36 @@ xfs_dir2_leaf_addname( */ else xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); - data = dbp->data; - bestsp[use_block] = data->hdr.bestfree[0].length; + hdr = dbp->b_addr; + bestsp[use_block] = hdr->bestfree[0].length; grown = 1; - } - /* - * Already had space in some data block. - * Just read that one in. - */ - else { - if ((error = - xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block), - -1, &dbp, XFS_DATA_FORK))) { - xfs_da_brelse(tp, lbp); + } else { + /* + * Already had space in some data block. + * Just read that one in. 
+ */ + error = xfs_dir2_data_read(tp, dp, + xfs_dir2_db_to_da(mp, use_block), + -1, &dbp); + if (error) { + xfs_trans_brelse(tp, lbp); return error; } - data = dbp->data; + hdr = dbp->b_addr; grown = 0; } - xfs_dir2_data_check(dp, dbp); /* * Point to the biggest freespace in our data block. */ dup = (xfs_dir2_data_unused_t *) - ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); + ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); ASSERT(be16_to_cpu(dup->length) >= length); needscan = needlog = 0; /* * Mark the initial part of our freespace in use for the new entry. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* * Initialize our new entry (at last). @@ -388,12 +588,12 @@ xfs_dir2_leaf_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)data); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Need to log the data block's header. */ @@ -404,107 +604,15 @@ xfs_dir2_leaf_addname( * If the bests table needs to be changed, do it. * Log the change unless we've already done that. */ - if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { - bestsp[use_block] = data->hdr.bestfree[0].length; + if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) { + bestsp[use_block] = hdr->bestfree[0].length; if (!grown) xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); } - /* - * Now we need to make room to insert the leaf entry. - * If there are no stale entries, we just insert a hole at index. - */ - if (!leaf->hdr.stale) { - /* - * lep is still good as the index leaf entry. 
- */ - if (index < be16_to_cpu(leaf->hdr.count)) - memmove(lep + 1, lep, - (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); - /* - * Record low and high logging indices for the leaf. - */ - lfloglow = index; - lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add_cpu(&leaf->hdr.count, 1); - } - /* - * There are stale entries. - * We will use one of them for the new entry. - * It's probably not at the right location, so we'll have to - * shift some up or down first. - */ - else { - /* - * If we didn't compact before, we need to find the nearest - * stale entries before and after our insertion point. - */ - if (compact == 0) { - /* - * Find the first stale entry before the insertion - * point, if any. - */ - for (lowstale = index - 1; - lowstale >= 0 && - be32_to_cpu(leaf->ents[lowstale].address) != - XFS_DIR2_NULL_DATAPTR; - lowstale--) - continue; - /* - * Find the next stale entry at or after the insertion - * point, if any. Stop if we go so far that the - * lowstale entry would be better. - */ - for (highstale = index; - highstale < be16_to_cpu(leaf->hdr.count) && - be32_to_cpu(leaf->ents[highstale].address) != - XFS_DIR2_NULL_DATAPTR && - (lowstale < 0 || - index - lowstale - 1 >= highstale - index); - highstale++) - continue; - } - /* - * If the low one is better, use it. - */ - if (lowstale >= 0 && - (highstale == be16_to_cpu(leaf->hdr.count) || - index - lowstale - 1 < highstale - index)) { - ASSERT(index - lowstale - 1 >= 0); - ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == - XFS_DIR2_NULL_DATAPTR); - /* - * Copy entries up to cover the stale entry - * and make room for the new entry. - */ - if (index - lowstale - 1 > 0) - memmove(&leaf->ents[lowstale], - &leaf->ents[lowstale + 1], - (index - lowstale - 1) * sizeof(*lep)); - lep = &leaf->ents[index - 1]; - lfloglow = MIN(lowstale, lfloglow); - lfloghigh = MAX(index - 1, lfloghigh); - } - /* - * The high one is better, so use that one. 
- */ - else { - ASSERT(highstale - index >= 0); - ASSERT(be32_to_cpu(leaf->ents[highstale].address) == - XFS_DIR2_NULL_DATAPTR); - /* - * Copy entries down to cover the stale entry - * and make room for the new entry. - */ - if (highstale - index > 0) - memmove(&leaf->ents[index + 1], - &leaf->ents[index], - (highstale - index) * sizeof(*lep)); - lep = &leaf->ents[index]; - lfloglow = MIN(index, lfloglow); - lfloghigh = MAX(highstale, lfloghigh); - } - be16_add_cpu(&leaf->hdr.stale, -1); - } + + lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, + highstale, &lfloglow, &lfloghigh); + /* * Fill in the new leaf entry. */ @@ -517,9 +625,7 @@ xfs_dir2_leaf_addname( xfs_dir2_leaf_log_header(tp, lbp); xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); xfs_dir2_leaf_check(dp, lbp); - xfs_da_buf_done(lbp); xfs_dir2_data_check(dp, dbp); - xfs_da_buf_done(dbp); return 0; } @@ -530,8 +636,8 @@ xfs_dir2_leaf_addname( */ STATIC void xfs_dir2_leaf_check( - xfs_inode_t *dp, /* incore directory inode */ - xfs_dabuf_t *bp) /* leaf's buffer */ + struct xfs_inode *dp, /* incore directory inode */ + struct xfs_buf *bp) /* leaf's buffer */ { int i; /* leaf index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -539,9 +645,9 @@ xfs_dir2_leaf_check( xfs_mount_t *mp; /* filesystem mount point */ int stale; /* count of stale leaves */ - leaf = bp->data; + leaf = bp->b_addr; mp = dp->i_mount; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); /* * This value is not restrictive enough. * Should factor in the size of the bests table as well. 
@@ -561,7 +667,7 @@ xfs_dir2_leaf_check( if (i + 1 < be16_to_cpu(leaf->hdr.count)) ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); - if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -575,14 +681,14 @@ xfs_dir2_leaf_check( void xfs_dir2_leaf_compact( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_buf *bp) /* leaf buffer */ { int from; /* source leaf index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ int loglow; /* first leaf entry to log */ int to; /* target leaf index */ - leaf = bp->data; + leaf = bp->b_addr; if (!leaf->hdr.stale) { return; } @@ -590,7 +696,8 @@ xfs_dir2_leaf_compact( * Compress out the stale entries in place. */ for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { - if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; /* * Only actually copy the entries that are different. @@ -623,7 +730,7 @@ xfs_dir2_leaf_compact( */ void xfs_dir2_leaf_compact_x1( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int *indexp, /* insertion index */ int *lowstalep, /* out: stale entry before us */ int *highstalep, /* out: stale entry after us */ @@ -639,27 +746,12 @@ xfs_dir2_leaf_compact_x1( int newindex=0; /* new insertion index */ int to; /* destination copy index */ - leaf = bp->data; + leaf = bp->b_addr; ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); index = *indexp; - /* - * Find the first stale entry before our index, if any. - */ - for (lowstale = index - 1; - lowstale >= 0 && - be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; - lowstale--) - continue; - /* - * Find the first stale entry at or after our index, if any. 
- * Stop if the answer would be worse than lowstale. - */ - for (highstale = index; - highstale < be16_to_cpu(leaf->hdr.count) && - be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && - (lowstale < 0 || index - lowstale > highstale - index); - highstale++) - continue; + + xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); + /* * Pick the better of lowstale and highstale. */ @@ -680,7 +772,8 @@ xfs_dir2_leaf_compact_x1( if (index == from) newindex = to; if (from != keepstale && - be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { + leaf->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { if (from == to) *lowlogp = to; continue; @@ -730,10 +823,10 @@ int xfs_dir2_leaf_init( xfs_da_args_t *args, /* operation arguments */ xfs_dir2_db_t bno, /* directory block number */ - xfs_dabuf_t **bpp, /* out: leaf buffer */ + struct xfs_buf **bpp, /* out: leaf buffer */ int magic) /* magic number for block */ { - xfs_dabuf_t *bp; /* leaf buffer */ + struct xfs_buf *bp; /* leaf buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -751,15 +844,14 @@ xfs_dir2_leaf_init( * Get the buffer for the block. */ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, - XFS_DATA_FORK); - if (error) { + XFS_DATA_FORK); + if (error) return error; - } - ASSERT(bp != NULL); - leaf = bp->data; + /* * Initialize the header. */ + leaf = bp->b_addr; leaf->hdr.info.magic = cpu_to_be16(magic); leaf->hdr.info.forw = 0; leaf->hdr.info.back = 0; @@ -772,10 +864,12 @@ xfs_dir2_leaf_init( * the block. 
*/ if (magic == XFS_DIR2_LEAF1_MAGIC) { + bp->b_ops = &xfs_dir2_leaf1_buf_ops; ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = 0; xfs_dir2_leaf_log_tail(tp, bp); - } + } else + bp->b_ops = &xfs_dir2_leafn_buf_ops; *bpp = bp; return 0; } @@ -786,7 +880,7 @@ xfs_dir2_leaf_init( static void xfs_dir2_leaf_log_bests( xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int first, /* first entry to log */ int last) /* last entry to log */ { @@ -795,12 +889,12 @@ xfs_dir2_leaf_log_bests( xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; - xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); } @@ -810,7 +904,7 @@ xfs_dir2_leaf_log_bests( void xfs_dir2_leaf_log_ents( xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int first, /* first entry to log */ int last) /* last entry to log */ { @@ -818,12 +912,12 @@ xfs_dir2_leaf_log_ents( xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ xfs_dir2_leaf_t *leaf; /* leaf structure */ - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || - be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); firstlep = &leaf->ents[first]; lastlep = &leaf->ents[last]; - xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - 
(char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); } @@ -832,15 +926,15 @@ xfs_dir2_leaf_log_ents( */ void xfs_dir2_leaf_log_header( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { xfs_dir2_leaf_t *leaf; /* leaf structure */ - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || - be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); - xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1)); } @@ -849,18 +943,18 @@ xfs_dir2_leaf_log_header( */ STATIC void xfs_dir2_leaf_log_tail( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ xfs_mount_t *mp; /* filesystem mount point */ mp = tp->t_mountp; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(mp, leaf); - xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), + xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); } @@ -873,12 +967,12 @@ int xfs_dir2_leaf_lookup( xfs_da_args_t *args) /* operation arguments */ { - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ int index; /* found entry index */ - 
xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ @@ -894,7 +988,7 @@ xfs_dir2_leaf_lookup( tp = args->trans; dp = args->dp; xfs_dir2_leaf_check(dp, lbp); - leaf = lbp->data; + leaf = lbp->b_addr; /* * Get to the leaf entry and contained data entry address. */ @@ -903,15 +997,15 @@ xfs_dir2_leaf_lookup( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->data + + ((char *)dbp->b_addr + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); /* * Return the found inode number & CI name if appropriate */ args->inumber = be64_to_cpu(dep->inumber); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); - xfs_da_brelse(tp, dbp); - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, dbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(error); } @@ -924,17 +1018,17 @@ xfs_dir2_leaf_lookup( static int /* error */ xfs_dir2_leaf_lookup_int( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t **lbpp, /* out: leaf buffer */ + struct xfs_buf **lbpp, /* out: leaf buffer */ int *indexp, /* out: index in leaf block */ - xfs_dabuf_t **dbpp) /* out: data buffer */ + struct xfs_buf **dbpp) /* out: data buffer */ { xfs_dir2_db_t curdb = -1; /* current data block number */ - xfs_dabuf_t *dbp = NULL; /* data buffer */ + struct xfs_buf *dbp = NULL; /* data buffer */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ int index; /* index in leaf block */ - xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_mount_t *mp; /* filesystem mount point */ @@ -946,15 +1040,13 @@ xfs_dir2_leaf_lookup_int( dp = args->dp; tp = args->trans; mp = dp->i_mount; - /* - * Read the leaf block into the buffer. 
- */ - error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, - XFS_DATA_FORK); + + error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); if (error) return error; + *lbpp = lbp; - leaf = lbp->data; + leaf = lbp->b_addr; xfs_dir2_leaf_check(dp, lbp); /* * Look for the first leaf entry with our hash value. @@ -982,21 +1074,20 @@ xfs_dir2_leaf_lookup_int( */ if (newdb != curdb) { if (dbp) - xfs_da_brelse(tp, dbp); - error = xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, newdb), - -1, &dbp, XFS_DATA_FORK); + xfs_trans_brelse(tp, dbp); + error = xfs_dir2_data_read(tp, dp, + xfs_dir2_db_to_da(mp, newdb), + -1, &dbp); if (error) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } - xfs_dir2_data_check(dp, dbp); curdb = newdb; } /* * Point to the data entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)dbp->data + + dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* * Compare name and if it's an exact match, return the index @@ -1024,12 +1115,12 @@ xfs_dir2_leaf_lookup_int( if (args->cmpresult == XFS_CMP_CASE) { ASSERT(cidb != -1); if (cidb != curdb) { - xfs_da_brelse(tp, dbp); - error = xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, cidb), - -1, &dbp, XFS_DATA_FORK); + xfs_trans_brelse(tp, dbp); + error = xfs_dir2_data_read(tp, dp, + xfs_dir2_db_to_da(mp, cidb), + -1, &dbp); if (error) { - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return error; } } @@ -1041,8 +1132,8 @@ xfs_dir2_leaf_lookup_int( */ ASSERT(cidb == -1); if (dbp) - xfs_da_brelse(tp, dbp); - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, dbp); + xfs_trans_brelse(tp, lbp); return XFS_ERROR(ENOENT); } @@ -1054,15 +1145,15 @@ xfs_dir2_leaf_removename( xfs_da_args_t *args) /* operation arguments */ { __be16 *bestsp; /* leaf block best freespace */ - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ - 
xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry structure */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_db_t i; /* temporary data block # */ int index; /* index into leaf entries */ - xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ @@ -1083,8 +1174,8 @@ xfs_dir2_leaf_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = lbp->data; - data = dbp->data; + leaf = lbp->b_addr; + hdr = dbp->b_addr; xfs_dir2_data_check(dp, dbp); /* * Point to the leaf entry, use that to point to the data entry. @@ -1092,9 +1183,9 @@ xfs_dir2_leaf_removename( lep = &leaf->ents[index]; db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *) - ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); needscan = needlog = 0; - oldbest = be16_to_cpu(data->hdr.bestfree[0].length); + oldbest = be16_to_cpu(hdr->bestfree[0].length); ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); @@ -1102,7 +1193,7 @@ xfs_dir2_leaf_removename( * Mark the former data entry unused. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. @@ -1116,23 +1207,23 @@ xfs_dir2_leaf_removename( * log the data block header if necessary. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ - if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { - bestsp[db] = data->hdr.bestfree[0].length; + if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) { + bestsp[db] = hdr->bestfree[0].length; xfs_dir2_leaf_log_bests(tp, lbp, db, db); } xfs_dir2_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. */ - if (be16_to_cpu(data->hdr.bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(data->hdr)) { + if (be16_to_cpu(hdr->bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(*hdr)) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* @@ -1141,12 +1232,9 @@ xfs_dir2_leaf_removename( * Just go on, returning success, leaving the * empty block in place. */ - if (error == ENOSPC && args->total == 0) { - xfs_da_buf_done(dbp); + if (error == ENOSPC && args->total == 0) error = 0; - } xfs_dir2_leaf_check(dp, lbp); - xfs_da_buf_done(lbp); return error; } dbp = NULL; @@ -1159,7 +1247,7 @@ xfs_dir2_leaf_removename( * Look for the last active entry (i). */ for (i = db - 1; i > 0; i--) { - if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) + if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) break; } /* @@ -1177,10 +1265,9 @@ xfs_dir2_leaf_removename( /* * If the data block was not the first one, drop it. */ - else if (db != mp->m_dirdatablk && dbp != NULL) { - xfs_da_buf_done(dbp); + else if (db != mp->m_dirdatablk) dbp = NULL; - } + xfs_dir2_leaf_check(dp, lbp); /* * See if we can convert to block form. 
@@ -1195,12 +1282,12 @@ int /* error */ xfs_dir2_leaf_replace( xfs_da_args_t *args) /* operation arguments */ { - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ int index; /* index of leaf entry */ - xfs_dabuf_t *lbp; /* leaf buffer */ + struct xfs_buf *lbp; /* leaf buffer */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ @@ -1214,7 +1301,7 @@ xfs_dir2_leaf_replace( return error; } dp = args->dp; - leaf = lbp->data; + leaf = lbp->b_addr; /* * Point to the leaf entry, get data address from it. */ @@ -1223,7 +1310,7 @@ xfs_dir2_leaf_replace( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->data + + ((char *)dbp->b_addr + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); ASSERT(args->inumber != be64_to_cpu(dep->inumber)); /* @@ -1232,9 +1319,8 @@ xfs_dir2_leaf_replace( dep->inumber = cpu_to_be64(args->inumber); tp = args->trans; xfs_dir2_data_log_entry(tp, dbp, dep); - xfs_da_buf_done(dbp); xfs_dir2_leaf_check(dp, lbp); - xfs_da_brelse(tp, lbp); + xfs_trans_brelse(tp, lbp); return 0; } @@ -1246,7 +1332,7 @@ xfs_dir2_leaf_replace( int /* index value */ xfs_dir2_leaf_search_hash( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp) /* leaf buffer */ + struct xfs_buf *lbp) /* leaf buffer */ { xfs_dahash_t hash=0; /* hash from this entry */ xfs_dahash_t hashwant; /* hash value looking for */ @@ -1256,7 +1342,7 @@ xfs_dir2_leaf_search_hash( xfs_dir2_leaf_entry_t *lep; /* leaf entry */ int mid=0; /* current leaf index */ - leaf = lbp->data; + leaf = lbp->b_addr; #ifndef __KERNEL__ if (!leaf->hdr.count) return 0; @@ -1299,14 +1385,11 @@ xfs_dir2_leaf_search_hash( int /* error */ xfs_dir2_leaf_trim_data( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t 
*lbp, /* leaf buffer */ + struct xfs_buf *lbp, /* leaf buffer */ xfs_dir2_db_t db) /* data block number */ { __be16 *bestsp; /* leaf bests table */ -#ifdef DEBUG - xfs_dir2_data_t *data; /* data block structure */ -#endif - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -1320,30 +1403,30 @@ xfs_dir2_leaf_trim_data( /* * Read the offending data block. We need its buffer. */ - if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp, - XFS_DATA_FORK))) { + error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); + if (error) return error; - } -#ifdef DEBUG - data = dbp->data; - ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); -#endif - /* this seems to be an error - * data is only valid if DEBUG is defined? - * RMC 09/08/1999 - */ - leaf = lbp->data; + leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); - ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(data->hdr)); + +#ifdef DEBUG +{ + struct xfs_dir2_data_hdr *hdr = dbp->b_addr; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + ASSERT(be16_to_cpu(hdr->bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(*hdr)); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); +} +#endif + /* * Get rid of the data block. 
*/ if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { ASSERT(error != ENOSPC); - xfs_da_brelse(tp, dbp); + xfs_trans_brelse(tp, dbp); return error; } /* @@ -1357,6 +1440,20 @@ xfs_dir2_leaf_trim_data( return 0; } +static inline size_t +xfs_dir2_leaf_size( + struct xfs_dir2_leaf_hdr *hdr, + int counts) +{ + int entries; + + entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale); + return sizeof(xfs_dir2_leaf_hdr_t) + + entries * sizeof(xfs_dir2_leaf_entry_t) + + counts * sizeof(xfs_dir2_data_off_t) + + sizeof(xfs_dir2_leaf_tail_t); +} + /* * Convert node form directory to leaf form directory. * The root of the node form dir needs to already be a LEAFN block. @@ -1369,10 +1466,10 @@ xfs_dir2_node_to_leaf( xfs_da_args_t *args; /* operation arguments */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ - xfs_dabuf_t *fbp; /* buffer for freespace block */ + struct xfs_buf *fbp; /* buffer for freespace block */ xfs_fileoff_t fo; /* freespace file offset */ xfs_dir2_free_t *free; /* freespace structure */ - xfs_dabuf_t *lbp; /* buffer for leaf block */ + struct xfs_buf *lbp; /* buffer for leaf block */ xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_mount_t *mp; /* filesystem mount point */ @@ -1426,30 +1523,28 @@ xfs_dir2_node_to_leaf( if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize) return 0; lbp = state->path.blk[0].bp; - leaf = lbp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = lbp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * Read the freespace block. 
*/ - if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp, - XFS_DATA_FORK))) { + error = xfs_dir2_free_read(tp, dp, mp->m_dirfreeblk, &fbp); + if (error) return error; - } - free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = fbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); ASSERT(!free->hdr.firstdb); + /* * Now see if the leafn and free data will fit in a leaf1. * If not, release the buffer and give up. */ - if ((uint)sizeof(leaf->hdr) + - (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + - be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) + - (uint)sizeof(leaf->tail) > - mp->m_dirblksize) { - xfs_da_brelse(tp, fbp); + if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > + mp->m_dirblksize) { + xfs_trans_brelse(tp, fbp); return 0; } + /* * If the leaf has any stale entries in it, compress them out. * The compact routine will log the header. @@ -1458,7 +1553,10 @@ xfs_dir2_node_to_leaf( xfs_dir2_leaf_compact(args, lbp); else xfs_dir2_leaf_log_header(tp, lbp); + + lbp->b_ops = &xfs_dir2_leaf1_buf_ops; leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC); + /* * Set up the leaf tail from the freespace block. */ @@ -1468,7 +1566,7 @@ xfs_dir2_node_to_leaf( * Set up the leaf bests table. */ memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, - be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); + be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t)); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_check(dp, lbp); diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c index e4e20d626..649f67764 100644 --- a/libxfs/xfs_dir2_node.c +++ b/libxfs/xfs_dir2_node.c @@ -21,40 +21,108 @@ /* * Function declarations. 
*/ -static void xfs_dir2_free_log_header(xfs_trans_t *tp, xfs_dabuf_t *bp); -static int xfs_dir2_leafn_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index); +static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, + int index); #ifdef DEBUG -static void xfs_dir2_leafn_check(xfs_inode_t *dp, xfs_dabuf_t *bp); +static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir2_leafn_check(dp, bp) #endif -static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, xfs_dabuf_t *bp_s, - int start_s, xfs_dabuf_t *bp_d, int start_d, - int count); +static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s, + int start_s, struct xfs_buf *bp_d, + int start_d, int count); static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, xfs_da_state_blk_t *blk2); -static int xfs_dir2_leafn_remove(xfs_da_args_t *args, xfs_dabuf_t *bp, +static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, int index, xfs_da_state_blk_t *dblk, int *rval); static int xfs_dir2_node_addname_int(xfs_da_args_t *args, xfs_da_state_blk_t *fblk); +static void +xfs_dir2_free_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_free_hdr *hdr = bp->b_addr; + int block_ok = 0; + + block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC); + if (!block_ok) { + XFS_CORRUPTION_ERROR("xfs_dir2_free_verify magic", + XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_dir2_free_read_verify( + struct xfs_buf *bp) +{ + xfs_dir2_free_verify(bp); +} + +static void +xfs_dir2_free_write_verify( + struct xfs_buf *bp) +{ + xfs_dir2_free_verify(bp); +} + +static const struct xfs_buf_ops xfs_dir2_free_buf_ops = { + .verify_read = xfs_dir2_free_read_verify, + .verify_write = xfs_dir2_free_write_verify, +}; + + +static int +__xfs_dir2_free_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + xfs_daddr_t mappedbno, 
+ struct xfs_buf **bpp) +{ + return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, + XFS_DATA_FORK, &xfs_dir2_free_buf_ops); +} + +int +xfs_dir2_free_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf **bpp) +{ + return __xfs_dir2_free_read(tp, dp, fbno, -1, bpp); +} + +static int +xfs_dir2_free_try_read( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf **bpp) +{ + return __xfs_dir2_free_read(tp, dp, fbno, -2, bpp); +} + /* * Log entries from a freespace block. */ STATIC void xfs_dir2_free_log_bests( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp, /* freespace buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ { xfs_dir2_free_t *free; /* freespace structure */ - free = bp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); - xfs_da_log_buf(tp, bp, + free = bp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + xfs_trans_log_buf(tp, bp, (uint)((char *)&free->bests[first] - (char *)free), (uint)((char *)&free->bests[last] - (char *)free + sizeof(free->bests[0]) - 1)); @@ -65,14 +133,14 @@ xfs_dir2_free_log_bests( */ static void xfs_dir2_free_log_header( - xfs_trans_t *tp, /* transaction pointer */ - xfs_dabuf_t *bp) /* freespace buffer */ + struct xfs_trans *tp, + struct xfs_buf *bp) { xfs_dir2_free_t *free; /* freespace structure */ - free = bp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); - xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), + free = bp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); + xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); } @@ -84,11 +152,11 @@ xfs_dir2_free_log_header( int /* error */ xfs_dir2_leaf_to_node( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *lbp) /* leaf buffer */ + struct xfs_buf 
*lbp) /* leaf buffer */ { xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ - xfs_dabuf_t *fbp; /* freespace buffer */ + struct xfs_buf *fbp; /* freespace buffer */ xfs_dir2_db_t fdb; /* freespace block number */ xfs_dir2_free_t *free; /* freespace structure */ __be16 *from; /* pointer to freespace entry */ @@ -116,13 +184,14 @@ xfs_dir2_leaf_to_node( /* * Get the buffer for the new freespace block. */ - if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, - XFS_DATA_FORK))) { + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, + XFS_DATA_FORK); + if (error) return error; - } - ASSERT(fbp != NULL); - free = fbp->data; - leaf = lbp->data; + fbp->b_ops = &xfs_dir2_free_buf_ops; + + free = fbp->b_addr; + leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * Initialize the freespace block header. @@ -142,14 +211,16 @@ xfs_dir2_leaf_to_node( *to = cpu_to_be16(off); } free->hdr.nused = cpu_to_be32(n); + + lbp->b_ops = &xfs_dir2_leafn_buf_ops; leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); + /* * Log everything. */ xfs_dir2_leaf_log_header(tp, lbp); xfs_dir2_free_log_header(tp, fbp); xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1); - xfs_da_buf_done(fbp); xfs_dir2_leafn_check(dp, lbp); return 0; } @@ -160,7 +231,7 @@ xfs_dir2_leaf_to_node( */ static int /* error */ xfs_dir2_leafn_add( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int index) /* insertion pt for new entry */ { @@ -180,7 +251,7 @@ xfs_dir2_leafn_add( dp = args->dp; mp = dp->i_mount; tp = args->trans; - leaf = bp->data; + leaf = bp->b_addr; /* * Quick check just to make sure we are not going to index @@ -224,89 +295,13 @@ xfs_dir2_leafn_add( lfloglow = be16_to_cpu(leaf->hdr.count); lfloghigh = -1; } - /* - * No stale entries, just insert a space for the new entry. 
- */ - if (!leaf->hdr.stale) { - lep = &leaf->ents[index]; - if (index < be16_to_cpu(leaf->hdr.count)) - memmove(lep + 1, lep, - (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); - lfloglow = index; - lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add_cpu(&leaf->hdr.count, 1); - } - /* - * There are stale entries. We'll use one for the new entry. - */ - else { - /* - * If we didn't do a compact then we need to figure out - * which stale entry will be used. - */ - if (compact == 0) { - /* - * Find first stale entry before our insertion point. - */ - for (lowstale = index - 1; - lowstale >= 0 && - be32_to_cpu(leaf->ents[lowstale].address) != - XFS_DIR2_NULL_DATAPTR; - lowstale--) - continue; - /* - * Find next stale entry after insertion point. - * Stop looking if the answer would be worse than - * lowstale already found. - */ - for (highstale = index; - highstale < be16_to_cpu(leaf->hdr.count) && - be32_to_cpu(leaf->ents[highstale].address) != - XFS_DIR2_NULL_DATAPTR && - (lowstale < 0 || - index - lowstale - 1 >= highstale - index); - highstale++) - continue; - } - /* - * Using the low stale entry. - * Shift entries up toward the stale slot. - */ - if (lowstale >= 0 && - (highstale == be16_to_cpu(leaf->hdr.count) || - index - lowstale - 1 < highstale - index)) { - ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == - XFS_DIR2_NULL_DATAPTR); - ASSERT(index - lowstale - 1 >= 0); - if (index - lowstale - 1 > 0) - memmove(&leaf->ents[lowstale], - &leaf->ents[lowstale + 1], - (index - lowstale - 1) * sizeof(*lep)); - lep = &leaf->ents[index - 1]; - lfloglow = MIN(lowstale, lfloglow); - lfloghigh = MAX(index - 1, lfloghigh); - } - /* - * Using the high stale entry. - * Shift entries down toward the stale slot. 
- */ - else { - ASSERT(be32_to_cpu(leaf->ents[highstale].address) == - XFS_DIR2_NULL_DATAPTR); - ASSERT(highstale - index >= 0); - if (highstale - index > 0) - memmove(&leaf->ents[index + 1], - &leaf->ents[index], - (highstale - index) * sizeof(*lep)); - lep = &leaf->ents[index]; - lfloglow = MIN(index, lfloglow); - lfloghigh = MAX(highstale, lfloghigh); - } - be16_add_cpu(&leaf->hdr.stale, -1); - } + /* * Insert the new entry, log everything. */ + lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, + highstale, &lfloglow, &lfloghigh); + lep->hashval = cpu_to_be32(args->hashval); lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); @@ -322,24 +317,24 @@ xfs_dir2_leafn_add( */ void xfs_dir2_leafn_check( - xfs_inode_t *dp, /* incore directory inode */ - xfs_dabuf_t *bp) /* leaf buffer */ + struct xfs_inode *dp, + struct xfs_buf *bp) { int i; /* leaf index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_mount_t *mp; /* filesystem mount point */ int stale; /* count of stale leaves */ - leaf = bp->data; + leaf = bp->b_addr; mp = dp->i_mount; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); } - if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -352,13 +347,13 @@ xfs_dir2_leafn_check( */ xfs_dahash_t /* hash value */ xfs_dir2_leafn_lasthash( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int *count) /* count of entries in leaf */ { xfs_dir2_leaf_t *leaf; /* leaf structure */ - leaf = bp->data; - 
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -372,12 +367,12 @@ xfs_dir2_leafn_lasthash( */ STATIC int xfs_dir2_leafn_lookup_for_addname( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ { - xfs_dabuf_t *curbp = NULL; /* current data/free buffer */ + struct xfs_buf *curbp = NULL; /* current data/free buffer */ xfs_dir2_db_t curdb = -1; /* current data block number */ xfs_dir2_db_t curfdb = -1; /* current free block number */ xfs_inode_t *dp; /* incore directory inode */ @@ -396,8 +391,8 @@ xfs_dir2_leafn_lookup_for_addname( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -413,8 +408,8 @@ xfs_dir2_leafn_lookup_for_addname( /* If so, it's a free block buffer, get the block number. */ curbp = state->extrablk.bp; curfdb = state->extrablk.blkno; - free = curbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = curbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); } length = xfs_dir2_data_entsize(args->namelen); /* @@ -455,20 +450,18 @@ xfs_dir2_leafn_lookup_for_addname( * If we had one before, drop it. */ if (curbp) - xfs_da_brelse(tp, curbp); - /* - * Read the free block. 
- */ - error = xfs_da_read_buf(tp, dp, + xfs_trans_brelse(tp, curbp); + + error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, newfdb), - -1, &curbp, XFS_DATA_FORK); + &curbp); if (error) return error; - free = curbp->data; + free = curbp->b_addr; ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT((be32_to_cpu(free->hdr.firstdb) % - XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); + xfs_dir2_free_max_bests(mp)) == 0); ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + be32_to_cpu(free->hdr.nvalid)); @@ -480,11 +473,12 @@ xfs_dir2_leafn_lookup_for_addname( /* * If it has room, return it. */ - if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { + if (unlikely(free->bests[fi] == + cpu_to_be16(NULLDATAOFF))) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); return XFS_ERROR(EFSCORRUPTED); } curfdb = newfdb; @@ -519,12 +513,12 @@ out: */ STATIC int xfs_dir2_leafn_lookup_for_entry( - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ { - xfs_dabuf_t *curbp = NULL; /* current data/free buffer */ + struct xfs_buf *curbp = NULL; /* current data/free buffer */ xfs_dir2_db_t curdb = -1; /* current data block number */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ @@ -540,8 +534,8 @@ xfs_dir2_leafn_lookup_for_entry( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -585,7 +579,7 @@ xfs_dir2_leafn_lookup_for_entry( */ if (curbp && (args->cmpresult == 
XFS_CMP_DIFFERENT || curdb != state->extrablk.blkno)) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); /* * If needing the block that is saved with a CI match, * use it otherwise read in the new data block. @@ -595,9 +589,9 @@ xfs_dir2_leafn_lookup_for_entry( ASSERT(state->extravalid); curbp = state->extrablk.bp; } else { - error = xfs_da_read_buf(tp, dp, + error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, newdb), - -1, &curbp, XFS_DATA_FORK); + -1, &curbp); if (error) return error; } @@ -607,7 +601,7 @@ xfs_dir2_leafn_lookup_for_entry( /* * Point to the data entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)curbp->data + + dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* * Compare the entry and if it's an exact match, return @@ -619,7 +613,7 @@ xfs_dir2_leafn_lookup_for_entry( /* If there is a CI match block, drop it */ if (args->cmpresult != XFS_CMP_DIFFERENT && curdb != state->extrablk.blkno) - xfs_da_brelse(tp, state->extrablk.bp); + xfs_trans_brelse(tp, state->extrablk.bp); args->cmpresult = cmp; args->inumber = be64_to_cpu(dep->inumber); *indexp = index; @@ -627,8 +621,9 @@ xfs_dir2_leafn_lookup_for_entry( state->extrablk.bp = curbp; state->extrablk.blkno = curdb; state->extrablk.index = (int)((char *)dep - - (char *)curbp->data); + (char *)curbp->b_addr); state->extrablk.magic = XFS_DIR2_DATA_MAGIC; + curbp->b_ops = &xfs_dir2_data_buf_ops; if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); } @@ -643,10 +638,11 @@ xfs_dir2_leafn_lookup_for_entry( state->extrablk.index = -1; state->extrablk.blkno = curdb; state->extrablk.magic = XFS_DIR2_DATA_MAGIC; + curbp->b_ops = &xfs_dir2_data_buf_ops; } else { /* If the curbp is not the CI match block, drop it */ if (state->extrablk.bp != curbp) - xfs_da_brelse(tp, curbp); + xfs_trans_brelse(tp, curbp); } } else { state->extravalid = 0; @@ -662,7 +658,7 @@ xfs_dir2_leafn_lookup_for_entry( */ int xfs_dir2_leafn_lookup_int( - xfs_dabuf_t 
*bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ @@ -680,9 +676,9 @@ xfs_dir2_leafn_lookup_int( static void xfs_dir2_leafn_moveents( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp_s, /* source leaf buffer */ + struct xfs_buf *bp_s, /* source leaf buffer */ int start_s, /* source leaf index */ - xfs_dabuf_t *bp_d, /* destination leaf buffer */ + struct xfs_buf *bp_d, /* destination leaf buffer */ int start_d, /* destination leaf index */ int count) /* count of leaves to copy */ { @@ -700,8 +696,8 @@ xfs_dir2_leafn_moveents( return; } tp = args->trans; - leaf_s = bp_s->data; - leaf_d = bp_d->data; + leaf_s = bp_s->b_addr; + leaf_d = bp_d->b_addr; /* * If the destination index is not the end of the current * destination leaf entries, open up a hole in the destination @@ -722,7 +718,8 @@ xfs_dir2_leafn_moveents( int i; /* temp leaf index */ for (i = start_s, stale = 0; i < start_s + count; i++) { - if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf_s->ents[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } } else @@ -761,16 +758,16 @@ xfs_dir2_leafn_moveents( */ int /* sort order */ xfs_dir2_leafn_order( - xfs_dabuf_t *leaf1_bp, /* leaf1 buffer */ - xfs_dabuf_t *leaf2_bp) /* leaf2 buffer */ + struct xfs_buf *leaf1_bp, /* leaf1 buffer */ + struct xfs_buf *leaf2_bp) /* leaf2 buffer */ { xfs_dir2_leaf_t *leaf1; /* leaf1 structure */ xfs_dir2_leaf_t *leaf2; /* leaf2 structure */ - leaf1 = leaf1_bp->data; - leaf2 = leaf2_bp->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf1 = leaf1_bp->b_addr; + leaf2 = leaf2_bp->b_addr; + ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); if 
(be16_to_cpu(leaf1->hdr.count) > 0 && be16_to_cpu(leaf2->hdr.count) > 0 && (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || @@ -816,8 +813,8 @@ xfs_dir2_leafn_rebalance( blk1 = blk2; blk2 = tmp; } - leaf1 = blk1->bp->data; - leaf2 = blk2->bp->data; + leaf1 = blk1->bp->b_addr; + leaf2 = blk2->bp->b_addr; oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count); #ifdef DEBUG oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale); @@ -879,11 +876,81 @@ xfs_dir2_leafn_rebalance( if(blk2->index < 0) { state->inleaf = 1; blk2->index = 0; - cmn_err(CE_ALERT, - "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " - "blk1->index %d\n", - blk1->index); + xfs_alert(args->dp->i_mount, + "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n", + __func__, blk1->index); + } +} + +static int +xfs_dir2_data_block_free( + xfs_da_args_t *args, + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_free *free, + xfs_dir2_db_t fdb, + int findex, + struct xfs_buf *fbp, + int longest) +{ + struct xfs_trans *tp = args->trans; + int logfree = 0; + + if (!hdr) { + /* One less used entry in the free table. */ + be32_add_cpu(&free->hdr.nused, -1); + xfs_dir2_free_log_header(tp, fbp); + + /* + * If this was the last entry in the table, we can trim the + * table size back. There might be other entries at the end + * referring to non-existent data blocks, get those too. + */ + if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { + int i; /* free entry index */ + + for (i = findex - 1; i >= 0; i--) { + if (free->bests[i] != cpu_to_be16(NULLDATAOFF)) + break; + } + free->hdr.nvalid = cpu_to_be32(i + 1); + logfree = 0; + } else { + /* Not the last entry, just punch it out. */ + free->bests[findex] = cpu_to_be16(NULLDATAOFF); + logfree = 1; + } + /* + * If there are no useful entries left in the block, + * get rid of the block if we can. 
+ */ + if (!free->hdr.nused) { + int error; + + error = xfs_dir2_shrink_inode(args, fdb, fbp); + if (error == 0) { + fbp = NULL; + logfree = 0; + } else if (error != ENOSPC || args->total != 0) + return error; + /* + * It's possible to get ENOSPC if there is no + * space reservation. In this case some one + * else will eventually get rid of this block. + */ + } + } else { + /* + * Data block is not empty, just set the free entry to the new + * value. + */ + free->bests[findex] = cpu_to_be16(longest); + logfree = 1; } + + /* Log the free entry that changed, unless we got rid of it. */ + if (logfree) + xfs_dir2_free_log_bests(tp, fbp, findex, findex); + return 0; } /* @@ -894,14 +961,14 @@ xfs_dir2_leafn_rebalance( static int /* error */ xfs_dir2_leafn_remove( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp, /* leaf buffer */ + struct xfs_buf *bp, /* leaf buffer */ int index, /* leaf entry index */ xfs_da_state_blk_t *dblk, /* data block */ int *rval) /* resulting block needs join */ { - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -918,8 +985,8 @@ xfs_dir2_leafn_remove( dp = args->dp; tp = args->trans; mp = dp->i_mount; - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * Point to the entry we're removing. */ @@ -944,9 +1011,9 @@ xfs_dir2_leafn_remove( * in the data block in case it changes. 
*/ dbp = dblk->bp; - data = dbp->data; - dep = (xfs_dir2_data_entry_t *)((char *)data + off); - longest = be16_to_cpu(data->hdr.bestfree[0].length); + hdr = dbp->b_addr; + dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); + longest = be16_to_cpu(hdr->bestfree[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(tp, dbp, off, xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); @@ -955,7 +1022,7 @@ xfs_dir2_leafn_remove( * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); xfs_dir2_data_check(dp, dbp); @@ -963,126 +1030,63 @@ xfs_dir2_leafn_remove( * If the longest data block freespace changes, need to update * the corresponding freeblock entry. */ - if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { + if (longest < be16_to_cpu(hdr->bestfree[0].length)) { int error; /* error return value */ - xfs_dabuf_t *fbp; /* freeblock buffer */ + struct xfs_buf *fbp; /* freeblock buffer */ xfs_dir2_db_t fdb; /* freeblock block number */ int findex; /* index in freeblock entries */ xfs_dir2_free_t *free; /* freeblock structure */ - int logfree; /* need to log free entry */ /* * Convert the data block number to a free block, * read in the free block. */ fdb = xfs_dir2_db_to_fdb(mp, db); - if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), - -1, &fbp, XFS_DATA_FORK))) { + error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), + &fbp); + if (error) return error; - } - free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = fbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); ASSERT(be32_to_cpu(free->hdr.firstdb) == - XFS_DIR2_MAX_FREE_BESTS(mp) * + xfs_dir2_free_max_bests(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); /* * Calculate which entry we need to fix. 
*/ findex = xfs_dir2_db_to_fdindex(mp, db); - longest = be16_to_cpu(data->hdr.bestfree[0].length); + longest = be16_to_cpu(hdr->bestfree[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { + if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) { /* * Try to punch out the data block. */ error = xfs_dir2_shrink_inode(args, db, dbp); if (error == 0) { dblk->bp = NULL; - data = NULL; + hdr = NULL; } /* * We can get ENOSPC if there's no space reservation. * In this case just drop the buffer and some one else * will eventually get rid of the empty block. */ - else if (error == ENOSPC && args->total == 0) - xfs_da_buf_done(dbp); - else + else if (!(error == ENOSPC && args->total == 0)) return error; } /* * If we got rid of the data block, we can eliminate that entry * in the free block. */ - if (data == NULL) { - /* - * One less used entry in the free table. - */ - be32_add_cpu(&free->hdr.nused, -1); - xfs_dir2_free_log_header(tp, fbp); - /* - * If this was the last entry in the table, we can - * trim the table size back. There might be other - * entries at the end referring to non-existent - * data blocks, get those too. - */ - if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { - int i; /* free entry index */ - - for (i = findex - 1; - i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; - i--) - continue; - free->hdr.nvalid = cpu_to_be32(i + 1); - logfree = 0; - } - /* - * Not the last entry, just punch it out. - */ - else { - free->bests[findex] = cpu_to_be16(NULLDATAOFF); - logfree = 1; - } - /* - * If there are no useful entries left in the block, - * get rid of the block if we can. - */ - if (!free->hdr.nused) { - error = xfs_dir2_shrink_inode(args, fdb, fbp); - if (error == 0) { - fbp = NULL; - logfree = 0; - } else if (error != ENOSPC || args->total != 0) - return error; - /* - * It's possible to get ENOSPC if there is no - * space reservation. 
In this case some one - * else will eventually get rid of this block. - */ - } - } - /* - * Data block is not empty, just set the free entry to - * the new value. - */ - else { - free->bests[findex] = cpu_to_be16(longest); - logfree = 1; - } - /* - * Log the free entry that changed, unless we got rid of it. - */ - if (logfree) - xfs_dir2_free_log_bests(tp, fbp, findex, findex); - /* - * Drop the buffer if we still have it. - */ - if (fbp) - xfs_da_buf_done(fbp); + error = xfs_dir2_data_block_free(args, hdr, free, + fdb, findex, fbp, longest); + if (error) + return error; } + xfs_dir2_leafn_check(dp, bp); /* * Return indication of whether this leaf block is empty enough @@ -1173,7 +1177,7 @@ xfs_dir2_leafn_toosmall( { xfs_da_state_blk_t *blk; /* leaf block */ xfs_dablk_t blkno; /* leaf block number */ - xfs_dabuf_t *bp; /* leaf buffer */ + struct xfs_buf *bp; /* leaf buffer */ int bytes; /* bytes in use */ int count; /* leaf live entry count */ int error; /* error return value */ @@ -1189,8 +1193,8 @@ xfs_dir2_leafn_toosmall( * to coalesce with a sibling. */ blk = &state->path.blk[state->path.active - 1]; - info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); + info = blk->bp->b_addr; + ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); @@ -1236,20 +1240,19 @@ xfs_dir2_leafn_toosmall( /* * Read the sibling leaf block. */ - if ((error = - xfs_da_read_buf(state->args->trans, state->args->dp, blkno, - -1, &bp, XFS_DATA_FORK))) { + error = xfs_dir2_leafn_read(state->args->trans, state->args->dp, + blkno, -1, &bp); + if (error) return error; - } - ASSERT(bp != NULL); + /* * Count bytes in the two blocks combined. 
*/ leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); - leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + leaf = bp->b_addr; + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); /* @@ -1257,7 +1260,7 @@ xfs_dir2_leafn_toosmall( */ if (bytes >= 0) break; - xfs_da_brelse(state->args->trans, bp); + xfs_trans_brelse(state->args->trans, bp); } /* * Didn't like either block, give up. @@ -1266,11 +1269,7 @@ xfs_dir2_leafn_toosmall( *action = 0; return 0; } - /* - * Done with the sibling leaf block here, drop the dabuf - * so path_shift can get it. - */ - xfs_da_buf_done(bp); + /* * Make altpath point to the block we want to keep (the lower * numbered block) and path point to the block we want to drop. @@ -1306,10 +1305,10 @@ xfs_dir2_leafn_unbalance( args = state->args; ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); - drop_leaf = drop_blk->bp->data; - save_leaf = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + drop_leaf = drop_blk->bp->b_addr; + save_leaf = save_blk->bp->b_addr; + ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * If there are any stale leaf entries, take this opportunity * to purge them. 
@@ -1413,15 +1412,15 @@ xfs_dir2_node_addname_int( xfs_da_args_t *args, /* operation arguments */ xfs_da_state_blk_t *fblk) /* optional freespace block */ { - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t dbno; /* data block number */ - xfs_dabuf_t *dbp; /* data block buffer */ + struct xfs_buf *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ int error; /* error return value */ xfs_dir2_db_t fbno; /* freespace block number */ - xfs_dabuf_t *fbp; /* freespace buffer */ + struct xfs_buf *fbp; /* freespace buffer */ int findex; /* freespace entry index */ xfs_dir2_free_t *free=NULL; /* freespace block structure */ xfs_dir2_db_t ifbno; /* initial freespace block no */ @@ -1449,8 +1448,8 @@ xfs_dir2_node_addname_int( * Remember initial freespace block number. */ ifbno = fblk->blkno; - free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = fbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); findex = fblk->index; /* * This means the free entry showed that the data block had @@ -1525,16 +1524,15 @@ xfs_dir2_node_addname_int( * This should be really rare, so there's no reason * to avoid it. */ - if ((error = xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, fbno), -2, &fbp, - XFS_DATA_FORK))) { + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(mp, fbno), + &fbp); + if (error) return error; - } - if (unlikely(fbp == NULL)) { + if (!fbp) continue; - } - free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = fbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); findex = 0; } /* @@ -1551,7 +1549,7 @@ xfs_dir2_node_addname_int( /* * Drop the block. 
*/ - xfs_da_brelse(tp, fbp); + xfs_trans_brelse(tp, fbp); fbp = NULL; if (fblk && fblk->bp) fblk->bp = NULL; @@ -1566,36 +1564,23 @@ xfs_dir2_node_addname_int( /* * Not allowed to allocate, return failure. */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || - args->total == 0) { - /* - * Drop the freespace buffer unless it came from our - * caller. - */ - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) return XFS_ERROR(ENOSPC); - } + /* * Allocate and initialize the new data block. */ if (unlikely((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &dbno)) || - (error = xfs_dir2_data_init(args, dbno, &dbp)))) { - /* - * Drop the freespace buffer unless it came from our - * caller. - */ - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + (error = xfs_dir2_data_init(args, dbno, &dbp)))) return error; - } + /* * If (somehow) we have a freespace block, get rid of it. */ if (fbp) - xfs_da_brelse(tp, fbp); + xfs_trans_brelse(tp, fbp); if (fblk && fblk->bp) fblk->bp = NULL; @@ -1604,12 +1589,12 @@ xfs_dir2_node_addname_int( * that was just allocated. */ fbno = xfs_dir2_db_to_fdb(mp, dbno); - if (unlikely(error = xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, fbno), -2, &fbp, - XFS_DATA_FORK))) { - xfs_da_buf_done(dbp); + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(mp, fbno), + &fbp); + if (error) return error; - } + /* * If there wasn't a freespace block, the read will * return a NULL fbp. Allocate and initialize a new one. 
@@ -1621,26 +1606,22 @@ xfs_dir2_node_addname_int( } if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { - cmn_err(CE_ALERT, - "xfs_dir2_node_addname_int: dir ino " - "%llu needed freesp block %lld for\n" - " data block %lld, got %lld\n" - " ifbno %llu lastfbno %d\n", - (unsigned long long)dp->i_ino, + xfs_alert(mp, + "%s: dir ino %llu needed freesp block %lld for\n" + " data block %lld, got %lld ifbno %llu lastfbno %d", + __func__, (unsigned long long)dp->i_ino, (long long)xfs_dir2_db_to_fdb(mp, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { - cmn_err(CE_ALERT, - " fblk 0x%p blkno %llu " - "index %d magic 0x%x\n", + xfs_alert(mp, + " fblk 0x%p blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, fblk->magic); } else { - cmn_err(CE_ALERT, - " ... fblk is NULL\n"); + xfs_alert(mp, " ... fblk is NULL"); } XFS_ERROR_REPORT("xfs_dir2_node_addname_int", XFS_ERRLEVEL_LOW, mp); @@ -1650,27 +1631,27 @@ xfs_dir2_node_addname_int( /* * Get a buffer for the new block. */ - if ((error = xfs_da_get_buf(tp, dp, - xfs_dir2_db_to_da(mp, fbno), - -1, &fbp, XFS_DATA_FORK))) { + error = xfs_da_get_buf(tp, dp, + xfs_dir2_db_to_da(mp, fbno), + -1, &fbp, XFS_DATA_FORK); + if (error) return error; - } - ASSERT(fbp != NULL); + fbp->b_ops = &xfs_dir2_free_buf_ops; /* * Initialize the new block to be empty, and remember * its first slot as our empty slot. */ - free = fbp->data; + free = fbp->b_addr; free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = cpu_to_be32( (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - XFS_DIR2_MAX_FREE_BESTS(mp)); + xfs_dir2_free_max_bests(mp)); free->hdr.nvalid = 0; free->hdr.nused = 0; } else { - free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = fbp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); } /* @@ -1682,7 +1663,7 @@ xfs_dir2_node_addname_int( * freespace block, extend that table. 
*/ if (findex >= be32_to_cpu(free->hdr.nvalid)) { - ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); + ASSERT(findex < xfs_dir2_free_max_bests(mp)); free->hdr.nvalid = cpu_to_be32(findex + 1); /* * Tag new entry so nused will go up. @@ -1693,7 +1674,7 @@ xfs_dir2_node_addname_int( * If this entry was for an empty data block * (this should always be true) then update the header. */ - if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { + if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) { be32_add_cpu(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } @@ -1702,8 +1683,8 @@ xfs_dir2_node_addname_int( * We haven't allocated the data entry yet so this will * change again. */ - data = dbp->data; - free->bests[findex] = data->hdr.bestfree[0].length; + hdr = dbp->b_addr; + free->bests[findex] = hdr->bestfree[0].length; logfree = 1; } /* @@ -1713,36 +1694,31 @@ xfs_dir2_node_addname_int( /* * If just checking, we succeeded. */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) { - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; - } + /* * Read the data block in. */ - if (unlikely( - error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno), - -1, &dbp, XFS_DATA_FORK))) { - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); + error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), + -1, &dbp); + if (error) return error; - } - data = dbp->data; + hdr = dbp->b_addr; logfree = 0; } - ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); + ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); /* * Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) - ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); + ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); needscan = needlog = 0; /* * Mark the first part of the unused space, inuse for us. 
*/ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* * Fill in the new entry and log it. @@ -1752,13 +1728,13 @@ xfs_dir2_node_addname_int( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)data); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, dbp, dep); /* * Rescan the block for bestfree if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Log the data block header if needed. */ @@ -1767,8 +1743,8 @@ xfs_dir2_node_addname_int( /* * If the freespace entry is now wrong, update it. */ - if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { - free->bests[findex] = data->hdr.bestfree[0].length; + if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) { + free->bests[findex] = hdr->bestfree[0].length; logfree = 1; } /* @@ -1776,17 +1752,11 @@ xfs_dir2_node_addname_int( */ if (logfree) xfs_dir2_free_log_bests(tp, fbp, findex, findex); - /* - * If the caller didn't hand us the freespace block, drop it. - */ - if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) - xfs_da_buf_done(fbp); /* * Return the data block and offset in args, then drop the data block. 
*/ args->blkno = (xfs_dablk_t)dbno; args->index = be16_to_cpu(*tagp); - xfs_da_buf_done(dbp); return 0; } @@ -1824,22 +1794,23 @@ xfs_dir2_node_lookup( /* If a CI match, dup the actual name and return EEXIST */ xfs_dir2_data_entry_t *dep; - dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp-> - data + state->extrablk.index); + dep = (xfs_dir2_data_entry_t *) + ((char *)state->extrablk.bp->b_addr + + state->extrablk.index); rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen); } /* * Release the btree blocks and leaf block. */ for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); + xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } /* * Release the data block if we have it. */ if (state->extravalid && state->extrablk.bp) { - xfs_da_brelse(args->trans, state->extrablk.bp); + xfs_trans_brelse(args->trans, state->extrablk.bp); state->extrablk.bp = NULL; } xfs_da_state_free(state); @@ -1918,7 +1889,7 @@ xfs_dir2_node_replace( xfs_da_args_t *args) /* operation arguments */ { xfs_da_state_blk_t *blk; /* leaf block */ - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry changed */ int error; /* error return value */ int i; /* btree level */ @@ -1956,16 +1927,16 @@ xfs_dir2_node_replace( */ blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - leaf = blk->bp->data; + leaf = blk->bp->b_addr; lep = &leaf->ents[blk->index]; ASSERT(state->extravalid); /* * Point to the data entry. 
*/ - data = state->extrablk.bp->data; - ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); + hdr = state->extrablk.bp->b_addr; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); dep = (xfs_dir2_data_entry_t *) - ((char *)data + + ((char *)hdr + xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* @@ -1979,14 +1950,14 @@ xfs_dir2_node_replace( * Didn't find it, and we're holding a data block. Drop it. */ else if (state->extravalid) { - xfs_da_brelse(args->trans, state->extrablk.bp); + xfs_trans_brelse(args->trans, state->extrablk.bp); state->extrablk.bp = NULL; } /* * Release all the buffers in the cursor. */ for (i = 0; i < state->path.active; i++) { - xfs_da_brelse(args->trans, state->path.blk[i].bp); + xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } xfs_da_state_free(state); @@ -2003,7 +1974,7 @@ xfs_dir2_node_trim_free( xfs_fileoff_t fo, /* free block number */ int *rvalp) /* out: did something */ { - xfs_dabuf_t *bp; /* freespace buffer */ + struct xfs_buf *bp; /* freespace buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_free_t *free; /* freespace structure */ @@ -2016,25 +1987,22 @@ xfs_dir2_node_trim_free( /* * Read the freespace block. */ - if (unlikely(error = xfs_da_read_buf(tp, dp, (xfs_dablk_t)fo, -2, &bp, - XFS_DATA_FORK))) { + error = xfs_dir2_free_try_read(tp, dp, fo, &bp); + if (error) return error; - } - /* * There can be holes in freespace. If fo is a hole, there's * nothing to do. */ - if (bp == NULL) { + if (!bp) return 0; - } - free = bp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + free = bp->b_addr; + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); /* * If there are used entries, there's nothing to do. 
*/ if (be32_to_cpu(free->hdr.nused) > 0) { - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); *rvalp = 0; return 0; } @@ -2050,7 +2018,7 @@ xfs_dir2_node_trim_free( * pieces. This is the last block of an extent. */ ASSERT(error != ENOSPC); - xfs_da_brelse(tp, bp); + xfs_trans_brelse(tp, bp); return error; } /* diff --git a/libxfs/xfs_dir2_priv.h b/libxfs/xfs_dir2_priv.h new file mode 100644 index 000000000..7da79f651 --- /dev/null +++ b/libxfs/xfs_dir2_priv.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_PRIV_H__ +#define __XFS_DIR2_PRIV_H__ + +/* xfs_dir2.c */ +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); +extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); +extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, + struct xfs_buf *bp); +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); + +/* xfs_dir2_block.c */ +extern const struct xfs_buf_ops xfs_dir2_block_buf_ops; + +extern int xfs_dir2_block_addname(struct xfs_da_args *args); +extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_block_lookup(struct xfs_da_args *args); +extern int xfs_dir2_block_removename(struct xfs_da_args *args); +extern int xfs_dir2_block_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, + struct xfs_buf *lbp, struct xfs_buf *dbp); + +/* xfs_dir2_data.c */ +#ifdef DEBUG +#define xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp); +#else +#define xfs_dir2_data_check(dp,bp) +#endif + +extern const struct xfs_buf_ops xfs_dir2_data_buf_ops; + +extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); +extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mapped_bno); + +extern struct xfs_dir2_data_free * 
+xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_unused *dup, int *loghead); +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr, int *loghead); +extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, + struct xfs_buf **bpp); +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_entry *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, + struct xfs_buf *bp); +extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_unused *dup); +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp, + xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, + int *needlogp, int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); + +/* xfs_dir2_leaf.c */ +extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops; + +extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); +extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, + struct xfs_buf *dbp); +extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); +extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, + struct xfs_buf *bp); +extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp, + int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); +extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, + size_t bufsize, xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, + struct xfs_buf **bpp, int magic); +extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, + int first, int last); +extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, 
+ struct xfs_buf *bp); +extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); +extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, + struct xfs_buf *lbp); +extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, + struct xfs_buf *lbp, xfs_dir2_db_t db); +extern struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, + int lowstale, int highstale, + int *lfloglow, int *lfloghigh); +extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); + +/* xfs_dir2_node.c */ +extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, + struct xfs_buf *lbp); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count); +extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, + struct xfs_da_args *args, int *indexp, + struct xfs_da_state *state); +extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp, + struct xfs_buf *leaf2_bp); +extern int xfs_dir2_leafn_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); +extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); +extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); +extern int xfs_dir2_node_addname(struct xfs_da_args *args); +extern int xfs_dir2_node_lookup(struct xfs_da_args *args); +extern int xfs_dir2_node_removename(struct xfs_da_args *args); +extern int xfs_dir2_node_replace(struct xfs_da_args *args); +extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, + int *rvalp); +extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, struct xfs_buf **bpp); + +/* xfs_dir2_sf.c */ +extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); +extern xfs_ino_t xfs_dir2_sfe_get_ino(struct 
xfs_dir2_sf_hdr *sfp, + struct xfs_dir2_sf_entry *sfep); +extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, + struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); +extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, + int size, xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_sf_addname(struct xfs_da_args *args); +extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); +extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_sf_removename(struct xfs_da_args *args); +extern int xfs_dir2_sf_replace(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/libxfs/xfs_dir2_sf.c b/libxfs/xfs_dir2_sf.c index 6b5e6d455..a96be7695 100644 --- a/libxfs/xfs_dir2_sf.c +++ b/libxfs/xfs_dir2_sf.c @@ -40,6 +40,82 @@ static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args); #endif /* XFS_BIG_INUMS */ +/* + * Inode numbers in short-form directories can come in two versions, + * either 4 bytes or 8 bytes wide. These helpers deal with the + * two forms transparently by looking at the headers i8count field. + * + * For 64-bit inode number the most significant byte must be zero. 
+ */ +static xfs_ino_t +xfs_dir2_sf_get_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *from) +{ + if (hdr->i8count) + return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; + else + return get_unaligned_be32(&from->i4.i); +} + +static void +xfs_dir2_sf_put_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *to, + xfs_ino_t ino) +{ + ASSERT((ino & 0xff00000000000000ULL) == 0); + + if (hdr->i8count) + put_unaligned_be64(ino, &to->i8.i); + else + put_unaligned_be32(ino, &to->i4.i); +} + +xfs_ino_t +xfs_dir2_sf_get_parent_ino( + struct xfs_dir2_sf_hdr *hdr) +{ + return xfs_dir2_sf_get_ino(hdr, &hdr->parent); +} + +void +xfs_dir2_sf_put_parent_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. The inode numbers may only be accessed + * through the helpers below. + */ +static xfs_dir2_inou_t * +xfs_dir2_sfe_inop( + struct xfs_dir2_sf_entry *sfep) +{ + return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]; +} + +xfs_ino_t +xfs_dir2_sfe_get_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep)); +} + +void +xfs_dir2_sfe_put_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino); +} + /* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the @@ -49,7 +125,7 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args); int /* size for sf form */ xfs_dir2_block_sfsize( xfs_inode_t *dp, /* incore inode pointer */ - xfs_dir2_block_t *block, /* block directory data */ + xfs_dir2_data_hdr_t *hdr, /* block directory data */ xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ { xfs_dir2_dataptr_t addr; /* data entry address */ @@ -69,7 
+145,7 @@ xfs_dir2_block_sfsize( mp = dp->i_mount; count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -82,7 +158,7 @@ xfs_dir2_block_sfsize( * Calculate the pointer to the entry at hand. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -119,7 +195,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); + xfs_dir2_sf_put_parent_ino(sfhp, parent); return size; } @@ -130,11 +206,11 @@ xfs_dir2_block_sfsize( int /* error */ xfs_dir2_block_to_sf( xfs_da_args_t *args, /* operation arguments */ - xfs_dabuf_t *bp, /* block buffer */ + struct xfs_buf *bp, int size, /* shortform directory size */ xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ @@ -145,8 +221,7 @@ xfs_dir2_block_to_sf( xfs_mount_t *mp; /* filesystem mount point */ char *ptr; /* current data pointer */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ - xfs_ino_t temp; + xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ trace_xfs_dir2_block_to_sf(args); @@ -157,13 +232,14 @@ xfs_dir2_block_to_sf( * Make a copy of the block data, so we can shrink the inode * and add local data. 
*/ - block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(block, bp->data, mp->m_dirblksize); + hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); + memcpy(hdr, bp->b_addr, mp->m_dirblksize); logflags = XFS_ILOG_CORE; if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { ASSERT(error != ENOSPC); goto out; } + /* * The buffer is now unconditionally gone, whether * xfs_dir2_shrink_inode worked or not. @@ -179,14 +255,14 @@ xfs_dir2_block_to_sf( /* * Copy the header into the newly allocate local space. */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); dp->i_d.di_size = size; /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(mp, block); - ptr = (char *)block->u; + btp = xfs_dir2_block_tail_p(mp, hdr); + ptr = (char *)(hdr + 1); endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); /* @@ -214,7 +290,7 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); + xfs_dir2_sf_get_parent_ino(sfp)); /* * Normal entry, copy it into shortform. 
*/ @@ -222,11 +298,11 @@ xfs_dir2_block_to_sf( sfep->namelen = dep->namelen; xfs_dir2_sf_put_offset(sfep, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)block)); + ((char *)dep - (char *)hdr)); memcpy(sfep->name, dep->name, dep->namelen); - temp = be64_to_cpu(dep->inumber); - xfs_dir2_sf_put_inumber(sfp, &temp, - xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, + be64_to_cpu(dep->inumber)); + sfep = xfs_dir2_sf_nextentry(sfp, sfep); } ptr += xfs_dir2_data_entsize(dep->namelen); @@ -235,7 +311,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(block); + kmem_free(hdr); return error; } @@ -258,7 +334,7 @@ xfs_dir2_sf_addname( xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ int old_isize; /* di_size before adding name */ int pick; /* which algorithm to use */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ trace_xfs_dir2_sf_addname(args); @@ -275,19 +351,19 @@ xfs_dir2_sf_addname( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Compute entry (and change in) size. */ - add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); + add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS /* * Do we have to change to 8 byte inodes? */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { /* * Yes, adjust the entry size and the total size. 
*/ @@ -295,7 +371,7 @@ xfs_dir2_sf_addname( (uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t); incr_isize += - (sfp->hdr.count + 2) * + (sfp->count + 2) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); objchange = 1; @@ -365,21 +441,21 @@ xfs_dir2_sf_addname_easy( { int byteoff; /* byte offset in sf dir */ xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; byteoff = (int)((char *)sfep - (char *)sfp); /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), + xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); /* * Fill in the new entry. @@ -387,15 +463,14 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sf_put_inumber(sfp, &args->inumber, - xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); /* * Update the header and inode. 
*/ - sfp->hdr.count++; + sfp->count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) - sfp->hdr.i8count++; + sfp->i8count++; #endif dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); @@ -425,19 +500,19 @@ xfs_dir2_sf_addname_hard( xfs_dir2_data_aoff_t offset; /* current offset value */ int old_isize; /* previous di_size */ xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ - xfs_dir2_sf_t *oldsfp; /* original shortform dir */ + xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ - xfs_dir2_sf_t *sfp; /* new shortform dir */ + xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ /* * Copy the old directory to the stack buffer. */ dp = args->dp; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; buf = kmem_alloc(old_isize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_t *)buf; + oldsfp = (xfs_dir2_sf_hdr_t *)buf; memcpy(oldsfp, sfp, old_isize); /* * Loop over the old directory finding the place we're going @@ -466,7 +541,7 @@ xfs_dir2_sf_addname_hard( /* * Reset the pointer since the buffer was reallocated. */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /* * Copy the first part of the directory, including the header. */ @@ -479,12 +554,11 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sf_put_inumber(sfp, &args->inumber, - xfs_dir2_sf_inumberp(sfep)); - sfp->hdr.count++; + xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); + sfp->count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) - sfp->hdr.i8count++; + sfp->i8count++; #endif /* * If there's more left to copy, do that. 
@@ -518,14 +592,14 @@ xfs_dir2_sf_addname_pick( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_data_aoff_t offset; /* data block offset */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ int size; /* entry's data size */ int used; /* data bytes used */ dp = args->dp; mp = dp->i_mount; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; size = xfs_dir2_data_entsize(args->namelen); offset = XFS_DIR2_DATA_FIRST_OFFSET; sfep = xfs_dir2_sf_firstentry(sfp); @@ -535,7 +609,7 @@ xfs_dir2_sf_addname_pick( * Keep track of data offset and whether we've seen a place * to insert the new entry. */ - for (i = 0; i < sfp->hdr.count; i++) { + for (i = 0; i < sfp->count; i++) { if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + @@ -547,7 +621,7 @@ xfs_dir2_sf_addname_pick( * was a data block (block form directory). 
*/ used = offset + - (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t); /* * If it won't fit in a block form then we can't insert it, @@ -593,30 +667,30 @@ xfs_dir2_sf_check( xfs_ino_t ino; /* entry inode number */ int offset; /* data offset */ xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; offset = XFS_DIR2_DATA_FIRST_OFFSET; - ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); + ino = xfs_dir2_sfe_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + xfs_dir2_data_entsize(sfep->namelen); } - ASSERT(i8count == sfp->hdr.i8count); + ASSERT(i8count == sfp->i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + - (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) <= dp->i_mount->m_dirblksize); } @@ -632,7 +706,7 @@ xfs_dir2_sf_create( { xfs_inode_t *dp; /* incore directory inode */ int i8count; /* parent inode is an 8-byte number */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ int size; /* directory size */ trace_xfs_dir2_sf_create(args); @@ -662,13 +736,13 @@ xfs_dir2_sf_create( /* * Fill in the header, */ - sfp = (xfs_dir2_sf_t 
*)dp->i_df.if_u1.if_data; - sfp->hdr.i8count = i8count; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp->i8count = i8count; /* * Now can put in the inode number, since i8count is set. */ - xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); - sfp->hdr.count = 0; + xfs_dir2_sf_put_parent_ino(sfp, pino); + sfp->count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -687,7 +761,7 @@ xfs_dir2_sf_lookup( int i; /* entry index */ int error; xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ enum xfs_dacmp cmp; /* comparison result */ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ @@ -706,8 +780,8 @@ xfs_dir2_sf_lookup( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Special case for . */ @@ -721,7 +795,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + args->inumber = xfs_dir2_sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; return XFS_ERROR(EEXIST); } @@ -729,7 +803,7 @@ xfs_dir2_sf_lookup( * Loop over all the entries trying to match ours. 
*/ ci_sfep = NULL; - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode @@ -740,8 +814,7 @@ xfs_dir2_sf_lookup( sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)); + args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep); if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); ci_sfep = sfep; @@ -773,7 +846,7 @@ xfs_dir2_sf_removename( int newsize; /* new inode size */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_removename(args); @@ -790,32 +863,31 @@ xfs_dir2_sf_removename( } ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Loop over the old directory entries. * Find the one we're deleting. */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)) == - args->inumber); + ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) == + args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->hdr.count) + if (i == sfp->count) return XFS_ERROR(ENOENT); /* * Calculate sizes. 
*/ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); + entsize = xfs_dir2_sf_entsize(sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -826,22 +898,22 @@ xfs_dir2_sf_removename( /* * Fix up the header and file size. */ - sfp->hdr.count--; + sfp->count--; dp->i_d.di_size = newsize; /* * Reallocate, making it smaller. */ xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; #if XFS_BIG_INUMS /* * Are we changing inode number size? */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { - if (sfp->hdr.i8count == 1) + if (sfp->i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->hdr.i8count--; + sfp->i8count--; } #endif xfs_dir2_sf_check(args); @@ -865,7 +937,7 @@ xfs_dir2_sf_replace( int i8elevated; /* sf_toino8 set i8count=1 */ #endif xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_replace(args); @@ -881,19 +953,19 @@ xfs_dir2_sf_replace( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); #if XFS_BIG_INUMS /* * New inode number is large, and need to convert to 8-byte inodes. 
*/ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { int error; /* error return value */ int newsize; /* new inode size */ newsize = dp->i_df.if_bytes + - (sfp->hdr.count + 1) * + (sfp->count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); /* @@ -911,7 +983,7 @@ xfs_dir2_sf_replace( */ xfs_dir2_sf_toino8(args); i8elevated = 1; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; } else i8elevated = 0; #endif @@ -922,34 +994,32 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); + xfs_dir2_sf_put_parent_ino(sfp, args->inumber); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)); + ino = xfs_dir2_sfe_get_ino(sfp, sfep); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_inumber(sfp, &args->inumber, - xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->hdr.count) { + if (i == sfp->count) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); #if XFS_BIG_INUMS if (i8elevated) @@ -967,10 +1037,10 @@ xfs_dir2_sf_replace( /* * And the old count was one, so need to convert to small. 
*/ - if (sfp->hdr.i8count == 1) + if (sfp->i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->hdr.i8count--; + sfp->i8count--; } /* * See if the old number was small, the new number is large. @@ -981,9 +1051,9 @@ xfs_dir2_sf_replace( * add to the i8count unless we just converted to 8-byte * inodes (which does an implied i8count = 1) */ - ASSERT(sfp->hdr.i8count != 0); + ASSERT(sfp->i8count != 0); if (!i8elevated) - sfp->hdr.i8count++; + sfp->i8count++; } #endif xfs_dir2_sf_check(args); @@ -1003,13 +1073,12 @@ xfs_dir2_sf_toino4( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ - xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_t *sfp; /* new sf directory */ + xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino4(args); @@ -1022,44 +1091,42 @@ xfs_dir2_sf_toino4( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->hdr.i8count == 1); + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->i8count == 1); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. */ newsize = oldsize - - (oldsfp->hdr.count + 1) * + (oldsfp->count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_t *)buf; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. 
*/ - sfp->hdr.count = oldsfp->hdr.count; - sfp->hdr.i8count = 0; - ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); - xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); + sfp->count = oldsfp->count; + sfp->i8count = 0; + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - ino = xfs_dir2_sf_get_inumber(oldsfp, - xfs_dir2_sf_inumberp(oldsfep)); - xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, + xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); } /* * Clean up the inode. @@ -1081,13 +1148,12 @@ xfs_dir2_sf_toino8( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ - xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_t *sfp; /* new sf directory */ + xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino8(args); @@ -1100,44 +1166,42 @@ xfs_dir2_sf_toino8( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->hdr.i8count == 0); + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. 
*/ newsize = oldsize + - (oldsfp->hdr.count + 1) * + (oldsfp->count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_t *)buf; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. */ - sfp->hdr.count = oldsfp->hdr.count; - sfp->hdr.i8count = 1; - ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); - xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); + sfp->count = oldsfp->count; + sfp->i8count = 1; + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - ino = xfs_dir2_sf_get_inumber(oldsfp, - xfs_dir2_sf_inumberp(oldsfep)); - xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, + xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); } /* * Clean up the inode. diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c index 1fcafb694..529d92d47 100644 --- a/libxfs/xfs_ialloc.c +++ b/libxfs/xfs_ialloc.c @@ -131,7 +131,7 @@ xfs_check_agi_freecount( /* * Initialise a new set of inodes. 
*/ -STATIC void +STATIC int xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, @@ -182,10 +182,9 @@ xfs_ialloc_inode_init( d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, - XBF_LOCK); - ASSERT(fbuf); - ASSERT(!XFS_BUF_GETERROR(fbuf)); - + XBF_UNMAPPED); + if (!fbuf) + return ENOMEM; /* * Initialize all inodes in this buffer and then log them. * @@ -193,6 +192,7 @@ xfs_ialloc_inode_init( * to log a whole cluster of inodes instead of all the * individual transactions causing a lot of log traffic. */ + fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { int ioffset = i << mp->m_sb.sb_inodelog; @@ -207,6 +207,7 @@ xfs_ialloc_inode_init( } xfs_trans_inode_alloc_buf(tp, fbuf); } + return 0; } /* @@ -232,6 +233,7 @@ xfs_ialloc_ag_alloc( /* boundary */ struct xfs_perag *pag; + memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = tp->t_mountp; @@ -258,8 +260,6 @@ xfs_ialloc_ag_alloc( (args.agbno < be32_to_cpu(agi->agi_length)))) { args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.type = XFS_ALLOCTYPE_THIS_BNO; - args.mod = args.total = args.wasdel = args.isfl = - args.userdata = args.minalignslop = 0; args.prod = 1; /* @@ -312,8 +312,6 @@ xfs_ialloc_ag_alloc( * Allocate a fixed-size extent of inodes. */ args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.total = args.wasdel = args.isfl = - args.userdata = args.minalignslop = 0; args.prod = 1; /* * Allow space for the inode btree to split. @@ -351,9 +349,11 @@ xfs_ialloc_ag_alloc( * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len, - random32()); + error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, + args.len, random32()); + if (error) + return error; /* * Convert the results. 
*/ @@ -411,7 +411,7 @@ xfs_ialloc_next_ag( spin_lock(&mp->m_agirotor_lock); agno = mp->m_agirotor; - if (++mp->m_agirotor == mp->m_maxagi) + if (++mp->m_agirotor >= mp->m_maxagi) mp->m_agirotor = 0; spin_unlock(&mp->m_agirotor_lock); @@ -422,14 +422,13 @@ xfs_ialloc_next_ag( * Select an allocation group to look for a free inode in, based on the parent * inode and then mode. Return the allocation group buffer. */ -STATIC xfs_buf_t * /* allocation group buffer */ +STATIC xfs_agnumber_t xfs_ialloc_ag_select( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent directory inode number */ - mode_t mode, /* bits set to indicate file type */ + umode_t mode, /* bits set to indicate file type */ int okalloc) /* ok to allocate more space */ { - xfs_buf_t *agbp; /* allocation group header buffer */ xfs_agnumber_t agcount; /* number of ag's in the filesystem */ xfs_agnumber_t agno; /* current ag number */ int flags; /* alloc buffer locking flags */ @@ -439,6 +438,7 @@ xfs_ialloc_ag_select( int needspace; /* file mode implies space allocated */ xfs_perag_t *pag; /* per allocation group data */ xfs_agnumber_t pagno; /* parent (starting) ag number */ + int error; /* * Files of these types need at least one block if length > 0 @@ -454,7 +454,9 @@ xfs_ialloc_ag_select( if (pagno >= agcount) pagno = 0; } + ASSERT(pagno < agcount); + /* * Loop through allocation groups, looking for one with a little * free space in it. Note we don't look for free inodes, exactly. 
@@ -466,51 +468,45 @@ xfs_ialloc_ag_select( flags = XFS_ALLOC_FLAG_TRYLOCK; for (;;) { pag = xfs_perag_get(mp, agno); + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto nextag; + } + if (!pag->pagi_init) { - if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { - agbp = NULL; + error = xfs_ialloc_pagi_init(mp, tp, agno); + if (error) goto nextag; - } - } else - agbp = NULL; + } - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto unlock_nextag; + if (pag->pagi_freecount) { + xfs_perag_put(pag); + return agno; } - /* - * Is there enough free space for the file plus a block - * of inodes (if we need to allocate some)? - */ - ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp); - if (ineed && !pag->pagf_init) { - if (agbp == NULL && - xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { - agbp = NULL; + if (!okalloc) + goto nextag; + + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, agno, flags); + if (error) goto nextag; - } - (void)xfs_alloc_pagf_init(mp, tp, agno, flags); } - if (!ineed || pag->pagf_init) { - if (ineed && !(longest = pag->pagf_longest)) - longest = pag->pagf_flcount > 0; - if (!ineed || - (pag->pagf_freeblks >= needspace + ineed && - longest >= ineed && - okalloc)) { - if (agbp == NULL && - xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { - agbp = NULL; - goto nextag; - } - xfs_perag_put(pag); - return agbp; - } + + /* + * Is there enough free space for the file plus a block of + * inodes? (if we need to allocate some)? + */ + ineed = XFS_IALLOC_BLOCKS(mp); + longest = pag->pagf_longest; + if (!longest) + longest = pag->pagf_flcount > 0; + + if (pag->pagf_freeblks >= needspace + ineed && + longest >= ineed) { + xfs_perag_put(pag); + return agno; } -unlock_nextag: - if (agbp) - xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); /* @@ -518,13 +514,13 @@ nextag: * down. 
*/ if (XFS_FORCED_SHUTDOWN(mp)) - return NULL; + return NULLAGNUMBER; agno++; if (agno >= agcount) agno = 0; if (agno == pagno) { if (flags == 0) - return NULL; + return NULLAGNUMBER; flags = 0; } } @@ -587,188 +583,39 @@ xfs_ialloc_get_rec( } /* - * Visible inode allocation functions. - */ - -/* - * Allocate an inode on disk. - * Mode is used to tell whether the new inode will need space, and whether - * it is a directory. + * Allocate an inode. * - * The arguments IO_agbp and alloc_done are defined to work within - * the constraint of one allocation per transaction. - * xfs_dialloc() is designed to be called twice if it has to do an - * allocation to make more free inodes. On the first call, - * IO_agbp should be set to NULL. If an inode is available, - * i.e., xfs_dialloc() did not need to do an allocation, an inode - * number is returned. In this case, IO_agbp would be set to the - * current ag_buf and alloc_done set to false. - * If an allocation needed to be done, xfs_dialloc would return - * the current ag_buf in IO_agbp and set alloc_done to true. - * The caller should then commit the current transaction, allocate a new - * transaction, and call xfs_dialloc() again, passing in the previous - * value of IO_agbp. IO_agbp should be held across the transactions. - * Since the agbp is locked across the two calls, the second call is - * guaranteed to have a free inode available. - * - * Once we successfully pick an inode its number is returned and the - * on-disk data structures are updated. The inode itself is not read - * in, since doing so would break ordering constraints with xfs_reclaim. + * The caller selected an AG for us, and made sure that free inodes are + * available. 
*/ -int -xfs_dialloc( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t parent, /* parent inode (directory) */ - mode_t mode, /* mode bits for new inode */ - int okalloc, /* ok to allocate more space */ - xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ - boolean_t *alloc_done, /* true if we needed to replenish - inode freelist */ - xfs_ino_t *inop) /* inode number allocated */ +STATIC int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) { - xfs_agnumber_t agcount; /* number of allocation groups */ - xfs_buf_t *agbp; /* allocation group header's buffer */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_agi_t *agi; /* allocation group header structure */ - xfs_btree_cur_t *cur; /* inode allocation btree cursor */ - int error; /* error return value */ - int i; /* result code */ - int ialloced; /* inode allocation status */ - int noroom = 0; /* no space for inode blk allocation */ - xfs_ino_t ino; /* fs-relative inode to be returned */ - /* REFERENCED */ - int j; /* result code */ - xfs_mount_t *mp; /* file system mount structure */ - int offset; /* index of inode in chunk */ - xfs_agino_t pagino; /* parent's AG relative inode # */ - xfs_agnumber_t pagno; /* parent's AG number */ - xfs_inobt_rec_incore_t rec; /* inode allocation record */ - xfs_agnumber_t tagno; /* testing allocation group number */ - xfs_btree_cur_t *tcur; /* temp cursor */ - xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ - struct xfs_perag *pag; - - - if (*IO_agbp == NULL) { - /* - * We do not have an agbp, so select an initial allocation - * group for inode allocation. - */ - agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); - /* - * Couldn't find an allocation group satisfying the - * criteria, give up. 
- */ - if (!agbp) { - *inop = NULLFSINO; - return 0; - } - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); - } else { - /* - * Continue where we left off before. In this case, we - * know that the allocation group has free inodes. - */ - agbp = *IO_agbp; - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); - ASSERT(be32_to_cpu(agi->agi_freecount) > 0); - } - mp = tp->t_mountp; - agcount = mp->m_sb.sb_agcount; - agno = be32_to_cpu(agi->agi_seqno); - tagno = agno; - pagno = XFS_INO_TO_AGNO(mp, parent); - pagino = XFS_INO_TO_AGINO(mp, parent); - - /* - * If we have already hit the ceiling of inode blocks then clear - * okalloc so we scan all available agi structures for a free - * inode. - */ - - if (mp->m_maxicount && - mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { - noroom = 1; - okalloc = 0; - } + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur, *tcur; + struct xfs_inobt_rec_incore rec, trec; + xfs_ino_t ino; + int error; + int offset; + int i, j; - /* - * Loop until we find an allocation group that either has free inodes - * or in which we can allocate some inodes. Iterate through the - * allocation groups upward, wrapping at the end. - */ - *alloc_done = B_FALSE; - while (!agi->agi_freecount) { - /* - * Don't do anything if we're not supposed to allocate - * any blocks, just go on to the next ag. - */ - if (okalloc) { - /* - * Try to allocate some new inodes in the allocation - * group. 
- */ - if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { - xfs_trans_brelse(tp, agbp); - if (error == ENOSPC) { - *inop = NULLFSINO; - return 0; - } else - return error; - } - if (ialloced) { - /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. - */ - ASSERT(be32_to_cpu(agi->agi_freecount) > 0); - *alloc_done = B_TRUE; - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; - } - } - /* - * If it failed, give up on this ag. - */ - xfs_trans_brelse(tp, agbp); - /* - * Go on to the next ag: get its ag header. - */ -nextag: - if (++tagno == agcount) - tagno = 0; - if (tagno == agno) { - *inop = NULLFSINO; - return noroom ? ENOSPC : 0; - } - pag = xfs_perag_get(mp, tagno); - if (pag->pagi_inodeok == 0) { - xfs_perag_put(pag); - goto nextag; - } - error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); - xfs_perag_put(pag); - if (error) - goto nextag; - agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); - } - /* - * Here with an allocation group that has a free inode. - * Reset agno since we may have chosen a new ag in the - * loop above. - */ - agno = tagno; - *IO_agbp = NULL; pag = xfs_perag_get(mp, agno); + ASSERT(pag->pagi_init); + ASSERT(pag->pagi_inodeok); + ASSERT(pag->pagi_freecount > 0); + restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -925,7 +772,7 @@ nextag: * See if the most recently allocated block has any free. 
*/ newino: - if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), XFS_LOOKUP_EQ, &i); if (error) @@ -968,7 +815,7 @@ newino: } alloc_inode: - offset = xfs_ialloc_find_free(&rec.ir_free); + offset = xfs_lowbit64(rec.ir_free); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % @@ -1000,6 +847,165 @@ error0: return error; } +/* + * Allocate an inode on disk. + * + * Mode is used to tell whether the new inode will need space, and whether it + * is a directory. + * + * This function is designed to be called twice if it has to do an allocation + * to make more free inodes. On the first call, *IO_agbp should be set to NULL. + * If an inode is available without having to performn an allocation, an inode + * number is returned. In this case, *IO_agbp is set to NULL. If an allocation + * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. + * The caller should then commit the current transaction, allocate a + * new transaction, and call xfs_dialloc() again, passing in the previous value + * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI + * buffer is locked across the two calls, the second call is guaranteed to have + * a free inode available. + * + * Once we successfully pick an inode its number is returned and the on-disk + * data structures are updated. The inode itself is not read in, since doing so + * would break ordering constraints with xfs_reclaim. 
+ */ +int +xfs_dialloc( + struct xfs_trans *tp, + xfs_ino_t parent, + umode_t mode, + int okalloc, + struct xfs_buf **IO_agbp, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *agbp; + xfs_agnumber_t agno; + int error; + int ialloced; + int noroom = 0; + xfs_agnumber_t start_agno; + struct xfs_perag *pag; + + if (*IO_agbp) { + /* + * If the caller passes in a pointer to the AGI buffer, + * continue where we left off before. In this case, we + * know that the allocation group has free inodes. + */ + agbp = *IO_agbp; + goto out_alloc; + } + + /* + * We do not have an agbp, so select an initial allocation + * group for inode allocation. + */ + start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + if (start_agno == NULLAGNUMBER) { + *inop = NULLFSINO; + return 0; + } + + /* + * If we have already hit the ceiling of inode blocks then clear + * okalloc so we scan all available agi structures for a free + * inode. + */ + if (mp->m_maxicount && + mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { + noroom = 1; + okalloc = 0; + } + + /* + * Loop until we find an allocation group that either has free inodes + * or in which we can allocate some inodes. Iterate through the + * allocation groups upward, wrapping at the end. + */ + agno = start_agno; + for (;;) { + pag = xfs_perag_get(mp, agno); + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto nextag; + } + + if (!pag->pagi_init) { + error = xfs_ialloc_pagi_init(mp, tp, agno); + if (error) + goto out_error; + } + + /* + * Do a first racy fast path check if this AG is usable. + */ + if (!pag->pagi_freecount && !okalloc) + goto nextag; + + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. 
+ */ + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) + goto out_error; + + if (pag->pagi_freecount) { + xfs_perag_put(pag); + goto out_alloc; + } + + if (!okalloc) + goto nextag_relse_buffer; + + + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); + if (error) { + xfs_trans_brelse(tp, agbp); + + if (error != ENOSPC) + goto out_error; + + xfs_perag_put(pag); + *inop = NULLFSINO; + return 0; + } + + if (ialloced) { + /* + * We successfully allocated some inodes, return + * the current context to the caller so that it + * can commit the current transaction and call + * us again where we left off. + */ + ASSERT(pag->pagi_freecount > 0); + xfs_perag_put(pag); + + *IO_agbp = agbp; + *inop = NULLFSINO; + return 0; + } + +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); +nextag: + xfs_perag_put(pag); + if (++agno == mp->m_sb.sb_agcount) + agno = 0; + if (agno == start_agno) { + *inop = NULLFSINO; + return noroom ? ENOSPC : 0; + } + } + +out_alloc: + *IO_agbp = NULL; + return xfs_dialloc_ag(tp, agbp, parent, inop); +out_error: + xfs_perag_put(pag); + return XFS_ERROR(error); +} + STATIC int xfs_imap_lookup( struct xfs_mount *mp, @@ -1019,10 +1025,9 @@ xfs_imap_lookup( error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_ialloc_read_agi() returned " - "error %d, agno %d", - error, agno); + xfs_alert(mp, + "%s: xfs_ialloc_read_agi() returned error %d, agno %d", + __func__, error, agno); return error; } @@ -1100,24 +1105,21 @@ xfs_imap( if (flags & XFS_IGET_UNTRUSTED) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_imap: agno (%d) >= " - "mp->m_sb.sb_agcount (%d)", - agno, mp->m_sb.sb_agcount); + xfs_alert(mp, + "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", + __func__, agno, mp->m_sb.sb_agcount); } if (agbno >= mp->m_sb.sb_agblocks) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_imap: agbno (0x%llx) >= " - "mp->m_sb.sb_agblocks (0x%lx)", - (unsigned long 
long) agbno, - (unsigned long) mp->m_sb.sb_agblocks); + xfs_alert(mp, + "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", + __func__, (unsigned long long)agbno, + (unsigned long)mp->m_sb.sb_agblocks); } if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_imap: ino (0x%llx) != " - "XFS_AGINO_TO_INO(mp, agno, agino) " - "(0x%llx)", - ino, XFS_AGINO_TO_INO(mp, agno, agino)); + xfs_alert(mp, + "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", + __func__, ino, + XFS_AGINO_TO_INO(mp, agno, agino)); } xfs_stack_trace(); #endif /* DEBUG */ @@ -1189,10 +1191,9 @@ out_map: */ if ((imap->im_blkno + imap->im_len) > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " - " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", - (unsigned long long) imap->im_blkno, + xfs_alert(mp, + "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", + __func__, (unsigned long long) imap->im_blkno, (unsigned long long) imap->im_len, XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); return XFS_ERROR(EINVAL); @@ -1253,7 +1254,7 @@ xfs_ialloc_log_agi( xfs_agi_t *agi; /* allocation group header */ agi = XFS_BUF_TO_AGI(bp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif /* * Compute byte offsets for the first and last fields. @@ -1279,6 +1280,57 @@ xfs_check_agi_unlinked( #define xfs_check_agi_unlinked(agi) #endif +static void +xfs_agi_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); + int agi_ok; + + /* + * Validate the magic number of the agi block. + */ + agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && + XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); + + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. 
growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag) + agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) == + bp->b_pag->pag_agno; + + if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, + XFS_RANDOM_IALLOC_READ_AGI))) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } + xfs_check_agi_unlinked(agi); +} + +static void +xfs_agi_read_verify( + struct xfs_buf *bp) +{ + xfs_agi_verify(bp); +} + +static void +xfs_agi_write_verify( + struct xfs_buf *bp) +{ + xfs_agi_verify(bp); +} + +const struct xfs_buf_ops xfs_agi_buf_ops = { + .verify_read = xfs_agi_read_verify, + .verify_write = xfs_agi_write_verify, +}; + /* * Read in the allocation group header (inode allocation section) */ @@ -1289,38 +1341,18 @@ xfs_read_agi( xfs_agnumber_t agno, /* allocation group number */ struct xfs_buf **bpp) /* allocation group hdr buf */ { - struct xfs_agi *agi; /* allocation group header */ - int agi_ok; /* agi is consistent */ int error; ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, bpp); + XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); if (error) return error; - ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); - agi = XFS_BUF_TO_AGI(*bpp); - - /* - * Validate the magic number of the agi block. 
- */ - agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && - be32_to_cpu(agi->agi_seqno) == agno; - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, - XFS_RANDOM_IALLOC_READ_AGI))) { - XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, - mp, agi); - xfs_trans_brelse(tp, *bpp); - return XFS_ERROR(EFSCORRUPTED); - } - - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); - - xfs_check_agi_unlinked(agi); + ASSERT(!xfs_buf_geterror(*bpp)); + xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; } diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c index 35dd96f3c..0bc24cc87 100644 --- a/libxfs/xfs_ialloc_btree.c +++ b/libxfs/xfs_ialloc_btree.c @@ -163,6 +163,59 @@ xfs_inobt_key_diff( cur->bc_rec.i.ir_startino; } +void +xfs_inobt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + unsigned int level; + int sblock_ok; /* block passes checks */ + + /* magic number and level verification */ + level = be16_to_cpu(block->bb_level); + sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) && + level < mp->m_in_maxlevels; + + /* numrecs verification */ + sblock_ok = sblock_ok && + be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0]; + + /* sibling pointer verification */ + sblock_ok = sblock_ok && + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && + block->bb_u.s.bb_leftsib && + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && + block->bb_u.s.bb_rightsib; + + if (!sblock_ok) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +static void +xfs_inobt_read_verify( + struct xfs_buf *bp) +{ + xfs_inobt_verify(bp); +} + +static void 
+xfs_inobt_write_verify( + struct xfs_buf *bp) +{ + xfs_inobt_verify(bp); +} + +const struct xfs_buf_ops xfs_inobt_buf_ops = { + .verify_read = xfs_inobt_read_verify, + .verify_write = xfs_inobt_write_verify, +}; + #ifdef DEBUG STATIC int xfs_inobt_keys_inorder( @@ -266,7 +319,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, - + .buf_ops = &xfs_inobt_buf_ops, #ifdef DEBUG .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, diff --git a/libxfs/xfs_inode.c b/libxfs/xfs_inode.c index e4474fda3..2970f46d0 100644 --- a/libxfs/xfs_inode.c +++ b/libxfs/xfs_inode.c @@ -21,6 +21,12 @@ kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; +/* + * Used in xfs_itruncate_extents(). This is the maximum number of extents + * freed from a file in a single transaction. + */ +#define XFS_ITRUNC_MAX_EXTENTS 2 + STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); @@ -73,8 +79,8 @@ xfs_inobp_check( dip = (xfs_dinode_t *)xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); if (!dip->di_next_unlinked) { - xfs_fs_cmn_err(CE_ALERT, mp, - "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", + xfs_alert(mp, + "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", bp); ASSERT(dip->di_next_unlinked); } @@ -82,176 +88,108 @@ xfs_inobp_check( } #endif -/* - * Find the buffer associated with the given inode map - * We do basic validation checks on the buffer once it has been - * retrieved from disk. 
- */ -int -xfs_imap_to_bp( - xfs_mount_t *mp, - xfs_trans_t *tp, - struct xfs_imap *imap, - xfs_buf_t **bpp, - uint buf_flags, - uint iget_flags) +static void +xfs_inode_buf_verify( + struct xfs_buf *bp) { - int error; + struct xfs_mount *mp = bp->b_target->bt_mount; int i; int ni; - xfs_buf_t *bp; - - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, - (int)imap->im_len, buf_flags, &bp); - if (error) { - if (error != EAGAIN) { - cmn_err(CE_WARN, - "xfs_imap_to_bp: xfs_trans_read_buf()returned " - "an error %d on %s. Returning error.", - error, mp->m_fsname); - } else { - ASSERT(buf_flags & XBF_TRYLOCK); - } - return error; - } /* * Validate the magic number and version of every inode in the buffer - * (if DEBUG kernel) or the first inode in the buffer, otherwise. */ -#ifdef DEBUG - ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; -#else /* usual case */ - ni = 1; -#endif - + ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; - dip = (xfs_dinode_t *)xfs_buf_offset(bp, + dip = (struct xfs_dinode *)xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); - di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && + di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { - if (iget_flags & XFS_IGET_UNTRUSTED) { - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EINVAL); - } - XFS_CORRUPTION_ERROR("xfs_imap_to_bp", - XFS_ERRLEVEL_HIGH, mp, dip); + xfs_buf_ioerror(bp, EFSCORRUPTED); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, + mp, dip); #ifdef DEBUG - cmn_err(CE_PANIC, - "Device %s - bad inode magic/vsn " - "daddr %lld #%d (magic=%x)", - XFS_BUFTARG_NAME(mp->m_ddev_targp), - (unsigned long long)imap->im_blkno, i, + xfs_emerg(mp, + "bad inode magic/vsn daddr %lld #%d (magic=%x)", + (unsigned long long)bp->b_bn, i, be16_to_cpu(dip->di_magic)); + 
ASSERT(0); #endif - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); } } - xfs_inobp_check(mp, bp); +} - /* - * Mark the buffer as an inode buffer now that it looks good - */ - XFS_BUF_SET_VTYPE(bp, B_FS_INO); - *bpp = bp; - return 0; +static void +xfs_inode_buf_read_verify( + struct xfs_buf *bp) +{ + xfs_inode_buf_verify(bp); } -/* - * This routine is called to map an inode number within a file - * system to the buffer containing the on-disk version of the - * inode. It returns a pointer to the buffer containing the - * on-disk inode in the bpp parameter, and in the dip parameter - * it returns a pointer to the on-disk inode within that buffer. - * - * If a non-zero error is returned, then the contents of bpp and - * dipp are undefined. - * - * Use xfs_imap() to determine the size and location of the - * buffer to read from disk. - */ -int -xfs_inotobp( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - xfs_dinode_t **dipp, - xfs_buf_t **bpp, - int *offset, - uint imap_flags) +static void +xfs_inode_buf_write_verify( + struct xfs_buf *bp) { - struct xfs_imap imap; - xfs_buf_t *bp; - int error; - - imap.im_blkno = 0; - error = xfs_imap(mp, tp, ino, &imap, imap_flags); - if (error) - return error; - - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); - if (error) - return error; - - *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); - *bpp = bp; - *offset = imap.im_boffset; - return 0; + xfs_inode_buf_verify(bp); } +const struct xfs_buf_ops xfs_inode_buf_ops = { + .verify_read = xfs_inode_buf_read_verify, + .verify_write = xfs_inode_buf_write_verify, +}; + /* - * This routine is called to map an inode to the buffer containing - * the on-disk version of the inode. It returns a pointer to the - * buffer containing the on-disk inode in the bpp parameter, and in - * the dip parameter it returns a pointer to the on-disk inode within - * that buffer. 
+ * This routine is called to map an inode to the buffer containing the on-disk + * version of the inode. It returns a pointer to the buffer containing the + * on-disk inode in the bpp parameter, and in the dipp parameter it returns a + * pointer to the on-disk inode within that buffer. * - * If a non-zero error is returned, then the contents of bpp and - * dipp are undefined. - * - * The inode is expected to already been mapped to its buffer and read - * in once, thus we can use the mapping information stored in the inode - * rather than calling xfs_imap(). This allows us to avoid the overhead - * of looking at the inode btree for small block file systems - * (see xfs_imap()). + * If a non-zero error is returned, then the contents of bpp and dipp are + * undefined. */ int -xfs_itobp( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dinode_t **dipp, - xfs_buf_t **bpp, - uint buf_flags) +xfs_imap_to_bp( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_imap *imap, + struct xfs_dinode **dipp, + struct xfs_buf **bpp, + uint buf_flags, + uint iget_flags) { - xfs_buf_t *bp; - int error; + struct xfs_buf *bp; + int error; - ASSERT(ip->i_imap.im_blkno != 0); + buf_flags |= XBF_UNMAPPED; + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, + (int)imap->im_len, buf_flags, &bp, + &xfs_inode_buf_ops); + if (error) { + if (error == EAGAIN) { + ASSERT(buf_flags & XBF_TRYLOCK); + return error; + } - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); - if (error) - return error; + if (error == EFSCORRUPTED && + (iget_flags & XFS_IGET_UNTRUSTED)) + return XFS_ERROR(EINVAL); - if (!bp) { - ASSERT(buf_flags & XBF_TRYLOCK); - ASSERT(tp == NULL); - *bpp = NULL; - return EAGAIN; + xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", + __func__, error); + return error; } - *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); *bpp = bp; + *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); return 
0; } @@ -264,23 +202,20 @@ xfs_itobp( * brought in-core. The rest will be in-lined in if_extents when it * is first referenced (see xfs_iread_extents()). */ -int +STATIC int xfs_iformat( xfs_inode_t *ip, xfs_dinode_t *dip) { xfs_attr_shortform_t *atp; int size; - int error; + int error = 0; xfs_fsize_t di_size; - ip->i_df.if_ext_max = - XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); - error = 0; if (unlikely(be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > be64_to_cpu(dip->di_nblocks))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", (unsigned long long)ip->i_ino, (int)(be32_to_cpu(dip->di_nextents) + @@ -293,8 +228,7 @@ xfs_iformat( } if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt dinode %Lu, forkoff = 0x%x.", + xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", (unsigned long long)ip->i_ino, dip->di_forkoff); XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, @@ -304,7 +238,7 @@ xfs_iformat( if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && !ip->i_mount->m_rtdev)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "corrupt dinode %Lu, has realtime flag set.", ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", @@ -323,7 +257,6 @@ xfs_iformat( return XFS_ERROR(EFSCORRUPTED); } ip->i_d.di_size = 0; - ip->i_size = 0; ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); break; @@ -335,10 +268,9 @@ xfs_iformat( /* * no local regular files yet */ - if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(local format for regular file).", + if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { + xfs_warn(ip->i_mount, + "corrupt inode %Lu (local format for regular file).", (unsigned long long) ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat(4)", XFS_ERRLEVEL_LOW, @@ -348,9 
+280,8 @@ xfs_iformat( di_size = be64_to_cpu(dip->di_size); if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(bad size %Ld for local inode).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad size %Ld for local inode).", (unsigned long long) ip->i_ino, (long long) di_size); XFS_CORRUPTION_ERROR("xfs_iformat(5)", @@ -384,19 +315,18 @@ xfs_iformat( } if (!XFS_DFORK_Q(dip)) return 0; + ASSERT(ip->i_afp == NULL); ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); - ip->i_afp->if_ext_max = - XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(bad attr fork size %Ld).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad attr fork size %Ld).", (unsigned long long) ip->i_ino, (long long) size); XFS_CORRUPTION_ERROR("xfs_iformat(8)", @@ -451,9 +381,8 @@ xfs_iformat_local( * kmem_alloc() or memcpy() below. */ if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(bad size %d for local fork, size = %d).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad size %d for local fork, size = %d).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, @@ -510,8 +439,7 @@ xfs_iformat_extents( * kmem_alloc() or memcpy() below. 
*/ if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu ((a)extents = %d).", + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", (unsigned long long) ip->i_ino, nex); XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); @@ -582,15 +510,15 @@ xfs_iformat_btree( * or the number of extents is greater than the number of * blocks. */ - if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max - || XFS_BMDR_SPACE_CALC(nrecs) > - XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) - || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu (btree).", + if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= + XFS_IFORK_MAXEXT(ip, whichfork) || + XFS_BMDR_SPACE_CALC(nrecs) > + XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) || + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { + xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); - XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, - ip->i_mount); + XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); } @@ -682,6 +610,124 @@ xfs_dinode_to_disk( to->di_gen = cpu_to_be32(from->di_gen); } +/* + * Read the disk inode attributes into the in-core inode structure. + */ +int +xfs_iread( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_inode_t *ip, + uint iget_flags) +{ + xfs_buf_t *bp; + xfs_dinode_t *dip; + int error; + + /* + * Fill in the location information in the in-core inode. + */ + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); + if (error) + return error; + + /* + * Get pointers to the on-disk inode and the buffer containing it. 
+ */ + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); + if (error) + return error; + + /* + * If we got something that isn't an inode it means someone + * (nfs or dmi) has a stale handle. + */ + if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { +#ifdef DEBUG + xfs_alert(mp, + "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", + __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); +#endif /* DEBUG */ + error = XFS_ERROR(EINVAL); + goto out_brelse; + } + + /* + * If the on-disk inode is already linked to a directory + * entry, copy all of the inode into the in-core inode. + * xfs_iformat() handles copying in the inode format + * specific information. + * Otherwise, just get the truly permanent information. + */ + if (dip->di_mode) { + xfs_dinode_from_disk(&ip->i_d, dip); + error = xfs_iformat(ip, dip); + if (error) { +#ifdef DEBUG + xfs_alert(mp, "%s: xfs_iformat() returned error %d", + __func__, error); +#endif /* DEBUG */ + goto out_brelse; + } + } else { + ip->i_d.di_magic = be16_to_cpu(dip->di_magic); + ip->i_d.di_version = dip->di_version; + ip->i_d.di_gen = be32_to_cpu(dip->di_gen); + ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); + /* + * Make sure to pull in the mode here as well in + * case the inode is released without being used. + * This ensures that xfs_inactive() will see that + * the inode is already free and not try to mess + * with the uninitialized part of it. + */ + ip->i_d.di_mode = 0; + } + + /* + * The inode format changed when we moved the link count and + * made it 32 bits long. If this is an old format inode, + * convert it in memory to look like a new one. If it gets + * flushed to disk we will convert back before flushing or + * logging it. We zero out the new projid field and the old link + * count field. We'll handle clearing the pad field (the remains + * of the old uuid field) when we actually convert the inode to + * the new format. 
We don't change the version number so that we + * can distinguish this from a real new format inode. + */ + if (ip->i_d.di_version == 1) { + ip->i_d.di_nlink = ip->i_d.di_onlink; + ip->i_d.di_onlink = 0; + xfs_set_projid(&ip->i_d, 0); + } + + ip->i_delayed_blks = 0; + + /* + * Mark the buffer containing the inode as something to keep + * around for a while. This helps to keep recently accessed + * meta-data in-core longer. + */ + xfs_buf_set_ref(bp, XFS_INO_REF); + + /* + * Use xfs_trans_brelse() to release the buffer containing the + * on-disk inode, because it was acquired with xfs_trans_read_buf() + * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal + * brelse(). If we're within a transaction, then xfs_trans_brelse() + * will only release the buffer if it is not dirty within the + * transaction. It will be OK to release the buffer in this case, + * because inodes on disk are never destroyed and we will be + * locking the new in-core inode before putting it in the hash + * table where other processes can find it. Thus we don't have + * to worry about the inode being changed just because we released + * the buffer. + */ + out_brelse: + xfs_trans_brelse(tp, bp); + return error; +} + /* * Read in extents from a btree-format inode. * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 
@@ -707,7 +753,6 @@ xfs_iread_extents( /* * We know that the size is valid (it's checked in iformat_btree) */ - ifp->if_lastex = NULLEXTNUM; ifp->if_bytes = ifp->if_real_bytes = 0; ifp->if_flags |= XFS_IFEXTENTS; xfs_iext_add(ifp, 0, nextents); @@ -1067,9 +1112,6 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; -#ifdef XFS_TRANS_DEBUG - int first; -#endif static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -1092,7 +1134,7 @@ xfs_iflush_fork( mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && + if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); @@ -1102,13 +1144,10 @@ xfs_iflush_fork( case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || - !(iip->ili_format.ilf_fields & extflag[whichfork])); - ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || - (ifp->if_bytes == 0)); - ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || - (ifp->if_bytes > 0)); - if ((iip->ili_format.ilf_fields & extflag[whichfork]) && + !(iip->ili_fields & extflag[whichfork])); + if ((iip->ili_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { + ASSERT(xfs_iext_get_ext(ifp, 0)); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, whichfork); @@ -1116,7 +1155,7 @@ xfs_iflush_fork( break; case XFS_DINODE_FMT_BTREE: - if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && + if ((iip->ili_fields & brootflag[whichfork]) && (ifp->if_broot_bytes > 0)) { ASSERT(ifp->if_broot != NULL); ASSERT(ifp->if_broot_bytes <= @@ -1129,14 +1168,14 @@ xfs_iflush_fork( break; case XFS_DINODE_FMT_DEV: - if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { + if (iip->ili_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); } break; case 
XFS_DINODE_FMT_UUID: - if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { + if (iip->ili_fields & XFS_ILOG_UUID) { ASSERT(whichfork == XFS_DATA_FORK); memcpy(XFS_DFORK_DPTR(dip), &ip->i_df.if_u2.if_uuid, @@ -1159,6 +1198,8 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); + ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; } else if (ifp->if_flags & XFS_IFEXTIREC) { @@ -1238,7 +1279,6 @@ xfs_iext_add( } ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; - ifp->if_lastex = nextents + ext_diff; } /* * Otherwise use a linear (direct) extent list. @@ -1933,8 +1973,10 @@ xfs_iext_idx_to_irec( xfs_extnum_t page_idx = *idxp; /* extent index in target list */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - ASSERT(page_idx >= 0 && page_idx <= - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx >= 0); + ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; low = 0; diff --git a/libxfs/xfs_mount.c b/libxfs/xfs_mount.c index 32d22553b..a9155b39b 100644 --- a/libxfs/xfs_mount.c +++ b/libxfs/xfs_mount.c @@ -90,8 +90,8 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) ASSERT(atomic_read(&pag->pag_ref) >= 0); ref = atomic_inc_return(&pag->pag_ref); } - trace_xfs_perag_get(mp, agno, ref, _RET_IP_); rcu_read_unlock(); + trace_xfs_perag_get(mp, agno, ref, _RET_IP_); return pag; } @@ -105,6 +105,114 @@ xfs_perag_put(struct xfs_perag *pag) trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); } +/* + * Check the validity of the SB found. + */ +STATIC int +xfs_mount_validate_sb( + xfs_mount_t *mp, + xfs_sb_t *sbp, + bool check_inprogress) +{ + + /* + * If the log device and data device have the + * same device number, the log is internal. 
+ * Consequently, the sb_logstart should be non-zero. If + * we have a zero sb_logstart in this case, we may be trying to mount + * a volume filesystem in a non-volume manner. + */ + if (sbp->sb_magicnum != XFS_SB_MAGIC) { + xfs_warn(mp, "bad magic number"); + return XFS_ERROR(EWRONGFS); + } + + if (!xfs_sb_good_version(sbp)) { + xfs_warn(mp, "bad version"); + return XFS_ERROR(EWRONGFS); + } + + if (unlikely( + sbp->sb_logstart == 0 && mp->m_logdev == mp->m_dev)) { + xfs_warn(mp, + "filesystem is marked as having an external log; " + "specify logdev on the mount command line."); + return XFS_ERROR(EINVAL); + } + + if (unlikely( + sbp->sb_logstart != 0 && mp->m_logdev != mp->m_dev)) { + xfs_warn(mp, + "filesystem is marked as having an internal log; " + "do not specify logdev on the mount command line."); + return XFS_ERROR(EINVAL); + } + + /* + * More sanity checking. Most of these were stolen directly from + * xfs_repair. + */ + if (unlikely( + sbp->sb_agcount <= 0 || + sbp->sb_sectsize < XFS_MIN_SECTORSIZE || + sbp->sb_sectsize > XFS_MAX_SECTORSIZE || + sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || + sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || + sbp->sb_sectsize != (1 << sbp->sb_sectlog) || + sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || + sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || + sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_blocksize != (1 << sbp->sb_blocklog) || + sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || + sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || + sbp->sb_inodelog < XFS_DINODE_MIN_LOG || + sbp->sb_inodelog > XFS_DINODE_MAX_LOG || + sbp->sb_inodesize != (1 << sbp->sb_inodelog) || + (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || + (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || + (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || + (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || + sbp->sb_dblocks == 0 || + sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || + 
sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { + XFS_CORRUPTION_ERROR("SB sanity check failed", + XFS_ERRLEVEL_LOW, mp, sbp); + return XFS_ERROR(EFSCORRUPTED); + } + + /* + * Currently only very few inode sizes are supported. + */ + switch (sbp->sb_inodesize) { + case 256: + case 512: + case 1024: + case 2048: + break; + default: + xfs_warn(mp, "inode size of %d bytes not supported", + sbp->sb_inodesize); + return XFS_ERROR(ENOSYS); + } + + + if (check_inprogress && sbp->sb_inprogress) { + xfs_warn(mp, "Offline file system operation in progress!"); + return XFS_ERROR(EFSCORRUPTED); + } + + /* + * Version 1 directory format has never worked on Linux. + */ + if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { + xfs_warn(mp, "file system using version 1 directory format"); + return XFS_ERROR(ENOSYS); + } + + return 0; +} + void xfs_sb_from_disk( xfs_sb_t *to, @@ -211,6 +319,72 @@ xfs_sb_to_disk( } } +static void +xfs_sb_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_sb sb; + int error; + + xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); + + /* + * Only check the in progress field for the primary superblock as + * mkfs.xfs doesn't clear it from secondary superblocks. + */ + error = xfs_mount_validate_sb(mp, &sb, bp->b_blkno == XFS_SB_DADDR); + if (error) + xfs_buf_ioerror(bp, error); +} + +static void +xfs_sb_read_verify( + struct xfs_buf *bp) +{ + xfs_sb_verify(bp); +} + +/* + * We may be probed for a filesystem match, so we may not want to emit + * messages when the superblock buffer is not actually an XFS superblock. + * If we find an XFS superblock, the run a normal, noisy mount because we are + * really going to mount it and want to know about errors. + */ +static void +xfs_sb_quiet_read_verify( + struct xfs_buf *bp) +{ + struct xfs_sb sb; + + xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); + + if (sb.sb_magicnum == XFS_SB_MAGIC) { + /* XFS filesystem, verify noisily! 
*/ + xfs_sb_read_verify(bp); + return; + } + /* quietly fail */ + xfs_buf_ioerror(bp, EFSCORRUPTED); +} + +static void +xfs_sb_write_verify( + struct xfs_buf *bp) +{ + xfs_sb_verify(bp); +} + +const struct xfs_buf_ops xfs_sb_buf_ops = { + .verify_read = xfs_sb_read_verify, + .verify_write = xfs_sb_write_verify, +}; + +static const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { + .verify_read = xfs_sb_quiet_read_verify, + .verify_write = xfs_sb_write_verify, +}; + /* * xfs_mount_common * diff --git a/libxfs/xfs_rtalloc.c b/libxfs/xfs_rtalloc.c index 4fbdaa9d2..1de85fd2c 100644 --- a/libxfs/xfs_rtalloc.c +++ b/libxfs/xfs_rtalloc.c @@ -49,34 +49,24 @@ xfs_rtbuf_get( xfs_buf_t **bpp) /* output: buffer for the block */ { xfs_buf_t *bp; /* block buffer, result */ - xfs_daddr_t d; /* disk addr of block */ - int error; /* error value */ - xfs_fsblock_t fsb; /* fs block number for block */ xfs_inode_t *ip; /* bitmap or summary inode */ + xfs_bmbt_irec_t map; + int nmap = 1; + int error; /* error value */ ip = issum ? mp->m_rsumip : mp->m_rbmip; - /* - * Map from the file offset (block) and inode number to the - * file system block. - */ - error = xfs_bmapi_single(tp, ip, XFS_DATA_FORK, &fsb, block); - if (error) { + + error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); + if (error) return error; - } - ASSERT(fsb != NULLFSBLOCK); - /* - * Convert to disk address for buffer cache. - */ - d = XFS_FSB_TO_DADDR(mp, fsb); - /* - * Read the buffer. 
- */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, 0, &bp); - if (error) { + + ASSERT(map.br_startblock != NULLFSBLOCK); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + mp->m_bsize, 0, &bp, NULL); + if (error) return error; - } - ASSERT(bp && !XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); *bpp = bp; return 0; } @@ -115,7 +105,7 @@ xfs_rtfind_back( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Get the first word's index & point to it. */ @@ -167,7 +157,7 @@ xfs_rtfind_back( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; word = XFS_BLOCKWMASK(mp); b = &bufp[word]; } else { @@ -213,7 +203,7 @@ xfs_rtfind_back( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; word = XFS_BLOCKWMASK(mp); b = &bufp[word]; } else { @@ -290,7 +280,7 @@ xfs_rtfind_forw( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Get the first word's index & point to it. */ @@ -341,7 +331,7 @@ xfs_rtfind_forw( if (error) { return error; } - b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + b = bufp = bp->b_addr; word = 0; } else { /* @@ -386,7 +376,7 @@ xfs_rtfind_forw( if (error) { return error; } - b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + b = bufp = bp->b_addr; word = 0; } else { /* @@ -537,7 +527,7 @@ xfs_rtmodify_range( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Compute the starting word's address, and starting bit. 
*/ @@ -582,7 +572,7 @@ xfs_rtmodify_range( if (error) { return error; } - first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + first = b = bufp = bp->b_addr; word = 0; } else { /* @@ -622,7 +612,7 @@ xfs_rtmodify_range( if (error) { return error; } - first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + first = b = bufp = bp->b_addr; word = 0; } else { /* @@ -720,8 +710,8 @@ xfs_rtmodify_summary( */ sp = XFS_SUMPTR(mp, bp, so); *sp += delta; - xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)), - (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1)); + xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), + (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); return 0; } @@ -736,18 +726,15 @@ xfs_rtfree_extent( xfs_extlen_t len) /* length of extent freed */ { int error; /* error value */ - xfs_inode_t *ip; /* bitmap file inode */ xfs_mount_t *mp; /* file system mount structure */ xfs_fsblock_t sb; /* summary file block number */ xfs_buf_t *sumbp; /* summary file block buffer */ mp = tp->t_mountp; - /* - * Synchronize by locking the bitmap inode. - */ - if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, - XFS_ILOCK_EXCL, &ip))) - return error; + + ASSERT(mp->m_rbmip->i_itemp != NULL); + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + #if defined(__KERNEL__) && defined(DEBUG) /* * Check to see that this whole range is currently allocated. 
@@ -780,10 +767,10 @@ xfs_rtfree_extent( */ if (tp->t_frextents_delta + mp->m_sb.sb_frextents == mp->m_sb.sb_rextents) { - if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) - ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; - *(__uint64_t *)&ip->i_d.di_atime = 0; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) + mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; + xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); } return 0; } diff --git a/libxfs/xfs_trans.c b/libxfs/xfs_trans.c index 7249196ea..bdd0ebc2f 100644 --- a/libxfs/xfs_trans.c +++ b/libxfs/xfs_trans.c @@ -583,14 +583,13 @@ xfs_trans_add_item( { struct xfs_log_item_desc *lidp; - ASSERT(lip->li_mountp = tp->t_mountp); - ASSERT(lip->li_ailp = tp->t_mountp->m_ail); + ASSERT(lip->li_mountp == tp->t_mountp); + ASSERT(lip->li_ailp == tp->t_mountp->m_ail); lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); lidp->lid_item = lip; lidp->lid_flags = 0; - lidp->lid_size = 0; list_add_tail(&lidp->lid_trans, &tp->t_items); lip->li_desc = lidp; @@ -673,8 +672,6 @@ xfs_trans_roll( if (error) return error; - xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(trans, dp); + xfs_trans_ijoin(trans, dp, 0); return 0; } - diff --git a/logprint/log_misc.c b/logprint/log_misc.c index d94c0cc8e..567cdf225 100644 --- a/logprint/log_misc.c +++ b/logprint/log_misc.c @@ -558,7 +558,7 @@ xlog_print_trans_inode_core(xfs_icdinode_t *ip) } void -xlog_print_dir2_sf(xfs_dir2_sf_t *sfp, int size) +xlog_print_dir2_sf(xfs_dir2_sf_hdr_t *sfp, int size) { xfs_ino_t ino; int count; @@ -566,9 +566,6 @@ xlog_print_dir2_sf(xfs_dir2_sf_t *sfp, int size) char namebuf[257]; xfs_dir2_sf_entry_t *sfep; - /* XXX need to determine whether this is v1 or v2, then - print appropriate structure */ - printf(_("SHORTFORM DIRECTORY size %d\n"), size); /* bail out for now */ @@ -576,14 +573,14 @@ xlog_print_dir2_sf(xfs_dir2_sf_t *sfp, int size) 
return; printf(_("SHORTFORM DIRECTORY size %d count %d\n"), - size, sfp->hdr.count); - memmove(&ino, &(sfp->hdr.parent), sizeof(ino)); - printf(_(".. ino 0x%llx\n"), (unsigned long long) be64_to_cpu(ino)); + size, sfp->count); + memmove(&ino, &(sfp->parent), sizeof(ino)); + printf(_(".. ino 0x%llx\n"), (unsigned long long) be64_to_cpu(ino)); - count = (uint)(sfp->hdr.count); - sfep = &(sfp->list[0]); + count = sfp->count; + sfep = xfs_dir2_sf_firstentry(sfp); for (i = 0; i < count; i++) { - memmove(&ino, &(sfep->inumber), sizeof(ino)); + ino = xfs_dir2_sfe_get_ino(sfp, sfep); memmove(namebuf, (sfep->name), sfep->namelen); namebuf[sfep->namelen] = '\0'; printf(_("%s ino 0x%llx namelen %d\n"), @@ -691,7 +688,7 @@ xlog_print_trans_inode(xfs_caddr_t *ptr, case XFS_ILOG_DDATA: printf(_("LOCAL inode data\n")); if (mode == S_IFDIR) - xlog_print_dir2_sf((xfs_dir2_sf_t *)*ptr, size); + xlog_print_dir2_sf((xfs_dir2_sf_hdr_t *)*ptr, size); break; default: ASSERT((f->ilf_fields & XFS_ILOG_DFORK) == 0); @@ -718,7 +715,7 @@ xlog_print_trans_inode(xfs_caddr_t *ptr, case XFS_ILOG_ADATA: printf(_("LOCAL attr data\n")); if (mode == S_IFDIR) - xlog_print_dir2_sf((xfs_dir2_sf_t *)*ptr, size); + xlog_print_dir2_sf((xfs_dir2_sf_hdr_t *)*ptr, size); break; default: ASSERT((f->ilf_fields & XFS_ILOG_AFORK) == 0); @@ -1039,7 +1036,7 @@ xlog_print_rec_head(xlog_rec_header_t *head, int *len) } /* check for cleared blocks written by xlog_clear_stale_blocks() */ - if (!head->h_len && !head->h_chksum && !head->h_prev_block && + if (!head->h_len && !head->h_crc && !head->h_prev_block && !head->h_num_logops && !head->h_size) return CLEARED_BLKS; diff --git a/logprint/log_print_all.c b/logprint/log_print_all.c index 2c45ff290..8f1c8abe7 100644 --- a/logprint/log_print_all.c +++ b/logprint/log_print_all.c @@ -262,7 +262,7 @@ xlog_recover_print_inode_core( (di->di_magic>>8) & 0xff, di->di_magic & 0xff, di->di_mode, di->di_version, di->di_format, di->di_onlink); printf(_(" uid:%d gid:%d nlink:%d 
projid:%u\n"), - di->di_uid, di->di_gid, di->di_nlink, xfs_get_projid(*di)); + di->di_uid, di->di_gid, di->di_nlink, xfs_get_projid(di)); printf(_(" atime:%d mtime:%d ctime:%d\n"), di->di_atime.t_sec, di->di_mtime.t_sec, di->di_ctime.t_sec); printf(_(" flushiter:%d\n"), di->di_flushiter); diff --git a/mkfs/proto.c b/mkfs/proto.c index 302102868..56eed31d2 100644 --- a/mkfs/proto.c +++ b/mkfs/proto.c @@ -243,7 +243,7 @@ newfile( } else if (len > 0) { nb = XFS_B_TO_FSB(mp, len); nmap = 1; - error = libxfs_bmapi(tp, ip, 0, nb, XFS_BMAPI_WRITE, first, nb, + error = libxfs_bmapi_write(tp, ip, 0, nb, 0, first, nb, &map, &nmap, flist); if (error) { fail(_("error allocating space for a file"), error); @@ -667,9 +667,9 @@ rtinit( xfs_bmap_init(&flist, &first); while (bno < mp->m_sb.sb_rbmblocks) { nmap = XFS_BMAP_MAX_NMAP; - error = libxfs_bmapi(tp, rbmip, bno, + error = libxfs_bmapi_write(tp, rbmip, bno, (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno), - XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks, + 0, &first, mp->m_sb.sb_rbmblocks, map, &nmap, &flist); if (error) { fail(_("Allocation of the realtime bitmap failed"), @@ -704,9 +704,9 @@ rtinit( xfs_bmap_init(&flist, &first); while (bno < nsumblocks) { nmap = XFS_BMAP_MAX_NMAP; - error = libxfs_bmapi(tp, rsumip, bno, + error = libxfs_bmapi_write(tp, rsumip, bno, (xfs_extlen_t)(nsumblocks - bno), - XFS_BMAPI_WRITE, &first, nsumblocks, + 0, &first, nsumblocks, map, &nmap, &flist); if (error) { fail(_("Allocation of the realtime summary failed"), diff --git a/repair/dir2.c b/repair/dir2.c index 932e99436..c01e0bc76 100644 --- a/repair/dir2.c +++ b/repair/dir2.c @@ -99,182 +99,36 @@ namecheck(char *name, int length) * Multibuffer handling. * V2 directory blocks can be noncontiguous, needing multiple buffers. 
*/ -static xfs_dabuf_t * +static struct xfs_buf * da_read_buf( xfs_mount_t *mp, int nex, bmap_ext_t *bmp) { - xfs_buf_t *bp; - xfs_buf_t *bparray[4]; - xfs_buf_t **bplist; - xfs_dabuf_t *dabuf; +#define MAP_ARRAY_SZ 4 + struct xfs_buf_map map_array[MAP_ARRAY_SZ]; + struct xfs_buf_map *map; + struct xfs_buf *bp; int i; - int off; - if (nex > (sizeof(bparray)/sizeof(xfs_buf_t *))) { - bplist = calloc(nex, sizeof(*bplist)); - if (bplist == NULL) { + if (nex > MAP_ARRAY_SZ) { + map = calloc(nex, sizeof(*map)); + if (map == NULL) { do_error(_("couldn't malloc dir2 buffer list\n")); exit(1); } - } - else { + } else { /* common case avoids calloc/free */ - bplist = bparray; + map = map_array; } for (i = 0; i < nex; i++) { - pftrace("about to read off %llu (len = %d)", - (long long)XFS_FSB_TO_DADDR(mp, bmp[i].startblock), - XFS_FSB_TO_BB(mp, bmp[i].blockcount)); - - bplist[i] = libxfs_readbuf(mp->m_dev, - XFS_FSB_TO_DADDR(mp, bmp[i].startblock), - XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0); - if (!bplist[i]) { - nex = i; - goto failed; - } - - pftrace("readbuf %p (%llu, %d)", bplist[i], - (long long)XFS_BUF_ADDR(bplist[i]), - XFS_BUF_COUNT(bplist[i])); - } - dabuf = malloc(XFS_DA_BUF_SIZE(nex)); - if (dabuf == NULL) { - do_error(_("couldn't malloc dir2 buffer header\n")); - exit(1); - } - dabuf->dirty = 0; - dabuf->nbuf = nex; - if (nex == 1) { - bp = bplist[0]; - dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); - dabuf->data = XFS_BUF_PTR(bp); - dabuf->bps[0] = bp; - } else { - for (i = 0, dabuf->bbcount = 0; i < nex; i++) { - dabuf->bps[i] = bp = bplist[i]; - dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp)); - } - dabuf->data = malloc(BBTOB(dabuf->bbcount)); - if (dabuf->data == NULL) { - do_error(_("couldn't malloc dir2 buffer data\n")); - exit(1); - } - for (i = off = 0; i < nex; i++, off += XFS_BUF_COUNT(bp)) { - bp = bplist[i]; - memmove((char *)dabuf->data + off, XFS_BUF_PTR(bp), - XFS_BUF_COUNT(bp)); - } - } - if (bplist != bparray) - free(bplist); - return dabuf; 
-failed: - for (i = 0; i < nex; i++) - libxfs_putbuf(bplist[i]); - if (bplist != bparray) - free(bplist); - return NULL; -} - -static void -da_buf_clean( - xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - int i; - int off; - - if (dabuf->dirty) { - dabuf->dirty = 0; - for (i=off=0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { - bp = dabuf->bps[i]; - memmove(XFS_BUF_PTR(bp), (char *)dabuf->data + off, - XFS_BUF_COUNT(bp)); - } - } -} - -static void -da_buf_done( - xfs_dabuf_t *dabuf) -{ - da_buf_clean(dabuf); - if (dabuf->nbuf > 1) - free(dabuf->data); - free(dabuf); -} - -static int -da_bwrite( - xfs_mount_t *mp, - xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - xfs_buf_t **bplist; - int e; - int error; - int i; - int nbuf; - int off; - - if ((nbuf = dabuf->nbuf) == 1) { - bplist = &bp; - bp = dabuf->bps[0]; - } else { - bplist = malloc(nbuf * sizeof(*bplist)); - if (bplist == NULL) { - do_error(_("couldn't malloc dir2 buffer list\n")); - exit(1); - } - memmove(bplist, dabuf->bps, nbuf * sizeof(*bplist)); - for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { - bp = bplist[i]; - memmove(XFS_BUF_PTR(bp), (char *)dabuf->data + off, - XFS_BUF_COUNT(bp)); - } - } - da_buf_done(dabuf); - for (i = error = 0; i < nbuf; i++) { - e = libxfs_writebuf(bplist[i], 0); - if (e) - error = e; - } - if (bplist != &bp) - free(bplist); - return error; -} - -static void -da_brelse( - xfs_dabuf_t *dabuf) -{ - xfs_buf_t *bp; - xfs_buf_t **bplist; - int i; - int nbuf; - - if ((nbuf = dabuf->nbuf) == 1) { - bplist = &bp; - bp = dabuf->bps[0]; - } else { - bplist = malloc(nbuf * sizeof(*bplist)); - if (bplist == NULL) { - do_error(_("couldn't malloc dir2 buffer list\n")); - exit(1); - } - memmove(bplist, dabuf->bps, nbuf * sizeof(*bplist)); - } - da_buf_done(dabuf); - for (i = 0; i < nbuf; i++) { - pftrace("putbuf %p (%llu)", bplist[i], - (long long)XFS_BUF_ADDR(bplist[i])); - libxfs_putbuf(bplist[i]); + map[i].bm_bn = XFS_FSB_TO_DADDR(mp, bmp[i].startblock); + map[i].bm_len = 
XFS_FSB_TO_BB(mp, bmp[i].blockcount); } - if (bplist != &bp) - free(bplist); + bp = libxfs_readbuf_map(mp->m_dev, map, nex, 0); + if (map != map_array) + free(map); + return bp; } /* @@ -290,7 +144,7 @@ traverse_int_dir2block(xfs_mount_t *mp, { bmap_ext_t *bmp; xfs_dablk_t bno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int i; int nex; xfs_da_blkinfo_t *info; @@ -327,7 +181,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"), goto error_out; } - info = bp->data; + info = bp->b_addr; if (be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC) { if ( i != -1 ) { @@ -336,10 +190,10 @@ _("found non-root LEAFN node in inode %" PRIu64 " bno = %u\n"), da_cursor->ino, bno); } *rbno = 0; - da_brelse(bp); + libxfs_putbuf(bp); return(1); } else if (be16_to_cpu(info->magic) != XFS_DA_NODE_MAGIC) { - da_brelse(bp); + libxfs_putbuf(bp); do_warn( _("bad dir magic number 0x%x in inode %" PRIu64 " bno = %u\n"), be16_to_cpu(info->magic), @@ -348,7 +202,7 @@ _("bad dir magic number 0x%x in inode %" PRIu64 " bno = %u\n"), } node = (xfs_da_intnode_t*)info; if (be16_to_cpu(node->hdr.count) > mp->m_dir_node_ents) { - da_brelse(bp); + libxfs_putbuf(bp); do_warn( _("bad record count in inode %" PRIu64 ", count = %d, max = %d\n"), da_cursor->ino, be16_to_cpu(node->hdr.count), @@ -364,7 +218,7 @@ _("bad record count in inode %" PRIu64 ", count = %d, max = %d\n"), da_cursor->i do_warn( _("bad header depth for directory inode %" PRIu64 "\n"), da_cursor->ino); - da_brelse(bp); + libxfs_putbuf(bp); i = -1; goto error_out; } @@ -375,7 +229,7 @@ _("bad header depth for directory inode %" PRIu64 "\n"), do_warn( _("bad directory btree for directory inode %" PRIu64 "\n"), da_cursor->ino); - da_brelse(bp); + libxfs_putbuf(bp); goto error_out; } } @@ -400,7 +254,7 @@ _("bad directory btree for directory inode %" PRIu64 "\n"), error_out: while (i > 1 && i <= da_cursor->active) { - da_brelse(da_cursor->level[i].bp); + libxfs_putbuf(da_cursor->level[i].bp); i++; } @@ -429,7 +283,7 @@ 
release_dir2_cursor_int(xfs_mount_t *mp, } ASSERT(error != 0); - da_brelse(cursor->level[level].bp); + libxfs_putbuf(cursor->level[level].bp); cursor->level[level].bp = NULL; } @@ -478,7 +332,7 @@ verify_final_dir2_path(xfs_mount_t *mp, * in the block which should be the final (rightmost) entry */ entry = cursor->level[this_level].index; - node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->data); + node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->b_addr); /* * check internal block consistency on this level -- ensure * that all entries are used, encountered and expected hashvals @@ -550,9 +404,9 @@ _("would correct bad hashval in non-leaf dir block\n" (cursor->level[this_level].dirty && !no_modify)); if (cursor->level[this_level].dirty && !no_modify) - da_bwrite(mp, cursor->level[this_level].bp); + libxfs_writebuf(cursor->level[this_level].bp, 0); else - da_brelse(cursor->level[this_level].bp); + libxfs_putbuf(cursor->level[this_level].bp); cursor->level[this_level].bp = NULL; @@ -618,7 +472,7 @@ verify_dir2_path(xfs_mount_t *mp, xfs_da_intnode_t *node; xfs_da_intnode_t *newnode; xfs_dablk_t dabno; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int bad; int entry; int this_level = p_level + 1; @@ -631,7 +485,7 @@ verify_dir2_path(xfs_mount_t *mp, * should be processed now in this level. 
*/ entry = cursor->level[this_level].index; - node = cursor->level[this_level].bp->data; + node = cursor->level[this_level].bp->b_addr; /* * if this block is out of entries, validate this @@ -685,7 +539,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"), return(1); } - newnode = bp->data; + newnode = bp->b_addr; /* * verify magic number and back pointer, sanity-check * entry count, verify level @@ -720,7 +574,7 @@ _("bad level %d in block %u for directory inode %" PRIu64 "\n"), bad++; } if (bad) { - da_brelse(bp); + libxfs_putbuf(bp); return(1); } /* @@ -731,9 +585,9 @@ _("bad level %d in block %u for directory inode %" PRIu64 "\n"), (cursor->level[this_level].dirty && !no_modify)); if (cursor->level[this_level].dirty && !no_modify) - da_bwrite(mp, cursor->level[this_level].bp); + libxfs_writebuf(cursor->level[this_level].bp, 0); else - da_brelse(cursor->level[this_level].bp); + libxfs_putbuf(cursor->level[this_level].bp); cursor->level[this_level].bp = bp; cursor->level[this_level].dirty = 0; cursor->level[this_level].bno = dabno; @@ -805,21 +659,19 @@ process_sf_dir2_fixi8( memmove(oldsfp, newsfp, oldsize); newsfp->hdr.count = oldsfp->hdr.count; newsfp->hdr.i8count = 0; - ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); - xfs_dir2_sf_put_inumber(newsfp, &ino, &newsfp->hdr.parent); - oldsfep = xfs_dir2_sf_firstentry(oldsfp); - newsfep = xfs_dir2_sf_firstentry(newsfp); + ino = xfs_dir2_sf_get_parent_ino(&sfp->hdr); + xfs_dir2_sf_put_parent_ino(&newsfp->hdr, ino); + oldsfep = xfs_dir2_sf_firstentry(&oldsfp->hdr); + newsfep = xfs_dir2_sf_firstentry(&newsfp->hdr); while ((int)((char *)oldsfep - (char *)oldsfp) < oldsize) { newsfep->namelen = oldsfep->namelen; xfs_dir2_sf_put_offset(newsfep, xfs_dir2_sf_get_offset(oldsfep)); memmove(newsfep->name, oldsfep->name, newsfep->namelen); - ino = xfs_dir2_sf_get_inumber(oldsfp, - xfs_dir2_sf_inumberp(oldsfep)); - xfs_dir2_sf_put_inumber(newsfp, &ino, - xfs_dir2_sf_inumberp(newsfep)); - oldsfep = 
xfs_dir2_sf_nextentry(oldsfp, oldsfep); - newsfep = xfs_dir2_sf_nextentry(newsfp, newsfep); + ino = xfs_dir2_sfe_get_ino(&oldsfp->hdr, oldsfep); + xfs_dir2_sfe_put_ino(&newsfp->hdr, newsfep, ino); + oldsfep = xfs_dir2_sf_nextentry(&oldsfp->hdr, oldsfep); + newsfep = xfs_dir2_sf_nextentry(&newsfp->hdr, newsfep); } *next_sfep = newsfep; free(oldsfp); @@ -838,13 +690,13 @@ process_sf_dir2_fixoff( xfs_dir2_sf_t *sfp; sfp = (xfs_dir2_sf_t *)XFS_DFORK_DPTR(dip); - sfep = xfs_dir2_sf_firstentry(sfp); + sfep = xfs_dir2_sf_firstentry(&sfp->hdr); offset = XFS_DIR2_DATA_FIRST_OFFSET; for (i = 0; i < sfp->hdr.count; i++) { xfs_dir2_sf_put_offset(sfep, offset); offset += xfs_dir2_data_entsize(sfep->namelen); - sfep = xfs_dir2_sf_nextentry(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(&sfp->hdr, sfep); } } @@ -901,13 +753,12 @@ process_sf_dir2( /* * Initialize i8 based on size of parent inode number. */ - i8 = (xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent) - > XFS_DIR2_MAX_SHORT_INUM); + i8 = (xfs_dir2_sf_get_parent_ino(&sfp->hdr) > XFS_DIR2_MAX_SHORT_INUM); /* * check for bad entry count */ - if (num_entries * xfs_dir2_sf_entsize_byname(sfp, 1) + + if (num_entries * xfs_dir2_sf_entsize(&sfp->hdr, 1) + xfs_dir2_sf_hdr_size(0) > max_size || num_entries == 0) num_entries = 0xFF; @@ -915,7 +766,7 @@ process_sf_dir2( * run through entries, stop at first bad entry, don't need * to check for .. since that's encoded in its own field */ - sfep = next_sfep = xfs_dir2_sf_firstentry(sfp); + sfep = next_sfep = xfs_dir2_sf_firstentry(&sfp->hdr); for (i = 0; i < num_entries && ino_dir_size > (char *)next_sfep - (char *)sfp; i++) { @@ -923,7 +774,7 @@ process_sf_dir2( sfep = next_sfep; junkit = 0; bad_sfnamelen = 0; - lino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); + lino = xfs_dir2_sfe_get_ino(&sfp->hdr, sfep); /* * if entry points to self, junk it since only '.' or '..' 
* should do that and shortform dirs don't contain either @@ -1037,7 +888,7 @@ _("zero length entry in shortform dir %" PRIu64 ""), break; } } else if ((__psint_t) sfep - (__psint_t) sfp + - xfs_dir2_sf_entsize_byentry(sfp, sfep) + xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen) > ino_dir_size) { bad_sfnamelen = 1; @@ -1125,8 +976,8 @@ _("entry contains offset out of order in shortform dir %" PRIu64 "\n"), name[namelen] = '\0'; if (!no_modify) { - tmp_elen = - xfs_dir2_sf_entsize_byentry(sfp, sfep); + tmp_elen = xfs_dir2_sf_entsize(&sfp->hdr, + sfep->namelen); be64_add_cpu(&dip->di_size, -tmp_elen); ino_dir_size -= tmp_elen; @@ -1178,11 +1029,9 @@ _("would have junked entry \"%s\" in directory inode %" PRIu64 "\n"), */ next_sfep = (tmp_sfep == NULL) ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep - + ((!bad_sfnamelen) - ? xfs_dir2_sf_entsize_byentry(sfp, - sfep) - : xfs_dir2_sf_entsize_byname(sfp, - namelen))) + + ((!bad_sfnamelen) + ? xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen) + : xfs_dir2_sf_entsize(&sfp->hdr, namelen))) : tmp_sfep; } @@ -1262,7 +1111,7 @@ _("corrected entry offsets in directory %" PRIu64 "\n"), /* * check parent (..) entry */ - *parent = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + *parent = xfs_dir2_sf_get_parent_ino(&sfp->hdr); /* * if parent entry is bogus, null it out. we'll fix it later . @@ -1276,7 +1125,7 @@ _("bogus .. inode number (%" PRIu64 ") in directory inode %" PRIu64 ", "), if (!no_modify) { do_warn(_("clearing inode number\n")); - xfs_dir2_sf_put_inumber(sfp, &zero, &sfp->hdr.parent); + xfs_dir2_sf_put_parent_ino(&sfp->hdr, zero); *dino_dirty = 1; *repair = 1; } else { @@ -1291,7 +1140,7 @@ _("bogus .. inode number (%" PRIu64 ") in directory inode %" PRIu64 ", "), _("corrected root directory %" PRIu64 " .. 
entry, was %" PRIu64 ", now %" PRIu64 "\n"), ino, *parent, ino); *parent = ino; - xfs_dir2_sf_put_inumber(sfp, parent, &sfp->hdr.parent); + xfs_dir2_sf_put_parent_ino(&sfp->hdr, ino); *dino_dirty = 1; *repair = 1; } else { @@ -1311,7 +1160,7 @@ _("bad .. entry in directory inode %" PRIu64 ", points to self, "), if (!no_modify) { do_warn(_("clearing inode number\n")); - xfs_dir2_sf_put_inumber(sfp, &zero, &sfp->hdr.parent); + xfs_dir2_sf_put_parent_ino(&sfp->hdr, zero); *dino_dirty = 1; *repair = 1; } else { @@ -1334,11 +1183,12 @@ process_dir2_data( int ino_discovery, char *dirname, /* directory pathname */ xfs_ino_t *parent, /* out - NULLFSINO if entry not exist */ - xfs_dabuf_t *bp, + struct xfs_buf *bp, int *dot, /* out - 1 if there is a dot, else 0 */ int *dotdot, /* out - 1 if there's a dotdot, else 0 */ xfs_dablk_t da_bno, - char *endptr) + char *endptr, + int *dirty) { int badbest; xfs_dir2_data_free_t *bf; @@ -1358,7 +1208,7 @@ process_dir2_data( char *ptr; xfs_ino_t ent_ino; - d = bp->data; + d = bp->b_addr; bf = d->hdr.bestfree; ptr = (char *)d->u; badbest = lastfree = freeseen = 0; @@ -1392,7 +1242,7 @@ process_dir2_data( (char *)dup - (char *)d) break; badbest |= lastfree != 0; - dfp = xfs_dir2_data_freefind(d, dup); + dfp = xfs_dir2_data_freefind(&d->hdr, dup); if (dfp) { i = dfp - bf; badbest |= (freeseen & (1 << i)) != 0; @@ -1535,7 +1385,7 @@ _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"), (intptr_t)ptr - (intptr_t)d); dep->inumber = cpu_to_be64(BADFSINO); ent_ino = BADFSINO; - bp->dirty = 1; + *dirty = 1; } else { do_warn( _("\twould clear inode number in entry at offset %" PRIdPTR "...\n"), @@ -1561,7 +1411,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 " has ille */ if (!no_modify && ent_ino == BADFSINO) { dep->name[0] = '/'; - bp->dirty = 1; + *dirty = 1; junkit = 0; } /* @@ -1597,7 +1447,7 @@ _("bad .. 
entry in root directory inode %" PRIu64 ", was %" PRIu64 ": "), if (!no_modify) { do_warn(_("correcting\n")); dep->inumber = cpu_to_be64(ino); - bp->dirty = 1; + *dirty = 1; } else { do_warn(_("would correct\n")); } @@ -1629,7 +1479,7 @@ _("bad . entry in directory inode %" PRIu64 ", was %" PRIu64 ": "), if (!no_modify) { do_warn(_("correcting\n")); dep->inumber = cpu_to_be64(ino); - bp->dirty = 1; + *dirty = 1; } else { do_warn(_("would correct\n")); } @@ -1656,7 +1506,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "), if (junkit) { if (!no_modify) { dep->name[0] = '/'; - bp->dirty = 1; + *dirty = 1; do_warn(_("clearing entry\n")); } else { do_warn(_("would clear entry\n")); @@ -1676,8 +1526,8 @@ _("bad bestfree table in block %u in directory inode %" PRIu64 ": "), da_bno, ino); if (!no_modify) { do_warn(_("repairing table\n")); - libxfs_dir2_data_freescan(mp, d, &i); - bp->dirty = 1; + libxfs_dir2_data_freescan(mp, &d->hdr, &i); + *dirty = 1; } else { do_warn(_("would repair table\n")); } @@ -1706,11 +1556,12 @@ process_block_dir2( xfs_dir2_block_t *block; xfs_dir2_leaf_entry_t *blp; bmap_ext_t *bmp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_dir2_block_tail_t *btp; int nex; int rval; bmap_ext_t lbmp; + int dirty = 0; *repair = *dot = *dotdot = 0; *parent = NULLFSINO; @@ -1733,7 +1584,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"), /* * Verify the block */ - block = bp->data; + block = bp->b_addr; if (be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC) do_warn( _("bad directory block magic # %#x in block %u for directory inode %" PRIu64 "\n"), @@ -1742,7 +1593,7 @@ _("bad directory block magic # %#x in block %u for directory inode %" PRIu64 "\n * process the data area * this also checks & fixes the bestfree */ - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, &block->hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Don't let this go past the end of the block. 
@@ -1750,12 +1601,12 @@ _("bad directory block magic # %#x in block %u for directory inode %" PRIu64 "\n if ((char *)blp > (char *)btp) blp = (xfs_dir2_leaf_entry_t *)btp; rval = process_dir2_data(mp, ino, dip, ino_discovery, dirname, parent, - bp, dot, dotdot, mp->m_dirdatablk, (char *)blp); - if (bp->dirty && !no_modify) { + bp, dot, dotdot, mp->m_dirdatablk, (char *)blp, &dirty); + if (dirty && !no_modify) { *repair = 1; - da_bwrite(mp, bp); + libxfs_writebuf(bp, 0); } else - da_brelse(bp); + libxfs_putbuf(bp); return rval; } @@ -1814,7 +1665,7 @@ process_leaf_level_dir2( int *repair) { bmap_ext_t *bmp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; int buf_dirty; xfs_dahash_t current_hashval; xfs_dablk_t da_bno; @@ -1859,7 +1710,7 @@ _("can't read file block %u for directory inode %" PRIu64 "\n"), da_bno, ino); goto error_out; } - leaf = bp->data; + leaf = bp->b_addr; /* * Check magic number for leaf directory btree block. */ @@ -1869,7 +1720,7 @@ _("can't read file block %u for directory inode %" PRIu64 "\n"), _("bad directory leaf magic # %#x for directory inode %" PRIu64 " block %u\n"), be16_to_cpu(leaf->hdr.info.magic), ino, da_bno); - da_brelse(bp); + libxfs_putbuf(bp); goto error_out; } buf_dirty = 0; @@ -1879,7 +1730,7 @@ _("bad directory leaf magic # %#x for directory inode %" PRIu64 " block %u\n"), */ if (process_leaf_block_dir2(mp, leaf, da_bno, ino, current_hashval, &greatest_hashval)) { - da_brelse(bp); + libxfs_putbuf(bp); goto error_out; } /* @@ -1898,14 +1749,14 @@ _("bad directory leaf magic # %#x for directory inode %" PRIu64 " block %u\n"), do_warn( _("bad sibling back pointer for block %u in directory inode %" PRIu64 "\n"), da_bno, ino); - da_brelse(bp); + libxfs_putbuf(bp); goto error_out; } prev_bno = da_bno; da_bno = be32_to_cpu(leaf->hdr.info.forw); if (da_bno != 0) { if (verify_dir2_path(mp, da_cursor, 0)) { - da_brelse(bp); + libxfs_putbuf(bp); goto error_out; } } @@ -1913,9 +1764,9 @@ _("bad sibling back pointer for block %u in directory 
inode %" PRIu64 "\n"), ASSERT(buf_dirty == 0 || (buf_dirty && !no_modify)); if (buf_dirty && !no_modify) { *repair = 1; - da_bwrite(mp, bp); + libxfs_writebuf(bp, 0); } else - da_brelse(bp); + libxfs_putbuf(bp); } while (da_bno != 0); if (verify_final_dir2_path(mp, da_cursor, 0)) { /* @@ -2008,7 +1859,7 @@ process_leaf_node_dir2( int isnode) /* node directory not leaf */ { bmap_ext_t *bmp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_dir2_data_t *data; xfs_dfiloff_t dbno; int good; @@ -2017,6 +1868,7 @@ process_leaf_node_dir2( int nex; int t; bmap_ext_t lbmp; + int dirty = 0; *repair = *dot = *dotdot = good = 0; *parent = NULLFSINO; @@ -2043,21 +1895,21 @@ _("can't read block %" PRIu64 " for directory inode %" PRIu64 "\n"), dbno, ino); continue; } - data = bp->data; + data = bp->b_addr; if (be32_to_cpu(data->hdr.magic) != XFS_DIR2_DATA_MAGIC) do_warn( _("bad directory block magic # %#x in block %" PRIu64 " for directory inode %" PRIu64 "\n"), be32_to_cpu(data->hdr.magic), dbno, ino); i = process_dir2_data(mp, ino, dip, ino_discovery, dirname, parent, bp, dot, dotdot, (xfs_dablk_t)dbno, - (char *)data + mp->m_dirblksize); + (char *)data + mp->m_dirblksize, &dirty); if (i == 0) good++; - if (bp->dirty && !no_modify) { + if (dirty && !no_modify) { *repair = 1; - da_bwrite(mp, bp); + libxfs_writebuf(bp, 0); } else - da_brelse(bp); + libxfs_putbuf(bp); } if (good == 0) return 1; diff --git a/repair/dir2.h b/repair/dir2.h index 4d30b893c..5162028fc 100644 --- a/repair/dir2.h +++ b/repair/dir2.h @@ -22,6 +22,32 @@ struct blkmap; struct bmap_ext; +/* + * generic dir2 structures used by xfs_repair. 
+ * XXX: shared with xfsdb + */ +typedef union { + xfs_dir2_data_entry_t entry; + xfs_dir2_data_unused_t unused; +} xfs_dir2_data_union_t; + +typedef struct xfs_dir2_data { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ + xfs_dir2_data_union_t u[1]; +} xfs_dir2_data_t; + +typedef struct xfs_dir2_block { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_union_t u[1]; + xfs_dir2_leaf_entry_t leaf[1]; + xfs_dir2_block_tail_t tail; +} xfs_dir2_block_t; + +typedef struct xfs_dir2_sf { + xfs_dir2_sf_hdr_t hdr; /* shortform header */ + xfs_dir2_sf_entry_t list[1]; /* shortform entries */ +} xfs_dir2_sf_t; + /* * the cursor gets passed up and down the da btree processing * routines. The interior block processing routines use the @@ -42,7 +68,7 @@ struct bmap_ext; * Currently, we just trash it. */ typedef struct dir2_level_state { - xfs_dabuf_t *bp; /* block bp */ + xfs_buf_t *bp; /* block bp */ xfs_dablk_t bno; /* file block number */ xfs_dahash_t hashval; /* last verified hashval */ int index; /* current index in block */ diff --git a/repair/phase6.c b/repair/phase6.c index a44ba0901..5c3379703 100644 --- a/repair/phase6.c +++ b/repair/phase6.c @@ -483,9 +483,9 @@ mk_rbmino(xfs_mount_t *mp) xfs_bmap_init(&flist, &first); while (bno < mp->m_sb.sb_rbmblocks) { nmap = XFS_BMAP_MAX_NMAP; - error = libxfs_bmapi(tp, ip, bno, + error = libxfs_bmapi_write(tp, ip, bno, (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno), - XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks, + 0, &first, mp->m_sb.sb_rbmblocks, map, &nmap, &flist); if (error) { do_error( @@ -541,7 +541,7 @@ fill_rbmino(xfs_mount_t *mp) * fill the file one block at a time */ nmap = 1; - error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE, + error = libxfs_bmapi_write(tp, ip, bno, 1, 0, &first, 1, &map, &nmap, NULL); if (error || nmap != 1) { do_error( @@ -554,7 +554,7 @@ fill_rbmino(xfs_mount_t *mp) error = libxfs_trans_read_buf( mp, tp, mp->m_dev, XFS_FSB_TO_DADDR(mp, map.br_startblock), - 
XFS_FSB_TO_BB(mp, 1), 1, &bp); + XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL); if (error) { do_warn( @@ -610,7 +610,7 @@ fill_rsumino(xfs_mount_t *mp) * fill the file one block at a time */ nmap = 1; - error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE, + error = libxfs_bmapi_write(tp, ip, bno, 1, 0, &first, 1, &map, &nmap, NULL); if (error || nmap != 1) { do_error( @@ -623,7 +623,7 @@ fill_rsumino(xfs_mount_t *mp) error = libxfs_trans_read_buf( mp, tp, mp->m_dev, XFS_FSB_TO_DADDR(mp, map.br_startblock), - XFS_FSB_TO_BB(mp, 1), 1, &bp); + XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL); if (error) { do_warn( @@ -722,10 +722,9 @@ mk_rsumino(xfs_mount_t *mp) xfs_bmap_init(&flist, &first); while (bno < nsumblocks) { nmap = XFS_BMAP_MAX_NMAP; - error = libxfs_bmapi(tp, ip, bno, + error = libxfs_bmapi_write(tp, ip, bno, (xfs_extlen_t)(nsumblocks - bno), - XFS_BMAPI_WRITE, &first, nsumblocks, - map, &nmap, &flist); + 0, &first, nsumblocks, map, &nmap, &flist); if (error) { do_error( _("couldn't allocate realtime summary inode, error = %d\n"), @@ -1283,7 +1282,7 @@ dir2_kill_block( xfs_mount_t *mp, xfs_inode_t *ip, xfs_dablk_t da_bno, - xfs_dabuf_t *bp) + struct xfs_buf *bp) { xfs_da_args_t args; int committed; @@ -1301,7 +1300,7 @@ dir2_kill_block( res_failed(error); libxfs_trans_ijoin(tp, ip, 0); libxfs_trans_ihold(tp, ip); - libxfs_da_bjoin(tp, bp); + libxfs_trans_bjoin(tp, bp); memset(&args, 0, sizeof(args)); xfs_bmap_init(&flist, &firstblock); args.dp = ip; @@ -1333,7 +1332,7 @@ longform_dir2_entry_check_data( int *need_dot, ino_tree_node_t *current_irec, int current_ino_offset, - xfs_dabuf_t **bpp, + struct xfs_buf **bpp, dir_hash_tab_t *hashtab, freetab_t **freetabp, xfs_dablk_t da_bno, @@ -1341,7 +1340,7 @@ longform_dir2_entry_check_data( { xfs_dir2_dataptr_t addr; xfs_dir2_leaf_entry_t *blp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_dir2_block_tail_t *btp; int committed; xfs_dir2_data_t *d; @@ -1370,14 +1369,14 @@ longform_dir2_entry_check_data( int wantmagic; bp = *bpp; - d = 
bp->data; + d = bp->b_addr; ptr = (char *)d->u; nbad = 0; needscan = needlog = 0; junkit = 0; freetab = *freetabp; if (isblock) { - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + btp = xfs_dir2_block_tail_p(mp, (struct xfs_dir2_data_hdr *)d); blp = xfs_dir2_block_leaf_p(btp); endptr = (char *)blp; if (endptr > (char *)btp) @@ -1465,7 +1464,7 @@ longform_dir2_entry_check_data( dir2_kill_block(mp, ip, da_bno, bp); } else { do_warn(_("would junk block\n")); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); } freetab->ents[db].v = NULLDATAOFF; *bpp = NULL; @@ -1483,8 +1482,8 @@ longform_dir2_entry_check_data( res_failed(error); libxfs_trans_ijoin(tp, ip, 0); libxfs_trans_ihold(tp, ip); - libxfs_da_bjoin(tp, bp); - libxfs_da_bhold(tp, bp); + libxfs_trans_bjoin(tp, bp); + libxfs_trans_bhold(tp, bp); xfs_bmap_init(&flist, &firstblock); if (be32_to_cpu(d->hdr.magic) != wantmagic) { do_warn( @@ -1749,7 +1748,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 " } *num_illegal += nbad; if (needscan) - libxfs_dir2_data_freescan(mp, d, &needlog); + libxfs_dir2_data_freescan(mp, &d->hdr, &needlog); if (needlog) libxfs_dir2_data_log_header(tp, bp); libxfs_bmap_finish(&tp, &flist, &committed); @@ -1770,7 +1769,7 @@ longform_dir2_check_leaf( { int badtail; __be16 *bestsp; - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_dablk_t da_bno; int i; xfs_dir2_leaf_t *leaf; @@ -1778,13 +1777,13 @@ longform_dir2_check_leaf( int seeval; da_bno = mp->m_dirleafblk; - if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK)) { + if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK, NULL)) { do_error( _("can't read block %u for directory inode %" PRIu64 "\n"), da_bno, ip->i_ino); /* NOTREACHED */ } - leaf = bp->data; + leaf = bp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR2_LEAF1_MAGIC || @@ -1792,21 +1791,21 @@ longform_dir2_check_leaf( 
be32_to_cpu(leaf->hdr.info.back) || be16_to_cpu(leaf->hdr.count) < be16_to_cpu(leaf->hdr.stale) || - be16_to_cpu(leaf->hdr.count) > + be16_to_cpu(leaf->hdr.count) > xfs_dir2_max_leaf_ents(mp) || - (char *)&leaf->ents[be16_to_cpu( + (char *)&leaf->ents[be16_to_cpu( leaf->hdr.count)] > (char *)bestsp) { do_warn( _("leaf block %u for directory inode %" PRIu64 " bad header\n"), da_bno, ip->i_ino); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } seeval = dir_hash_see_all(hashtab, leaf->ents, be16_to_cpu(leaf->hdr.count), be16_to_cpu(leaf->hdr.stale)); if (dir_hash_check(hashtab, ip, seeval)) { - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } badtail = freetab->nents != be32_to_cpu(ltp->bestcount); @@ -1818,10 +1817,10 @@ longform_dir2_check_leaf( do_warn( _("leaf block %u for directory inode %" PRIu64 " bad tail\n"), da_bno, ip->i_ino); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 0; } @@ -1836,7 +1835,7 @@ longform_dir2_check_node( dir_hash_tab_t *hashtab, freetab_t *freetab) { - xfs_dabuf_t *bp; + struct xfs_buf *bp; xfs_dablk_t da_bno; xfs_dir2_db_t fdb; xfs_dir2_free_t *free; @@ -1852,25 +1851,25 @@ longform_dir2_check_node( next_da_bno = da_bno + mp->m_dirblkfsbs - 1; if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) break; - if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, - XFS_DATA_FORK)) { + if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp, + XFS_DATA_FORK, NULL)) { do_warn( _("can't read leaf block %u for directory inode %" PRIu64 "\n"), da_bno, ip->i_ino); return 1; } - leaf = bp->data; + leaf = bp->b_addr; if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR2_LEAFN_MAGIC) { if (be16_to_cpu(leaf->hdr.info.magic) == XFS_DA_NODE_MAGIC) { - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); continue; } do_warn( _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"), be16_to_cpu(leaf->hdr.info.magic), da_bno, ip->i_ino); - 
libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } if (be16_to_cpu(leaf->hdr.count) > xfs_dir2_max_leaf_ents(mp) || @@ -1879,13 +1878,13 @@ longform_dir2_check_node( do_warn( _("leaf block %u for directory inode %" PRIu64 " bad header\n"), da_bno, ip->i_ino); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } seeval = dir_hash_see_all(hashtab, leaf->ents, be16_to_cpu(leaf->hdr.count), be16_to_cpu(leaf->hdr.stale)); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); if (seeval != DIR_HASH_CK_OK) return 1; } @@ -1898,25 +1897,25 @@ longform_dir2_check_node( next_da_bno = da_bno + mp->m_dirblkfsbs - 1; if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) break; - if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, - XFS_DATA_FORK)) { + if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp, + XFS_DATA_FORK, NULL)) { do_warn( _("can't read freespace block %u for directory inode %" PRIu64 "\n"), da_bno, ip->i_ino); return 1; } - free = bp->data; + free = bp->b_addr; fdb = xfs_dir2_da_to_db(mp, da_bno); if (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC || be32_to_cpu(free->hdr.firstdb) != (fdb - XFS_DIR2_FREE_FIRSTDB(mp)) * - XFS_DIR2_MAX_FREE_BESTS(mp) || + xfs_dir2_free_max_bests(mp) || be32_to_cpu(free->hdr.nvalid) < be32_to_cpu(free->hdr.nused)) { do_warn( _("free block %u for directory inode %" PRIu64 " bad header\n"), da_bno, ip->i_ino); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } for (i = used = 0; i < be32_to_cpu(free->hdr.nvalid); i++) { @@ -1924,11 +1923,11 @@ longform_dir2_check_node( freetab->nents || freetab->ents[i + be32_to_cpu( free->hdr.firstdb)].v != - be16_to_cpu(free->bests[i])) { + be16_to_cpu(free->bests[i])) { do_warn( _("free block %u entry %i for directory ino %" PRIu64 " bad\n"), da_bno, i, ip->i_ino); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } used += be16_to_cpu(free->bests[i]) != NULLDATAOFF; @@ -1938,10 +1937,10 @@ longform_dir2_check_node( do_warn( _("free block %u for 
directory inode %" PRIu64 " bad nused\n"), da_bno, ip->i_ino); - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); return 1; } - libxfs_da_brelse(NULL, bp); + libxfs_putbuf(bp); } for (i = 0; i < freetab->nents; i++) { if ((freetab->ents[i].s == 0) && @@ -1971,7 +1970,7 @@ longform_dir2_entry_check(xfs_mount_t *mp, dir_hash_tab_t *hashtab) { xfs_dir2_block_t *block; - xfs_dabuf_t **bplist; + struct xfs_buf **bplist; xfs_dablk_t da_bno; freetab_t *freetab; int num_bps; @@ -1998,7 +1997,7 @@ longform_dir2_entry_check(xfs_mount_t *mp, freetab->ents[i].s = 0; } num_bps = freetab->naents; - bplist = calloc(num_bps, sizeof(xfs_dabuf_t*)); + bplist = calloc(num_bps, sizeof(struct xfs_buf*)); /* is this a block, leaf, or node directory? */ libxfs_dir2_isblock(NULL, ip, &isblock); libxfs_dir2_isleaf(NULL, ip, &isleaf); @@ -2014,14 +2013,14 @@ longform_dir2_entry_check(xfs_mount_t *mp, if (db >= num_bps) { /* more data blocks than expected */ num_bps = db + 1; - bplist = realloc(bplist, num_bps * sizeof(xfs_dabuf_t*)); + bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*)); if (!bplist) do_error( _("realloc failed in longform_dir2_entry_check (%zu bytes)\n"), - num_bps * sizeof(xfs_dabuf_t*)); + num_bps * sizeof(struct xfs_buf*)); } - if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bplist[db], - XFS_DATA_FORK)) { + if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bplist[db], + XFS_DATA_FORK, NULL)) { do_warn( _("can't read data block %u for directory inode %" PRIu64 "\n"), da_bno, ino); @@ -2040,8 +2039,8 @@ longform_dir2_entry_check(xfs_mount_t *mp, xfs_dir2_block_tail_t *btp; xfs_dir2_leaf_entry_t *blp; - block = bplist[0]->data; - btp = xfs_dir2_block_tail_p(mp, block); + block = bplist[0]->b_addr; + btp = xfs_dir2_block_tail_p(mp, &block->hdr); blp = xfs_dir2_block_leaf_p(btp); seeval = dir_hash_see_all(hashtab, blp, be32_to_cpu(btp->count), @@ -2060,14 +2059,14 @@ longform_dir2_entry_check(xfs_mount_t *mp, dir_hash_dup_names(hashtab); for (i = 0; i < freetab->naents; 
i++) if (bplist[i]) - libxfs_da_brelse(NULL, bplist[i]); + libxfs_putbuf(bplist[i]); longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab); *num_illegal = 0; *need_dot = 0; } else { for (i = 0; i < freetab->naents; i++) if (bplist[i]) - libxfs_da_brelse(NULL, bplist[i]); + libxfs_putbuf(bplist[i]); } free(bplist); @@ -2126,7 +2125,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"), ino, parent); - xfs_dir2_sf_put_inumber(sfp, &parent, &sfp->hdr.parent); + xfs_dir2_sf_put_parent_ino(&sfp->hdr, parent); *ino_dirty = 1; } return; @@ -2143,15 +2142,14 @@ shortform_dir2_entry_check(xfs_mount_t *mp, /* * Initialise i8 counter -- the parent inode number counts as well. */ - i8 = (xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent) > - XFS_DIR2_MAX_SHORT_INUM); + i8 = xfs_dir2_sf_get_parent_ino(&sfp->hdr) > XFS_DIR2_MAX_SHORT_INUM; /* * now run through entries, stop at first bad entry, don't need * to skip over '..' since that's encoded in its own field and * no need to worry about '.' since it doesn't exist. 
*/ - sfep = next_sfep = xfs_dir2_sf_firstentry(sfp); + sfep = next_sfep = xfs_dir2_sf_firstentry(&sfp->hdr); for (i = 0; i < sfp->hdr.count && max_size > (__psint_t)next_sfep - (__psint_t)sfp; @@ -2160,7 +2158,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, bad_sfnamelen = 0; tmp_sfep = NULL; - lino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); + lino = xfs_dir2_sfe_get_ino(&sfp->hdr, sfep); namelen = sfep->namelen; @@ -2189,7 +2187,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, break; } } else if (no_modify && (__psint_t) sfep - (__psint_t) sfp + - + xfs_dir2_sf_entsize_byentry(sfp, sfep) + + xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen) > ip->i_d.di_size) { bad_sfnamelen = 1; @@ -2219,7 +2217,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, if (no_modify && verify_inum(mp, lino)) { next_sfep = (xfs_dir2_sf_entry_t *)((__psint_t)sfep + - xfs_dir2_sf_entsize_byentry(sfp, sfep)); + xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen)); continue; } @@ -2270,8 +2268,8 @@ shortform_dir2_entry_check(xfs_mount_t *mp, * check for duplicate names in directory. */ if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t) - (sfep - xfs_dir2_sf_firstentry(sfp)), - lino, sfep->namelen, sfep->name)) { + (sfep - xfs_dir2_sf_firstentry(&sfp->hdr)), + lino, sfep->namelen, sfep->name)) { do_warn( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), fname, lino, ino); @@ -2327,7 +2325,8 @@ do_junkit: if (lino == orphanage_ino) orphanage_ino = 0; if (!no_modify) { - tmp_elen = xfs_dir2_sf_entsize_byentry(sfp, sfep); + tmp_elen = xfs_dir2_sf_entsize(&sfp->hdr, + sfep->namelen); tmp_sfep = (xfs_dir2_sf_entry_t *) ((__psint_t) sfep + tmp_elen); tmp_len = max_size - ((__psint_t) tmp_sfep @@ -2378,9 +2377,9 @@ do_junkit: next_sfep = (tmp_sfep == NULL) ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep - + ((!bad_sfnamelen) - ? xfs_dir2_sf_entsize_byentry(sfp, sfep) - : xfs_dir2_sf_entsize_byname(sfp, namelen))) + + ((!bad_sfnamelen) + ? 
xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen) + : xfs_dir2_sf_entsize(&sfp->hdr, namelen))) : tmp_sfep; }