xfsprogs: sync code to current kernel code

author Dave Chinner <dchinner@redhat.com>
Thu, 9 May 2013 12:23:15 +0000 (07:23 -0500)
committer Rich Johnston <rjohnston@sgi.com>
Thu, 9 May 2013 12:23:15 +0000 (07:23 -0500)
diff --git a/db/check.c b/db/check.c

index 4493fca75d0bb53c040543241af4a608ddaad503..ff24e339d804589daae1865548abb3815194fa99 100644 (file)
--- a/db/check.c
+++ b/db/check.c
@@ -31,6 +31,7 @@
  #include "output.h"
  #include "init.h"
  #include "malloc.h"
+#include "dir2.h"
  
  typedef enum {
         IS_USER_QUOTA, IS_PROJECT_QUOTA, IS_GROUP_QUOTA,
@@ -2212,7 +2213,7 @@ process_data_dir_v2(
         bf = data->hdr.bestfree;
         ptr = (char *)data->u;
         if (be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-               btp = xfs_dir2_block_tail_p(mp, block);
+               btp = xfs_dir2_block_tail_p(mp, &block->hdr);
                 lep = xfs_dir2_block_leaf_p(btp);
                 endptr = (char *)lep;
                 if (endptr <= ptr || endptr > (char *)btp) {
@@ -2792,7 +2793,7 @@ process_inode(
                         break;
                 }
                 if (ic) {
-                       dqprid = xfs_get_projid(idic);  /* dquot ID is u32 */
+                       dqprid = xfs_get_projid(&idic); /* dquot ID is u32 */
                         quota_add(&dqprid, &idic.di_gid, &idic.di_uid,
                                   0, bc, ic, rc);
                 }
@@ -3003,7 +3004,7 @@ process_leaf_node_dir_v2_free(
                 error++;
                 return;
         }
-       maxent = XFS_DIR2_MAX_FREE_BESTS(mp);
+       maxent = xfs_dir2_free_max_bests(mp);
         if (be32_to_cpu(free->hdr.firstdb) != xfs_dir2_da_to_db(mp, 
                                         dabno - mp->m_dirfreeblk) * maxent) {
                 if (!sflag || v)
@@ -3415,10 +3416,10 @@ process_sf_dir_v2(
         if (v)
                 dbprintf(_("dir %lld entry . %lld\n"), id->ino, id->ino);
         (*dot)++;
-       sfe = xfs_dir2_sf_firstentry(sf);
+       sfe = xfs_dir2_sf_firstentry(&sf->hdr);
         offset = XFS_DIR2_DATA_FIRST_OFFSET;
         for (i = sf->hdr.count - 1, i8 = 0; i >= 0; i--) {
-               if ((__psint_t)sfe + xfs_dir2_sf_entsize_byentry(sf, sfe) -
+               if ((__psint_t)sfe + xfs_dir2_sf_entsize(&sf->hdr,sfe->namelen) -
                     (__psint_t)sf > be64_to_cpu(dip->di_size)) {
                         if (!sflag)
                                 dbprintf(_("dir %llu bad size in entry at %d\n"),
@@ -3427,7 +3428,7 @@ process_sf_dir_v2(
                         error++;
                         break;
                 }
-               lino = xfs_dir2_sf_get_inumber(sf, xfs_dir2_sf_inumberp(sfe));
+               lino = xfs_dir2_sfe_get_ino(&sf->hdr, sfe);
                 if (lino > XFS_DIR2_MAX_SHORT_INUM)
                         i8++;
                 cid = find_inode(lino, 1);
@@ -3457,8 +3458,8 @@ process_sf_dir_v2(
                 }
                 offset =
                         xfs_dir2_sf_get_offset(sfe) +
-                       xfs_dir2_data_entsize(sfe->namelen);
-               sfe = xfs_dir2_sf_nextentry(sf, sfe);
+                       xfs_dir2_sf_entsize(&sf->hdr, sfe->namelen);
+               sfe = xfs_dir2_sf_nextentry(&sf->hdr, sfe);
         }
         if (i < 0 && (__psint_t)sfe - (__psint_t)sf != 
                                         be64_to_cpu(dip->di_size)) {
@@ -3474,7 +3475,7 @@ process_sf_dir_v2(
                         dbprintf(_("dir %llu offsets too high\n"), id->ino);
                 error++;
         }
-       lino = xfs_dir2_sf_get_inumber(sf, &sf->hdr.parent);
+       lino = xfs_dir2_sf_get_parent_ino(&sf->hdr);
         if (lino > XFS_DIR2_MAX_SHORT_INUM)
                 i8++;
         cid = find_inode(lino, 1);
diff --git a/db/dir2.c b/db/dir2.c

index 0b8b99059c286bea32b26b15c4be4335d1c65076..a539f2d0860ff33f7e12577bf217a9725c3765e7 100644 (file)
--- a/db/dir2.c
+++ b/db/dir2.c
@@ -215,7 +215,7 @@ dir2_block_leaf_count(
         block = obj;
         if (be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)
                 return 0;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
         return be32_to_cpu(btp->count);
  }
  
@@ -233,7 +233,7 @@ dir2_block_leaf_offset(
         ASSERT(startoff == 0);
         block = obj;
         ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
         lep = xfs_dir2_block_leaf_p(btp) + idx;
         return bitize((int)((char *)lep - (char *)block));
  }
@@ -265,7 +265,7 @@ dir2_block_tail_offset(
         ASSERT(idx == 0);
         block = obj;
         ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
         return bitize((int)((char *)btp - (char *)block));
  }
  
@@ -287,7 +287,7 @@ dir2_block_u_count(
         block = obj;
         if (be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)
                 return 0;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
         ptr = (char *)block->u;
         endptr = (char *)xfs_dir2_block_leaf_p(btp);
         for (i = 0; ptr < endptr; i++) {
@@ -320,7 +320,7 @@ dir2_block_u_offset(
         ASSERT(startoff == 0);
         block = obj;
         ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
         ptr = (char *)block->u;
         endptr = (char *)xfs_dir2_block_leaf_p(btp);
         for (i = 0; i < idx; i++) {
diff --git a/db/dir2.h b/db/dir2.h

index 0a8467aa9d0be1ff8a0eb9ab901d338ed6fec6e0..a5f0bec97b991397ebd85a33dfb0e24c0683f234 100644 (file)
--- a/db/dir2.h
+++ b/db/dir2.h
@@ -31,5 +31,35 @@ extern const field_t da_blkinfo_flds[];
  extern const field_t   da_node_entry_flds[];
  extern const field_t   da_node_hdr_flds[];
  
+/*
+ * generic dir2 structures used by xfs_db
+ */
+typedef union {
+       xfs_dir2_data_entry_t   entry;
+       xfs_dir2_data_unused_t  unused;
+} xfs_dir2_data_union_t;
+
+typedef struct xfs_dir2_data {
+       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_DATA_MAGIC */
+       xfs_dir2_data_union_t   u[1];
+} xfs_dir2_data_t;
+
+typedef struct xfs_dir2_block {
+       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_BLOCK_MAGIC */
+       xfs_dir2_data_union_t   u[1];
+       xfs_dir2_leaf_entry_t   leaf[1];
+       xfs_dir2_block_tail_t   tail;
+} xfs_dir2_block_t;
+
+typedef struct xfs_dir2_sf {
+       xfs_dir2_sf_hdr_t       hdr;            /* shortform header */
+       xfs_dir2_sf_entry_t     list[1];        /* shortform entries */
+} xfs_dir2_sf_t;
+
+static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
+{
+       return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
+}
+
  extern int     dir2_data_union_size(void *obj, int startoff, int idx);
  extern int     dir2_size(void *obj, int startoff, int idx);
diff --git a/db/dir2sf.c b/db/dir2sf.c

index b2db08820b331caf0d0128551fda444e8ce59e3d..92f8a66cf80c961f5e037f507b00950876cad283 100644 (file)
--- a/db/dir2sf.c
+++ b/db/dir2sf.c
@@ -22,6 +22,7 @@
  #include "fprint.h"
  #include "field.h"
  #include "bit.h"
+#include "dir2.h"
  #include "dir2sf.h"
  
  static int     dir2_inou_i4_count(void *obj, int startoff);
@@ -149,10 +150,10 @@ dir2_sf_entry_size(
  
         ASSERT(bitoffs(startoff) == 0);
         sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff));
-       e = xfs_dir2_sf_firstentry(sf);
+       e = xfs_dir2_sf_firstentry(&sf->hdr);
         for (i = 0; i < idx; i++)
-               e = xfs_dir2_sf_nextentry(sf, e);
-       return bitize((int)xfs_dir2_sf_entsize_byentry(sf, e));
+               e = xfs_dir2_sf_nextentry(&sf->hdr, e);
+       return bitize((int)xfs_dir2_sf_entsize(&sf->hdr, e->namelen));
  }
  
  /*ARGSUSED*/
@@ -194,9 +195,9 @@ dir2_sf_list_offset(
  
         ASSERT(bitoffs(startoff) == 0);
         sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff));
-       e = xfs_dir2_sf_firstentry(sf);
+       e = xfs_dir2_sf_firstentry(&sf->hdr);
         for (i = 0; i < idx; i++)
-               e = xfs_dir2_sf_nextentry(sf, e);
+               e = xfs_dir2_sf_nextentry(&sf->hdr, e);
         return bitize((int)((char *)e - (char *)sf));
  }
  
@@ -214,8 +215,8 @@ dir2sf_size(
         ASSERT(bitoffs(startoff) == 0);
         ASSERT(idx == 0);
         sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff));
-       e = xfs_dir2_sf_firstentry(sf);
+       e = xfs_dir2_sf_firstentry(&sf->hdr);
         for (i = 0; i < sf->hdr.count; i++)
-               e = xfs_dir2_sf_nextentry(sf, e);
+               e = xfs_dir2_sf_nextentry(&sf->hdr, e);
         return bitize((int)((char *)e - (char *)sf));
  }
diff --git a/db/metadump.c b/db/metadump.c

index 9f15d9e576f9fc30f17794be85cbea93db94d4d0..5739f86bb5184b9fd18d390321bfe584feab807c 100644 (file)
--- a/db/metadump.c
+++ b/db/metadump.c
@@ -26,6 +26,10 @@
  #include "init.h"
  #include "sig.h"
  #include "xfs_metadump.h"
+#include "fprint.h"
+#include "faddr.h"
+#include "field.h"
+#include "dir2.h"
  
  #define DEFAULT_MAX_EXT_SIZE   1000
  
@@ -916,7 +920,7 @@ obfuscate_sf_dir(
                                         (long long)cur_ino);
         }
  
-       sfep = xfs_dir2_sf_firstentry(sfp);
+       sfep = xfs_dir2_sf_firstentry(&sfp->hdr);
         for (i = 0; (i < sfp->hdr.count) &&
                         ((char *)sfep - (char *)sfp < ino_dir_size); i++) {
  
@@ -935,7 +939,7 @@ obfuscate_sf_dir(
                         namelen = ino_dir_size - ((char *)&sfep->name[0] -
                                          (char *)sfp);
                 } else if ((char *)sfep - (char *)sfp +
-                               xfs_dir2_sf_entsize_byentry(sfp, sfep) >
+                               xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen) >
                                 ino_dir_size) {
                         if (show_warnings)
                                 print_warning("entry length in dir inode %llu "
@@ -946,12 +950,11 @@ obfuscate_sf_dir(
                                          (char *)sfp);
                 }
  
-               generate_obfuscated_name(xfs_dir2_sf_get_inumber(sfp,
-                               xfs_dir2_sf_inumberp(sfep)), namelen,
-                               &sfep->name[0]);
+               generate_obfuscated_name(xfs_dir2_sfe_get_ino(&sfp->hdr, sfep),
+                                        namelen, &sfep->name[0]);
  
                 sfep = (xfs_dir2_sf_entry_t *)((char *)sfep +
-                               xfs_dir2_sf_entsize_byname(sfp, namelen));
+                               xfs_dir2_sf_entsize(&sfp->hdr, namelen));
         }
  }
  
@@ -1107,9 +1110,10 @@ obfuscate_dir_data_blocks(
                         if (is_block_format) {
                                 xfs_dir2_leaf_entry_t   *blp;
                                 xfs_dir2_block_tail_t   *btp;
+                               xfs_dir2_block_t        *blk;
  
-                               btp = xfs_dir2_block_tail_p(mp,
-                                               (xfs_dir2_block_t *)block);
+                               blk = (xfs_dir2_block_t *)block;
+                               btp = xfs_dir2_block_tail_p(mp, &blk->hdr);
                                 blp = xfs_dir2_block_leaf_p(btp);
                                 if ((char *)blp > (char *)btp)
                                         blp = (xfs_dir2_leaf_entry_t *)btp;
diff --git a/estimate/xfs_estimate.c b/estimate/xfs_estimate.c

index 310c1f45aa6530b332763bd9657087277acf47f0..c574a08a09535a08c80c0167aab08f05ffd6064d 100644 (file)
--- a/estimate/xfs_estimate.c
+++ b/estimate/xfs_estimate.c
@@ -18,6 +18,8 @@
  
  /*
   * Estimate space of an XFS filesystem
+ *
+ * XXX: assumes dirv1 format.
   */
  #include <xfs/libxfs.h>
  #include <sys/stat.h>
diff --git a/fsr/xfs_fsr.c b/fsr/xfs_fsr.c

index d4ec9a3be35623903f51004ea840d6b8e51472cc..66a357095508d64a83c0fd4c010c4bde9d151b6d 100644 (file)
--- a/fsr/xfs_fsr.c
+++ b/fsr/xfs_fsr.c
@@ -16,8 +16,8 @@
   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   */
  
-#include <xfs/xfs.h>
  #include <libxfs.h>
+#include <xfs/xfs.h>
  #include <xfs/xfs_types.h>
  #include <xfs/jdm.h>
  #include <xfs/xfs_dfrag.h>
diff --git a/include/Makefile b/include/Makefile

index 79db532dbce6aefd9b710724578248a150167f78..22e8726cf0092e1615416edc3f5eb4a349f4cafb 100644 (file)
--- a/include/Makefile
+++ b/include/Makefile
@@ -24,15 +24,14 @@ QAHFILES = libxfs.h libxlog.h \
         xfs_ag.h xfs_alloc.h xfs_alloc_btree.h xfs_arch.h xfs_attr_leaf.h \
         xfs_attr_sf.h xfs_bit.h xfs_bmap.h xfs_bmap_btree.h xfs_btree.h \
         xfs_btree_trace.h xfs_buf_item.h xfs_da_btree.h xfs_dinode.h \
-       xfs_dir2.h xfs_dir2_block.h xfs_dir2_data.h xfs_dir2_leaf.h \
-       xfs_dir2_node.h xfs_dir2_sf.h \
+       xfs_dir2.h xfs_dir2_format.h \
         xfs_extfree_item.h xfs_ialloc.h xfs_ialloc_btree.h \
         xfs_inode.h xfs_inode_item.h xfs_inum.h \
         xfs_log.h xfs_log_priv.h xfs_log_recover.h xfs_metadump.h \
         xfs_mount.h xfs_quota.h xfs_rtalloc.h xfs_sb.h xfs_trace.h \
-       xfs_trans.h xfs_trans_space.h xfs_types.h xfs_dfrag.h
+       xfs_trans.h xfs_trans_space.h xfs_dfrag.h
  
-HFILES = handle.h jdm.h xqm.h xfs.h xfs_fs.h
+HFILES = handle.h jdm.h xqm.h xfs.h xfs_fs.h xfs_types.h
  HFILES += $(PKG_PLATFORM).h
  PHFILES = darwin.h freebsd.h irix.h linux.h gnukfreebsd.h
  DKHFILES = volume.h fstyp.h dvh.h
diff --git a/include/libxfs.h b/include/libxfs.h

index f6c7abcad9fb2e65190058d24c1abffb0d348611..0e2fc5fcd0e8b8f1e8edb69b421702e08655533d 100644 (file)
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -33,8 +33,8 @@
  #include <xfs/swab.h>
  #include <xfs/atomic.h>
  
-#include <xfs/xfs_fs.h>
  #include <xfs/xfs_types.h>
+#include <xfs/xfs_fs.h>
  #include <xfs/xfs_arch.h>
  #include <xfs/xfs_bit.h>
  #include <xfs/xfs_inum.h>
@@ -46,7 +46,6 @@
  #include <xfs/xfs_bmap_btree.h>
  #include <xfs/xfs_alloc_btree.h>
  #include <xfs/xfs_ialloc_btree.h>
-#include <xfs/xfs_dir2_sf.h>
  #include <xfs/xfs_attr_sf.h>
  #include <xfs/xfs_dinode.h>
  #include <xfs/xfs_inode.h>
@@ -135,7 +134,6 @@ extern int  libxfs_log_clear (dev_t, xfs_daddr_t, uint, uuid_t *,
  extern int     libxfs_log_header (xfs_caddr_t, uuid_t *, int, int, int,
                                 libxfs_get_block_t *, void *);
  
-
  /*
   * Define a user-level mount structure with all we need
   * in order to make use of the numerous XFS_* macros.
@@ -199,6 +197,7 @@ typedef struct xfs_mount {
         xfs_dablk_t             m_dirfreeblk;   /* blockno of dirfreeindex v2 */
  } xfs_mount_t;
  
+
  #define LIBXFS_MOUNT_ROOTINOS          0x0001
  #define LIBXFS_MOUNT_DEBUGGER          0x0002
  #define LIBXFS_MOUNT_32BITINODES       0x0004
@@ -218,11 +217,32 @@ extern void       libxfs_rtmount_destroy (xfs_mount_t *);
  /*
   * Simple I/O interface
   */
+typedef struct xfs_buftarg {
+       struct xfs_mount        *bt_mount;
+       dev_t                   dev;
+} xfs_buftarg_t;
+
+#define XB_PAGES        2
+
+struct xfs_buf_map {
+       xfs_daddr_t             bm_bn;  /* block number for I/O */
+       int                     bm_len; /* size of I/O */
+};
+
+#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
+       struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
+
+struct xfs_buf_ops {
+       void (*verify_read)(struct xfs_buf *);
+       void (*verify_write)(struct xfs_buf *);
+};
+
  typedef struct xfs_buf {
         struct cache_node       b_node;
         unsigned int            b_flags;
         xfs_daddr_t             b_blkno;
         unsigned                b_bcount;
+       unsigned int            b_length;
         dev_t                   b_dev;
         pthread_mutex_t         b_lock;
         pthread_t               b_holder;
@@ -230,8 +250,13 @@ typedef struct xfs_buf {
         void                    *b_fsprivate;
         void                    *b_fsprivate2;
         void                    *b_fsprivate3;
-       char                    *b_addr;
+       void                    *b_addr;
         int                     b_error;
+       const struct xfs_buf_ops *b_ops;
+       struct xfs_buftarg      *b_target;
+       struct xfs_perag        *b_pag;
+       struct xfs_buf_map      *b_map;
+       int                     b_nmaps;
  #ifdef XFS_BUF_TRACING
         struct list_head        b_lock_list;
         const char              *b_func;
@@ -244,10 +269,11 @@ enum xfs_buf_flags_t {    /* b_flags bits */
         LIBXFS_B_EXIT           = 0x0001,       /* ==LIBXFS_EXIT_ON_FAILURE */
         LIBXFS_B_DIRTY          = 0x0002,       /* buffer has been modified */
         LIBXFS_B_STALE          = 0x0004,       /* buffer marked as invalid */
-       LIBXFS_B_UPTODATE       = 0x0008        /* buffer is sync'd to disk */
+       LIBXFS_B_UPTODATE       = 0x0008,       /* buffer is sync'd to disk */
+       LIBXFS_B_DISCONTIG      = 0x0010,       /* discontiguous buffer */
  };
  
-#define XFS_BUF_PTR(bp)                        ((bp)->b_addr)
+#define XFS_BUF_PTR(bp)                        ((char *)(bp)->b_addr)
  #define xfs_buf_offset(bp, offset)     (XFS_BUF_PTR(bp) + (offset))
  #define XFS_BUF_ADDR(bp)               ((bp)->b_blkno)
  #define XFS_BUF_SIZE(bp)               ((bp)->b_bcount)
@@ -274,6 +300,8 @@ enum xfs_buf_flags_t {      /* b_flags bits */
                                                 (pri))
  #define XFS_BUF_PRIORITY(bp)           (cache_node_get_priority( \
                                                 (struct cache_node *)(bp)))
+#define xfs_buf_set_ref(bp,ref)                ((void) 0)
+#define xfs_buf_ioerror(bp,err)                ((bp)->b_error = (err)) /* plain expression: caller supplies the ';' */
  
  /* Buffer Cache Interfaces */
  
@@ -287,23 +315,34 @@ extern struct cache_operations    libxfs_bcache_operations;
  #define libxfs_readbuf(dev, daddr, len, flags) \
         libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, \
                             (dev), (daddr), (len), (flags))
+#define libxfs_readbuf_map(dev, map, nmaps, flags) \
+       libxfs_trace_readbuf_map(__FUNCTION__, __FILE__, __LINE__, \
+                           (dev), (map), (nmaps), (flags))
  #define libxfs_writebuf(buf, flags) \
         libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, \
                               (buf), (flags))
  #define libxfs_getbuf(dev, daddr, len) \
         libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
                             (dev), (daddr), (len))
+#define libxfs_getbuf_map(dev, map, nmaps) \
+       libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \
+                           (dev), (map), (nmaps))
  #define libxfs_getbuf_flags(dev, daddr, len, flags) \
-       libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+       libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \
                             (dev), (daddr), (len), (flags))
  #define libxfs_putbuf(buf) \
         libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
  
  extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int,
                         dev_t, xfs_daddr_t, int, int);
+extern xfs_buf_t *libxfs_trace_readbuf_map(const char *, const char *, int,
+                       dev_t, struct xfs_buf_map *, int, int);
  extern int     libxfs_trace_writebuf(const char *, const char *, int,
                         xfs_buf_t *, int);
-extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int);
+extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int,
+                       dev_t, xfs_daddr_t, int);
+extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int,
+                       dev_t, struct xfs_buf_map *, int);
  extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
                         dev_t, xfs_daddr_t, int, unsigned int);
  extern void    libxfs_trace_putbuf (const char *, const char *, int,
@@ -312,8 +351,10 @@ extern void        libxfs_trace_putbuf (const char *, const char *, int,
  #else
  
  extern xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
+extern xfs_buf_t *libxfs_readbuf_map(dev_t, struct xfs_buf_map *, int, int);
  extern int     libxfs_writebuf(xfs_buf_t *, int);
  extern xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+extern xfs_buf_t *libxfs_getbuf_map(dev_t, struct xfs_buf_map *, int);
  extern xfs_buf_t *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int);
  extern void    libxfs_putbuf (xfs_buf_t *);
  
@@ -357,6 +398,7 @@ typedef struct xfs_inode_log_item {
         xfs_log_item_t          ili_item;               /* common portion */
         struct xfs_inode        *ili_inode;             /* inode pointer */
         unsigned short          ili_flags;              /* misc flags */
+       unsigned int            ili_fields;             /* fields to be logged */
         unsigned int            ili_last_fields;        /* fields when flushed*/
         xfs_inode_log_format_t  ili_format;             /* logged structure */
         int                     ili_lock_flags;
@@ -408,11 +450,50 @@ extern void       libxfs_trans_bjoin (xfs_trans_t *, struct xfs_buf *);
  extern void    libxfs_trans_bhold (xfs_trans_t *, struct xfs_buf *);
  extern void    libxfs_trans_log_buf (xfs_trans_t *, struct xfs_buf *,
                                 uint, uint);
+/*
  extern xfs_buf_t       *libxfs_trans_get_buf (xfs_trans_t *, dev_t,
                                 xfs_daddr_t, int, uint);
  extern int     libxfs_trans_read_buf (xfs_mount_t *, xfs_trans_t *, dev_t,
                                 xfs_daddr_t, int, uint, struct xfs_buf **);
-
+*/
+
+struct xfs_buf *libxfs_trans_get_buf_map(struct xfs_trans *tp, dev_t dev,
+                                      struct xfs_buf_map *map, int nmaps,
+                                      uint flags);
+
+static inline struct xfs_buf *
+libxfs_trans_get_buf(
+       struct xfs_trans        *tp,
+       dev_t                   dev,
+       xfs_daddr_t             blkno,
+       int                     numblks,
+       uint                    flags)
+{
+       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+       return libxfs_trans_get_buf_map(tp, dev, &map, 1, flags);
+}
+
+int            libxfs_trans_read_buf_map(struct xfs_mount *mp,
+                                      struct xfs_trans *tp, dev_t dev,
+                                      struct xfs_buf_map *map, int nmaps,
+                                      uint flags, struct xfs_buf **bpp,
+                                      const struct xfs_buf_ops *ops);
+
+static inline int
+libxfs_trans_read_buf(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       dev_t                   dev,
+       xfs_daddr_t             blkno,
+       int                     numblks,
+       uint                    flags,
+       struct xfs_buf          **bpp,
+       const struct xfs_buf_ops *ops)
+{
+       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+       return libxfs_trans_read_buf_map(mp, tp, dev, &map, 1,
+                                     flags, bpp, ops);
+}
  
  /*
   * Inode interface
@@ -437,6 +518,27 @@ typedef struct xfs_inode {
  #define LIBXFS_ATTR_CREATE     0x0010  /* create, but fail if attr exists */
  #define LIBXFS_ATTR_REPLACE    0x0020  /* set, but fail if attr not exists */
  
+/*
+ * Project quota id helpers (previously projid was 16bit only and using two
+ * 16bit values to hold new 32bit projid was chosen to retain compatibility with
+ * "old" filesystems).
+ *
+ * Copied here from xfs_inode.h because it has to be defined after the struct
+ * xfs_inode...
+ */
+static inline prid_t
+xfs_get_projid(struct xfs_icdinode *id)
+{
+       return (prid_t)id->di_projid_hi << 16 | id->di_projid_lo;
+}
+
+static inline void
+xfs_set_projid(struct xfs_icdinode *id, prid_t projid)
+{
+       id->di_projid_hi = (__uint16_t) (projid >> 16);
+       id->di_projid_lo = (__uint16_t) (projid & 0xffff);
+}
+
  typedef struct cred {
         uid_t   cr_uid;
         gid_t   cr_gid;
@@ -450,8 +552,6 @@ extern void libxfs_trans_inode_alloc_buf (xfs_trans_t *, xfs_buf_t *);
  extern void    libxfs_trans_ichgtime(struct xfs_trans *,
                                         struct xfs_inode *, int);
  extern int     libxfs_iflush_int (xfs_inode_t *, xfs_buf_t *);
-extern int     libxfs_iread (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
-                               xfs_inode_t *, xfs_daddr_t);
  
  /* Inode Cache Interfaces */
  extern struct cache    *libxfs_icache;
@@ -461,13 +561,7 @@ extern int libxfs_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
                                 uint, xfs_inode_t **, xfs_daddr_t);
  extern void    libxfs_iput (xfs_inode_t *, uint);
  
-extern int     xfs_imap_to_bp(xfs_mount_t *mp, xfs_trans_t *tp, struct xfs_imap *imap,
-                       xfs_buf_t **bpp, uint buf_flags, uint iget_flags);
-
-#include <xfs/xfs_dir2_data.h>
-#include <xfs/xfs_dir2_leaf.h>
-#include <xfs/xfs_dir2_block.h>
-#include <xfs/xfs_dir2_node.h>
+#include <xfs/xfs_dir2_format.h>
  
  /* Shared utility routines */
  extern unsigned int    libxfs_log2_roundup(unsigned int i);
@@ -476,11 +570,6 @@ extern int libxfs_alloc_file_space (xfs_inode_t *, xfs_off_t,
                                 xfs_off_t, int, int);
  extern int     libxfs_bmap_finish(xfs_trans_t **, xfs_bmap_free_t *, int *);
  
-extern void    libxfs_da_bjoin (xfs_trans_t *, xfs_dabuf_t *);
-extern void    libxfs_da_bhold (xfs_trans_t *, xfs_dabuf_t *);
-extern int     libxfs_da_read_bufr(xfs_trans_t *, xfs_inode_t *, xfs_dablk_t,
-                               xfs_daddr_t, xfs_dabuf_t **, int);
-
  extern void    libxfs_fs_repair_cmn_err(int, struct xfs_mount *, char *, ...);
  extern void    libxfs_fs_cmn_err(int, struct xfs_mount *, char *, ...);
  
@@ -534,7 +623,8 @@ void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
  /* xfs_bmap.h */
  #define libxfs_bmap_cancel             xfs_bmap_cancel
  #define libxfs_bmap_last_offset                xfs_bmap_last_offset
-#define libxfs_bmapi                   xfs_bmapi
+#define libxfs_bmapi_write             xfs_bmapi_write
+#define libxfs_bmapi_read              xfs_bmapi_read
  #define libxfs_bunmapi                 xfs_bunmapi
  
  /* xfs_bmap_btree.h */
@@ -544,6 +634,7 @@ void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
  #define libxfs_da_brelse               xfs_da_brelse
  #define libxfs_da_hashname             xfs_da_hashname
  #define libxfs_da_shrink_inode         xfs_da_shrink_inode
+#define libxfs_da_read_buf             xfs_da_read_buf
  
  /* xfs_dir2.h */
  #define libxfs_dir_createname          xfs_dir_createname
@@ -564,6 +655,8 @@ void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
  /* xfs_inode.h */
  #define libxfs_dinode_from_disk                xfs_dinode_from_disk
  #define libxfs_dinode_to_disk          xfs_dinode_to_disk
+void   xfs_dinode_from_disk(struct xfs_icdinode *,
+                            struct xfs_dinode *);
  #define libxfs_idata_realloc           xfs_idata_realloc
  #define libxfs_idestroy_fork           xfs_idestroy_fork
  
diff --git a/include/linux.h b/include/linux.h

index e0bcba28df76ae0bca945e598fe4bea78e3e22b7..5bb91cda0b44027d9978e8fc5cec6270a0c0dfeb 100644 (file)
--- a/include/linux.h
+++ b/include/linux.h
@@ -27,6 +27,7 @@
  #include <malloc.h>
  #include <getopt.h>
  #include <endian.h>
+#include <stdbool.h>
  
  static __inline__ int xfsctl(const char *path, int fd, int cmd, void *p)
  {
diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in

index 4e1e0c403df0f916bef3a689ddf3274f143bb77e..217d6c0d9bfdf94d0a6e08ab2bbc89716eeadaaf 100644 (file)
--- a/include/platform_defs.h.in
+++ b/include/platform_defs.h.in
@@ -34,6 +34,7 @@
  #include <ctype.h>
  #include <sys/types.h>
  #include <limits.h>
+#include <stdbool.h>
  
  #undef HAVE___U32
  #ifdef HAVE___U32
@@ -57,6 +58,10 @@ typedef signed long long int __s64;
  #define __force
  #endif
  
+typedef __u16  __bitwise       __le16;
+typedef __u32  __bitwise       __le32;
+typedef __u64  __bitwise       __le64;
+
  typedef __u16  __bitwise       __be16;
  typedef __u32  __bitwise       __be32;
  typedef __u64  __bitwise       __be64;
diff --git a/include/project.h b/include/project.h

index ea1274aca5948765bf00b7e99b6bb1e3e3534136..328e013bca347be323aaac5291c0f37c019cf050 100644 (file)
--- a/include/project.h
+++ b/include/project.h
@@ -20,10 +20,6 @@
  
  #include <xfs/xfs.h>
  
-#if !defined(__sgi__)
-typedef __uint32_t     prid_t;
-#endif
-
  extern int setprojid(const char *__name, int __fd, prid_t __id);
  extern int getprojid(const char *__name, int __fd, prid_t *__id);
  
diff --git a/include/swab.h b/include/swab.h

index 2684aa70ed88f3d4d41b60199b965cd2edd16e46..3de44d8ca549670cd5a2f5d3d94bb85c460810da 100644 (file)
--- a/include/swab.h
+++ b/include/swab.h
@@ -153,4 +153,64 @@ static __inline__ void __swab64s(__u64 *addr)
         (__extension__ ({__arch__swab64s(addr);}));
  }
  
+/*
+ * Unaligned big-endian accessors.
+ *
+ * Each helper reads or writes the value one byte at a time, most significant
+ * byte first, so the pointer does not need any particular alignment.
+ */
+
+/* Read a 16-bit big-endian value from p. */
+static inline __uint16_t get_unaligned_be16(void *p)
+{
+       __uint8_t *__p = p;
+       return __p[0] << 8 | __p[1];
+}
+
+/*
+ * Read a 32-bit big-endian value from p.  The top byte is widened to
+ * __uint32_t before shifting: left-shifting a promoted int by 24 overflows
+ * (undefined behaviour) whenever that byte is 0x80 or larger.
+ */
+static inline __uint32_t get_unaligned_be32(void *p)
+{
+       __uint8_t *__p = p;
+       return (__uint32_t)__p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3];
+}
+
+/*
+ * Read a 64-bit big-endian value from p as two 32-bit halves.  The offset is
+ * applied to a byte pointer because arithmetic on void * is a GNU extension.
+ */
+static inline __uint64_t get_unaligned_be64(void *p)
+{
+       __uint8_t *__p = p;
+       return (__uint64_t)get_unaligned_be32(__p) << 32 |
+                          get_unaligned_be32(__p + 4);
+}
+
+/* Store val at p as a 16-bit big-endian value. */
+static inline void put_unaligned_be16(__uint16_t val, void *p)
+{
+       __uint8_t *__p = p;
+       *__p++ = val >> 8;
+       *__p++ = val;
+}
+
+/* Store val at p as a 32-bit big-endian value (two 16-bit halves). */
+static inline void put_unaligned_be32(__uint32_t val, void *p)
+{
+       __uint8_t *__p = p;
+       put_unaligned_be16(val >> 16, __p);
+       put_unaligned_be16(val, __p + 2);
+}
+
+/* Store val at p as a 64-bit big-endian value (two 32-bit halves). */
+static inline void put_unaligned_be64(__uint64_t val, void *p)
+{
+       __uint8_t *__p = p;
+       put_unaligned_be32(val >> 32, __p);
+       put_unaligned_be32(val, __p + 4);
+}
+
  #endif /* SWAB_H */
diff --git a/include/xfs.h b/include/xfs.h

index 147a5a0d091e0937ce1b3692e798dfb422e657ab..e32c4a8db43f434385a23fca63b94df618455eec 100644 (file)
--- a/include/xfs.h
+++ b/include/xfs.h
@@ -34,6 +34,7 @@
  #define __XFS_H__
  
  #include <xfs/platform_defs.h>
+#include <xfs/xfs_types.h>
  #include <xfs/xfs_fs.h>
  
  #endif /* __XFS_H__ */
diff --git a/include/xfs_ag.h b/include/xfs_ag.h

index 5adce91b6e6399ec41ae66c2cfe3ad66881a0166..f2aeedb6a579f0e45246feade8ca68dd4a8652ff 100644 (file)
--- a/include/xfs_ag.h
+++ b/include/xfs_ag.h
@@ -103,11 +103,13 @@ typedef struct xfs_agf {
  /* disk block (xfs_daddr_t) in the AG */
  #define XFS_AGF_DADDR(mp)      ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
  #define        XFS_AGF_BLOCK(mp)       XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
-#define        XFS_BUF_TO_AGF(bp)      ((xfs_agf_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_AGF(bp)      ((xfs_agf_t *)((bp)->b_addr))
  
  extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
                         xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
  
+extern const struct xfs_buf_ops xfs_agf_buf_ops;
+
  /*
   * Size of the unlinked inode hash table in the agi.
   */
@@ -156,11 +158,13 @@ typedef struct xfs_agi {
  /* disk block (xfs_daddr_t) in the AG */
  #define XFS_AGI_DADDR(mp)      ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
  #define        XFS_AGI_BLOCK(mp)       XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
-#define        XFS_BUF_TO_AGI(bp)      ((xfs_agi_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_AGI(bp)      ((xfs_agi_t *)((bp)->b_addr))
  
  extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
                                 xfs_agnumber_t agno, struct xfs_buf **bpp);
  
+extern const struct xfs_buf_ops xfs_agi_buf_ops;
+
  /*
   * The third a.g. block contains the a.g. freelist, an array
   * of block pointers to blocks owned by the allocation btree code.
@@ -168,30 +172,12 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
  #define XFS_AGFL_DADDR(mp)     ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
  #define        XFS_AGFL_BLOCK(mp)      XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
  #define XFS_AGFL_SIZE(mp)      ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
-#define        XFS_BUF_TO_AGFL(bp)     ((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_AGFL(bp)     ((xfs_agfl_t *)((bp)->b_addr))
  
  typedef struct xfs_agfl {
         __be32          agfl_bno[1];    /* actually XFS_AGFL_SIZE(mp) */
  } xfs_agfl_t;
  
-/*
- * Busy block/extent entry.  Indexed by a rbtree in perag to mark blocks that
- * have been freed but whose transactions aren't committed to disk yet.
- *
- * Note that we use the transaction ID to record the transaction, not the
- * transaction structure itself. See xfs_alloc_busy_insert() for details.
- */
-struct xfs_busy_extent {
-#ifdef __KERNEL__
-       struct rb_node  rb_node;        /* ag by-bno indexed search tree */
-#endif
-       struct list_head list;          /* transaction busy extent list */
-       xfs_agnumber_t  agno;
-       xfs_agblock_t   bno;
-       xfs_extlen_t    length;
-       xlog_tid_t      tid;            /* transaction that created this */
-};
-
  /*
   * Per-ag incore structure, copies of information in agf and agi,
   * to improve the performance of allocation group selection.
@@ -251,6 +237,7 @@ typedef struct xfs_perag {
  #define XFS_ICI_NO_TAG         (-1)    /* special flag for an untagged lookup
                                            in xfs_inode_ag_iterator */
  #define XFS_ICI_RECLAIM_TAG    0       /* inode is to be reclaimed */
+#define XFS_ICI_EOFBLOCKS_TAG  1       /* inode has blocks beyond EOF */
  
  #define        XFS_AG_MAXLEVELS(mp)            ((mp)->m_ag_maxlevels)
  #define        XFS_MIN_FREELIST_RAW(bl,cl,mp)  \
diff --git a/include/xfs_alloc.h b/include/xfs_alloc.h

index 895009a97271fbbd27fcb63b97b9546f375a8778..99d0a61015587f7b8400bb8519bec6717b91c1cf 100644 (file)
--- a/include/xfs_alloc.h
+++ b/include/xfs_alloc.h
@@ -19,10 +19,12 @@
  #define        __XFS_ALLOC_H__
  
  struct xfs_buf;
+struct xfs_btree_cur;
  struct xfs_mount;
  struct xfs_perag;
  struct xfs_trans;
-struct xfs_busy_extent;
+
+extern struct workqueue_struct *xfs_alloc_wq;
  
  /*
   * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
@@ -73,6 +75,22 @@ typedef unsigned int xfs_alloctype_t;
   */
  #define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
  
+/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size of the AG. However, we cannot use all the
+ * blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ *     - the AG superblock, AGF, AGI and AGFL
+ *     - the AGF (bno and cnt) and AGI btree root blocks
+ *     - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ */
+#define XFS_ALLOC_AG_MAX_USABLE(mp)    \
+       ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+
+
  /*
   * Argument structure for xfs_alloc routines.
   * This is turned into a structure to avoid having 20 arguments passed
@@ -117,19 +135,6 @@ xfs_extlen_t
  xfs_alloc_longest_free_extent(struct xfs_mount *mp,
                 struct xfs_perag *pag);
  
-#ifdef __KERNEL__
-
-void
-xfs_alloc_busy_insert(xfs_trans_t *tp,
-               xfs_agnumber_t agno,
-               xfs_agblock_t bno,
-               xfs_extlen_t len);
-
-void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
-
-#endif /* __KERNEL__ */
-
  /*
   * Compute and fill in value of m_ag_maxlevels.
   */
@@ -205,4 +210,28 @@ xfs_free_extent(
         xfs_fsblock_t   bno,    /* starting block number of extent */
         xfs_extlen_t    len);   /* length of extent */
  
+int                                    /* error */
+xfs_alloc_lookup_le(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat); /* success/failure */
+
+int                            /* error */
+xfs_alloc_lookup_ge(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat); /* success/failure */
+
+int                                    /* error */
+xfs_alloc_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           *bno,   /* output: starting block of extent */
+       xfs_extlen_t            *len,   /* output: length of extent */
+       int                     *stat); /* output: success/failure */
+
+extern const struct xfs_buf_ops xfs_agf_buf_ops;
+extern const struct xfs_buf_ops xfs_agfl_buf_ops;
+
  #endif /* __XFS_ALLOC_H__ */
diff --git a/include/xfs_alloc_btree.h b/include/xfs_alloc_btree.h

index a6caa0022c9bba4f937abaf032ede0761e930a22..7e89a2b429ddcb350a847ede80f7c622000d71c8 100644 (file)
--- a/include/xfs_alloc_btree.h
+++ b/include/xfs_alloc_btree.h
@@ -50,20 +50,6 @@ typedef struct xfs_alloc_rec_incore {
  /* btree pointer type */
  typedef __be32 xfs_alloc_ptr_t;
  
-/*
- * Minimum and maximum blocksize and sectorsize.
- * The blocksize upper limit is pretty much arbitrary.
- * The sectorsize upper limit is due to sizeof(sb_sectsize).
- */
-#define XFS_MIN_BLOCKSIZE_LOG  9       /* i.e. 512 bytes */
-#define XFS_MAX_BLOCKSIZE_LOG  16      /* i.e. 65536 bytes */
-#define XFS_MIN_BLOCKSIZE      (1 << XFS_MIN_BLOCKSIZE_LOG)
-#define XFS_MAX_BLOCKSIZE      (1 << XFS_MAX_BLOCKSIZE_LOG)
-#define XFS_MIN_SECTORSIZE_LOG 9       /* i.e. 512 bytes */
-#define XFS_MAX_SECTORSIZE_LOG 15      /* i.e. 32768 bytes */
-#define XFS_MIN_SECTORSIZE     (1 << XFS_MIN_SECTORSIZE_LOG)
-#define XFS_MAX_SECTORSIZE     (1 << XFS_MAX_SECTORSIZE_LOG)
-
  /*
   * Block numbers in the AG:
   * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
@@ -107,4 +93,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
                 xfs_agnumber_t, xfs_btnum_t);
  extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
  
+extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+
  #endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/include/xfs_attr_leaf.h b/include/xfs_attr_leaf.h

index 9c7d22fdcf4d8ea0412d840a109f14c3923d3aab..77de139a58f0277991cd2d1ede0e045def90ec4b 100644 (file)
--- a/include/xfs_attr_leaf.h
+++ b/include/xfs_attr_leaf.h
@@ -31,7 +31,6 @@
  struct attrlist;
  struct attrlist_cursor_kern;
  struct xfs_attr_list_context;
-struct xfs_dabuf;
  struct xfs_da_args;
  struct xfs_da_state;
  struct xfs_da_state_blk;
@@ -215,7 +214,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
  int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
  int    xfs_attr_shortform_remove(struct xfs_da_args *args);
  int    xfs_attr_shortform_list(struct xfs_attr_list_context *context);
-int    xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp);
+int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
  int    xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
  
  
@@ -223,7 +222,7 @@ int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
   * Internal routines when attribute fork size == XFS_LBSIZE(mp).
   */
  int    xfs_attr_leaf_to_node(struct xfs_da_args *args);
-int    xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp,
+int    xfs_attr_leaf_to_shortform(struct xfs_buf *bp,
                                    struct xfs_da_args *args, int forkoff);
  int    xfs_attr_leaf_clearflag(struct xfs_da_args *args);
  int    xfs_attr_leaf_setflag(struct xfs_da_args *args);
@@ -235,14 +234,14 @@ int       xfs_attr_leaf_flipflags(xfs_da_args_t *args);
  int    xfs_attr_leaf_split(struct xfs_da_state *state,
                                    struct xfs_da_state_blk *oldblk,
                                    struct xfs_da_state_blk *newblk);
-int    xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf,
+int    xfs_attr_leaf_lookup_int(struct xfs_buf *leaf,
                                         struct xfs_da_args *args);
-int    xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args);
-int    xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer,
+int    xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
+int    xfs_attr_leaf_add(struct xfs_buf *leaf_buffer,
                                  struct xfs_da_args *args);
-int    xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer,
+int    xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer,
                                     struct xfs_da_args *args);
-int    xfs_attr_leaf_list_int(struct xfs_dabuf *bp,
+int    xfs_attr_leaf_list_int(struct xfs_buf *bp,
                                       struct xfs_attr_list_context *context);
  
  /*
@@ -257,9 +256,15 @@ int        xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
  /*
   * Utility routines.
   */
-xfs_dahash_t   xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count);
-int    xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
-                                  struct xfs_dabuf *leaf2_bp);
+xfs_dahash_t   xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);
+int    xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
+                                  struct xfs_buf *leaf2_bp);
  int    xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
                                         int *local);
+int    xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
+                       xfs_dablk_t bno, xfs_daddr_t mappedbno,
+                       struct xfs_buf **bpp);
+
+extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops;
+
  #endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/include/xfs_bmap.h b/include/xfs_bmap.h

index 3651191daea10cd99bae79443a2b1b9305940250..de451a273041ce76499a02e1dfd49da4f01dcacd 100644 (file)
--- a/include/xfs_bmap.h
+++ b/include/xfs_bmap.h
@@ -62,36 +62,32 @@ typedef     struct xfs_bmap_free
  #define        XFS_BMAP_MAX_NMAP       4
  
  /*
- * Flags for xfs_bmapi
+ * Flags for xfs_bmapi_*
   */
-#define        XFS_BMAPI_WRITE         0x001   /* write operation: allocate space */
-#define XFS_BMAPI_DELAY                0x002   /* delayed write operation */
-#define XFS_BMAPI_ENTIRE       0x004   /* return entire extent, not trimmed */
-#define XFS_BMAPI_METADATA     0x008   /* mapping metadata not user data */
-#define XFS_BMAPI_ATTRFORK     0x010   /* use attribute fork not data */
-#define XFS_BMAPI_RSVBLOCKS    0x020   /* OK to alloc. reserved data blocks */
-#define        XFS_BMAPI_PREALLOC      0x040   /* preallocation op: unwritten space */
-#define        XFS_BMAPI_IGSTATE       0x080   /* Ignore state - */
+#define XFS_BMAPI_ENTIRE       0x001   /* return entire extent, not trimmed */
+#define XFS_BMAPI_METADATA     0x002   /* mapping metadata not user data */
+#define XFS_BMAPI_ATTRFORK     0x004   /* use attribute fork not data */
+#define XFS_BMAPI_PREALLOC     0x008   /* preallocation op: unwritten space */
+#define XFS_BMAPI_IGSTATE      0x010   /* Ignore state - */
                                         /* combine contig. space */
-#define        XFS_BMAPI_CONTIG        0x100   /* must allocate only one extent */
+#define XFS_BMAPI_CONTIG       0x020   /* must allocate only one extent */
  /*
   * unwritten extent conversion - this needs write cache flushing and no additional
   * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
   * from written to unwritten, otherwise convert from unwritten to written.
   */
-#define XFS_BMAPI_CONVERT      0x200
+#define XFS_BMAPI_CONVERT      0x040
+#define XFS_BMAPI_STACK_SWITCH 0x080
  
  #define XFS_BMAPI_FLAGS \
-       { XFS_BMAPI_WRITE,      "WRITE" }, \
-       { XFS_BMAPI_DELAY,      "DELAY" }, \
         { XFS_BMAPI_ENTIRE,     "ENTIRE" }, \
         { XFS_BMAPI_METADATA,   "METADATA" }, \
         { XFS_BMAPI_ATTRFORK,   "ATTRFORK" }, \
-       { XFS_BMAPI_RSVBLOCKS,  "RSVBLOCKS" }, \
         { XFS_BMAPI_PREALLOC,   "PREALLOC" }, \
         { XFS_BMAPI_IGSTATE,    "IGSTATE" }, \
         { XFS_BMAPI_CONTIG,     "CONTIG" }, \
-       { XFS_BMAPI_CONVERT,    "CONVERT" }
+       { XFS_BMAPI_CONVERT,    "CONVERT" }, \
+       { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
  
  
  static inline int xfs_bmapi_aflag(int w)
@@ -115,23 +111,32 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
   * Argument structure for xfs_bmap_alloc.
   */
  typedef struct xfs_bmalloca {
-       xfs_fsblock_t           firstblock; /* i/o first block allocated */
-       xfs_fsblock_t           rval;   /* starting block of new extent */
-       xfs_fileoff_t           off;    /* offset in file filling in */
+       xfs_fsblock_t           *firstblock; /* i/o first block allocated */
+       struct xfs_bmap_free    *flist; /* bmap freelist */
         struct xfs_trans        *tp;    /* transaction pointer */
         struct xfs_inode        *ip;    /* incore inode pointer */
-       struct xfs_bmbt_irec    *prevp; /* extent before the new one */
-       struct xfs_bmbt_irec    *gotp;  /* extent after, or delayed */
-       xfs_extlen_t            alen;   /* i/o length asked/allocated */
+       struct xfs_bmbt_irec    prev;   /* extent before the new one */
+       struct xfs_bmbt_irec    got;    /* extent after, or delayed */
+
+       xfs_fileoff_t           offset; /* offset in file filling in */
+       xfs_extlen_t            length; /* i/o length asked/allocated */
+       xfs_fsblock_t           blkno;  /* starting block of new extent */
+
+       struct xfs_btree_cur    *cur;   /* btree cursor */
+       xfs_extnum_t            idx;    /* current extent index */
+       int                     nallocs;/* number of extents alloc'd */
+       int                     logflags;/* flags for transaction logging */
+
         xfs_extlen_t            total;  /* total blocks needed for xaction */
         xfs_extlen_t            minlen; /* minimum allocation size (blocks) */
         xfs_extlen_t            minleft; /* amount must be left after alloc */
         char                    eof;    /* set if allocating past last extent */
         char                    wasdel; /* replacing a delayed allocation */
         char                    userdata;/* set if is user data */
-       char                    low;    /* low on space, using seq'l ags */
         char                    aeof;   /* allocated space at eof */
         char                    conv;   /* overwriting unwritten extents */
+       int                     flags;
+       int                     stack_switch;
  } xfs_bmalloca_t;
  
  /*
@@ -154,251 +159,65 @@ typedef struct xfs_bmalloca {
         { BMAP_RIGHT_FILLING,   "RF" }, \
         { BMAP_ATTRFORK,        "ATTR" }
  
-/*
- * Add bmap trace insert entries for all the contents of the extent list.
- *
- * Quite excessive tracing.  Only do this for debug builds.
- */
  #if defined(__KERNEL) && defined(DEBUG)
-void
-xfs_bmap_trace_exlist(
-       struct xfs_inode        *ip,            /* incore inode pointer */
-       xfs_extnum_t            cnt,            /* count of entries in list */
-       int                     whichfork,
-       unsigned long           caller_ip);     /* data or attr fork */
+void   xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
+               int whichfork, unsigned long caller_ip);
  #define        XFS_BMAP_TRACE_EXLIST(ip,c,w)   \
         xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
  #else
  #define        XFS_BMAP_TRACE_EXLIST(ip,c,w)
  #endif
  
-/*
- * Convert inode from non-attributed to attributed.
- * Must not be in a transaction, ip must not be locked.
- */
-int                                    /* error code */
-xfs_bmap_add_attrfork(
-       struct xfs_inode        *ip,    /* incore inode pointer */
-       int                     size,   /* space needed for new attribute */
-       int                     rsvd);  /* flag for reserved block allocation */
-
-/*
- * Add the extent to the list of extents to be free at transaction end.
- * The list is maintained sorted (by block number).
- */
-void
-xfs_bmap_add_free(
-       xfs_fsblock_t           bno,            /* fs block number of extent */
-       xfs_filblks_t           len,            /* length of extent */
-       xfs_bmap_free_t         *flist,         /* list of extents */
-       struct xfs_mount        *mp);           /* mount point structure */
-
-/*
- * Routine to clean up the free list data structure when
- * an error occurs during a transaction.
- */
-void
-xfs_bmap_cancel(
-       xfs_bmap_free_t         *flist);        /* free list to clean up */
-
-/*
- * Compute and fill in the value of the maximum depth of a bmap btree
- * in this filesystem.  Done once, during mount.
- */
-void
-xfs_bmap_compute_maxlevels(
-       struct xfs_mount        *mp,    /* file system mount structure */
-       int                     whichfork);     /* data or attr fork */
-
-/*
- * Returns the file-relative block number of the first unused block in the file.
- * This is the lowest-address hole if the file has holes, else the first block
- * past the end of file.
- */
-int                                            /* error */
-xfs_bmap_first_unused(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_extlen_t            len,            /* size of hole to find */
-       xfs_fileoff_t           *unused,        /* unused block num */
-       int                     whichfork);     /* data or attr fork */
-
-/*
- * Returns the file-relative block number of the last block + 1 before
- * last_block (input value) in the file.
- * This is not based on i_size, it is based on the extent list.
- * Returns 0 for local files, as they do not have an extent list.
- */
-int                                            /* error */
-xfs_bmap_last_before(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_fileoff_t           *last_block,    /* last block */
-       int                     whichfork);     /* data or attr fork */
-
-/*
- * Returns the file-relative block number of the first block past eof in
- * the file.  This is not based on i_size, it is based on the extent list.
- * Returns 0 for local files, as they do not have an extent list.
- */
-int                                            /* error */
-xfs_bmap_last_offset(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_fileoff_t           *unused,        /* last block num */
-       int                     whichfork);     /* data or attr fork */
-
-/*
- * Returns whether the selected fork of the inode has exactly one
- * block or not.  For the data fork we check this matches di_size,
- * implying the file's range is 0..bsize-1.
- */
-int
-xfs_bmap_one_block(
-       struct xfs_inode        *ip,            /* incore inode */
-       int                     whichfork);     /* data or attr fork */
-
-/*
- * Read in the extents to iu_extents.
- * All inode fields are set up by caller, we just traverse the btree
- * and copy the records in.
- */
-int                                            /* error */
-xfs_bmap_read_extents(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       int                     whichfork);     /* data or attr fork */
-
-/*
- * Map file blocks to filesystem blocks.
- * File range is given by the bno/len pair.
- * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
- * into a hole or past eof.
- * Only allocates blocks from a single allocation group,
- * to avoid locking problems.
- * The returned value in "firstblock" from the first call in a transaction
- * must be remembered and presented to subsequent calls in "firstblock".
- * An upper bound for the number of blocks to be allocated is supplied to
- * the first call in "total"; if no allocation group has that many free
- * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
- */
-int                                            /* error */
-xfs_bmapi(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_fileoff_t           bno,            /* starting file offs. mapped */
-       xfs_filblks_t           len,            /* length to map in file */
-       int                     flags,          /* XFS_BMAPI_... */
-       xfs_fsblock_t           *firstblock,    /* first allocated block
-                                                  controls a.g. for allocs */
-       xfs_extlen_t            total,          /* total blocks needed */
-       struct xfs_bmbt_irec    *mval,          /* output: map values */
-       int                     *nmap,          /* i/o: mval size/count */
-       xfs_bmap_free_t         *flist);        /* i/o: list extents to free */
-
-/*
- * Map file blocks to filesystem blocks, simple version.
- * One block only, read-only.
- * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
- * For the other flag values, the effect is as if XFS_BMAPI_METADATA
- * was set and all the others were clear.
- */
-int                                            /* error */
-xfs_bmapi_single(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       int                     whichfork,      /* data or attr fork */
-       xfs_fsblock_t           *fsb,           /* output: mapped block */
-       xfs_fileoff_t           bno);           /* starting file offs. mapped */
-
-/*
- * Unmap (remove) blocks from a file.
- * If nexts is nonzero then the number of extents to remove is limited to
- * that value.  If not all extents in the block range can be removed then
- * *done is set.
- */
-int                                            /* error */
-xfs_bunmapi(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_fileoff_t           bno,            /* starting offset to unmap */
-       xfs_filblks_t           len,            /* length to unmap in file */
-       int                     flags,          /* XFS_BMAPI_... */
-       xfs_extnum_t            nexts,          /* number of extents max */
-       xfs_fsblock_t           *firstblock,    /* first allocated block
-                                                  controls a.g. for allocs */
-       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
-       int                     *done);         /* set if not done yet */
-
-/*
- * Check an extent list, which has just been read, for
- * any bit in the extent flag field.
- */
-int
-xfs_check_nostate_extents(
-       struct xfs_ifork        *ifp,
-       xfs_extnum_t            idx,
-       xfs_extnum_t            num);
-
-uint
-xfs_default_attroffset(
-       struct xfs_inode        *ip);
+int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+void   xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
+               struct xfs_bmap_free *flist, struct xfs_mount *mp);
+void   xfs_bmap_cancel(struct xfs_bmap_free *flist);
+void   xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
+int    xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
+               xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
+int    xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
+               xfs_fileoff_t *last_block, int whichfork);
+int    xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
+               xfs_fileoff_t *unused, int whichfork);
+int    xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
+int    xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+               int whichfork);
+int    xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
+               xfs_filblks_t len, struct xfs_bmbt_irec *mval,
+               int *nmap, int flags);
+int    xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
+               xfs_filblks_t len, struct xfs_bmbt_irec *mval,
+               int *nmap, int flags);
+int    xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
+               xfs_fileoff_t bno, xfs_filblks_t len, int flags,
+               xfs_fsblock_t *firstblock, xfs_extlen_t total,
+               struct xfs_bmbt_irec *mval, int *nmap,
+               struct xfs_bmap_free *flist);
+int    xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
+               xfs_fileoff_t bno, xfs_filblks_t len, int flags,
+               xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
+               struct xfs_bmap_free *flist, int *done);
+int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
+               xfs_extnum_t num);
+uint   xfs_default_attroffset(struct xfs_inode *ip);
  
  #ifdef __KERNEL__
-
-/*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller.  Frees all the extents that need freeing, which must be done
- * last due to locking considerations.
- *
- * Return 1 if the given transaction was committed and a new one allocated,
- * and 0 otherwise.
- */
-int                                            /* error */
-xfs_bmap_finish(
-       struct xfs_trans        **tp,           /* transaction pointer addr */
-       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
-       int                     *committed);    /* xact committed or not */
-
  /* bmap to userspace formatter - copy to user & advance pointer */
  typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
  
-/*
- * Get inode's extents as described in bmv, and format for output.
- */
-int                                            /* error code */
-xfs_getbmap(
-       xfs_inode_t             *ip,
-       struct getbmapx         *bmv,           /* user bmap structure */
-       xfs_bmap_format_t       formatter,      /* format to user */
-       void                    *arg);          /* formatter arg */
+int    xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
+               int *committed);
+int    xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
+               xfs_bmap_format_t formatter, void *arg);
+int    xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
+               int whichfork, int *eof);
+int    xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+               int whichfork, int *count);
+int    xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+               xfs_fileoff_t start_fsb, xfs_fileoff_t length);
  
-/*
- * Check if the endoff is outside the last extent. If so the caller will grow
- * the allocation to a stripe unit boundary
- */
-int
-xfs_bmap_eof(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           endoff,
-       int                     whichfork,
-       int                     *eof);
-
-/*
- * Count fsblocks of the given fork.
- */
-int
-xfs_bmap_count_blocks(
-       xfs_trans_t             *tp,
-       struct xfs_inode        *ip,
-       int                     whichfork,
-       int                     *count);
+xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
  
-int
-xfs_bmap_punch_delalloc_range(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           start_fsb,
-       xfs_fileoff_t           length);
  #endif /* __KERNEL__ */
  
  #endif /* __XFS_BMAP_H__ */
diff --git a/include/xfs_bmap_btree.h b/include/xfs_bmap_btree.h

index 0e66c4ea0f8581ef06f2248d72953a84b4cbafbf..88469ca086960986d4768f03b43979d517326c75 100644 (file)
--- a/include/xfs_bmap_btree.h
+++ b/include/xfs_bmap_btree.h
@@ -236,5 +236,6 @@ extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
  extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
                 struct xfs_trans *, struct xfs_inode *, int);
  
+extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
  
  #endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/include/xfs_btree.h b/include/xfs_btree.h

index 82fafc66bd1f911cfdc4c6293b6755bf11e3fd53..be1eb23a1fe05f68e8ef40df544039d6f074df3e 100644 (file)
--- a/include/xfs_btree.h
+++ b/include/xfs_btree.h
@@ -188,6 +188,8 @@ struct xfs_btree_ops {
         __int64_t (*key_diff)(struct xfs_btree_cur *cur,
                               union xfs_btree_key *key);
  
+       const struct xfs_buf_ops        *buf_ops;
+
  #ifdef DEBUG
         /* check that k1 is lower than k2 */
         int     (*keys_inorder)(struct xfs_btree_cur *cur,
@@ -281,7 +283,7 @@ typedef struct xfs_btree_cur
  /*
   * Convert from buffer to btree block header.
   */
-#define        XFS_BUF_TO_BLOCK(bp)    ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_BLOCK(bp)    ((struct xfs_btree_block *)((bp)->b_addr))
  
  
  /*
@@ -374,7 +376,8 @@ xfs_btree_read_bufl(
         xfs_fsblock_t           fsbno,  /* file system block number */
         uint                    lock,   /* lock flags for read_buf */
         struct xfs_buf          **bpp,  /* buffer for fsbno */
-       int                     refval);/* ref count value for buffer */
+       int                     refval, /* ref count value for buffer */
+       const struct xfs_buf_ops *ops);
  
  /*
   * Read-ahead the block, don't wait for it, don't return a buffer.
@@ -384,7 +387,8 @@ void                                        /* error */
  xfs_btree_reada_bufl(
         struct xfs_mount        *mp,    /* file system mount point */
         xfs_fsblock_t           fsbno,  /* file system block number */
-       xfs_extlen_t            count); /* count of filesystem blocks */
+       xfs_extlen_t            count,  /* count of filesystem blocks */
+       const struct xfs_buf_ops *ops);
  
  /*
   * Read-ahead the block, don't wait for it, don't return a buffer.
@@ -395,8 +399,20 @@ xfs_btree_reada_bufs(
         struct xfs_mount        *mp,    /* file system mount point */
         xfs_agnumber_t          agno,   /* allocation group number */
         xfs_agblock_t           agbno,  /* allocation group block number */
-       xfs_extlen_t            count); /* count of filesystem blocks */
+       xfs_extlen_t            count,  /* count of filesystem blocks */
+       const struct xfs_buf_ops *ops);
  
+/*
+ * Initialise a new btree block header
+ */
+void
+xfs_btree_init_block(
+       struct xfs_mount *mp,
+       struct xfs_buf  *bp,
+       __u32           magic,
+       __u16           level,
+       __u16           numrecs,
+       unsigned int    flags);
  
  /*
   * Common btree core entry points.
diff --git a/include/xfs_buf_item.h b/include/xfs_buf_item.h

index a5efba911a42315543511da9c9a4325b8bdabad2..ee36c88ecfdec40f999316d00a56f8d03cde8d73 100644 (file)
--- a/include/xfs_buf_item.h
+++ b/include/xfs_buf_item.h
@@ -20,23 +20,6 @@
  
  extern kmem_zone_t     *xfs_buf_item_zone;
  
-/*
- * This is the structure used to lay out a buf log item in the
- * log.  The data map describes which 128 byte chunks of the buffer
- * have been logged.
- * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
- */
-typedef struct xfs_buf_log_format {
-       unsigned short  blf_type;       /* buf log item type indicator */
-       unsigned short  blf_size;       /* size of this item */
-       ushort          blf_flags;      /* misc state */
-       ushort          blf_len;        /* number of blocks in this buf */
-       __int64_t       blf_blkno;      /* starting blkno of this buf */
-       unsigned int    blf_map_size;   /* size of data bitmap in words */
-       unsigned int    blf_data_map[1];/* variable size bitmap of */
-                                       /*   regions of buffer in this item */
-} xfs_buf_log_format_t;
-
  /*
   * This flag indicates that the buffer contains on disk inodes
   * and requires special recovery handling.
@@ -60,6 +43,23 @@ typedef struct xfs_buf_log_format {
  #define        BIT_TO_WORD_SHIFT       5
  #define        NBWORD                  (NBBY * sizeof(unsigned int))
  
+/*
+ * This is the structure used to lay out a buf log item in the
+ * log.  The data map describes which 128 byte chunks of the buffer
+ * have been logged.
+ */
+#define XFS_BLF_DATAMAP_SIZE   ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
+
+typedef struct xfs_buf_log_format {
+       unsigned short  blf_type;       /* buf log item type indicator */
+       unsigned short  blf_size;       /* size of this item */
+       ushort          blf_flags;      /* misc state */
+       ushort          blf_len;        /* number of blocks in this buf */
+       __int64_t       blf_blkno;      /* starting blkno of this buf */
+       unsigned int    blf_map_size;   /* used size of data bitmap in words */
+       unsigned int    blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
+} xfs_buf_log_format_t;
+
  /*
   * buf log item flags
   */
@@ -69,7 +69,7 @@ typedef struct xfs_buf_log_format {
  #define        XFS_BLI_LOGGED          0x08
  #define        XFS_BLI_INODE_ALLOC_BUF 0x10
  #define XFS_BLI_STALE_INODE    0x20
-#define XFS_BLI_INODE_BUF      0x40
+#define        XFS_BLI_INODE_BUF       0x40
  
  #define XFS_BLI_FLAGS \
         { XFS_BLI_HOLD,         "HOLD" }, \
@@ -80,6 +80,7 @@ typedef struct xfs_buf_log_format {
         { XFS_BLI_STALE_INODE,  "STALE_INODE" }, \
         { XFS_BLI_INODE_BUF,    "INODE_BUF" }
  
+
  #ifdef __KERNEL__
  
  struct xfs_buf;
@@ -97,11 +98,9 @@ typedef struct xfs_buf_log_item {
         unsigned int            bli_flags;      /* misc flags */
         unsigned int            bli_recur;      /* lock recursion count */
         atomic_t                bli_refcount;   /* cnt of tp refs */
-#ifdef XFS_TRANS_DEBUG
-       char                    *bli_orig;      /* original buffer copy */
-       char                    *bli_logged;    /* bytes logged (bitmap) */
-#endif
-       xfs_buf_log_format_t    bli_format;     /* in-log header */
+       int                     bli_format_count;       /* count of headers */
+       struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
+       struct xfs_buf_log_format __bli_format; /* embedded in-log header */
  } xfs_buf_log_item_t;
  
  void   xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
@@ -114,16 +113,6 @@ void       xfs_buf_attach_iodone(struct xfs_buf *,
  void   xfs_buf_iodone_callbacks(struct xfs_buf *);
  void   xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
  
-#ifdef XFS_TRANS_DEBUG
-void
-xfs_buf_item_flush_log_debug(
-       struct xfs_buf *bp,
-       uint    first,
-       uint    last);
-#else
-#define        xfs_buf_item_flush_log_debug(bp, first, last)
-#endif
-
  #endif /* __KERNEL__ */
  
  #endif /* __XFS_BUF_ITEM_H__ */
diff --git a/include/xfs_da_btree.h b/include/xfs_da_btree.h

index fe9f5a8c1d2a56794e083465ff54280cb49450d5..ee5170c46ae1abb5d348b493f24843389b4ecbe1 100644 (file)
--- a/include/xfs_da_btree.h
+++ b/include/xfs_da_btree.h
@@ -18,7 +18,6 @@
  #ifndef __XFS_DA_BTREE_H__
  #define        __XFS_DA_BTREE_H__
  
-struct xfs_buf;
  struct xfs_bmap_free;
  struct xfs_inode;
  struct xfs_mount;
@@ -32,7 +31,7 @@ struct zone;
  /*
   * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
   *
- * Is is used to manage a doubly linked list of all blocks at the same
+ * It is used to manage a doubly linked list of all blocks at the same
   * level in the Btree, and to identify which type of block this is.
   */
  #define XFS_DA_NODE_MAGIC      0xfebe  /* magic number: non-leaf blocks */
@@ -132,35 +131,6 @@ typedef struct xfs_da_args {
         { XFS_DA_OP_OKNOENT,    "OKNOENT" }, \
         { XFS_DA_OP_CILOOKUP,   "CILOOKUP" }
  
-/*
- * Structure to describe buffer(s) for a block.
- * This is needed in the directory version 2 format case, when
- * multiple non-contiguous fsblocks might be needed to cover one
- * logical directory block.
- * If the buffer count is 1 then the data pointer points to the
- * same place as the b_addr field for the buffer, else to kmem_alloced memory.
- */
-typedef struct xfs_dabuf {
-       int             nbuf;           /* number of buffer pointers present */
-       short           dirty;          /* data needs to be copied back */
-       short           bbcount;        /* how large is data in bbs */
-       void            *data;          /* pointer for buffers' data */
-#ifdef XFS_DABUF_DEBUG
-       inst_t          *ra;            /* return address of caller to make */
-       struct xfs_dabuf *next;         /* next in global chain */
-       struct xfs_dabuf *prev;         /* previous in global chain */
-       struct xfs_buftarg *target;     /* device for buffer */
-       xfs_daddr_t     blkno;          /* daddr first in bps[0] */
-#endif
-       struct xfs_buf  *bps[1];        /* actually nbuf of these */
-} xfs_dabuf_t;
-#define        XFS_DA_BUF_SIZE(n)      \
-       (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
-
-#ifdef XFS_DABUF_DEBUG
-extern xfs_dabuf_t     *xfs_dabuf_global_list;
-#endif
-
  /*
   * Storage for holding state during Btree searches and split/join ops.
   *
@@ -169,7 +139,7 @@ extern xfs_dabuf_t  *xfs_dabuf_global_list;
   * which is slightly more than enough.
   */
  typedef struct xfs_da_state_blk {
-       xfs_dabuf_t     *bp;            /* buffer containing block */
+       struct xfs_buf  *bp;            /* buffer containing block */
         xfs_dablk_t     blkno;          /* filesystem blkno of buffer */
         xfs_daddr_t     disk_blkno;     /* on-disk blkno (in BBs) of buffer */
         int             index;          /* relevant index into block */
@@ -222,7 +192,7 @@ struct xfs_nameops {
   * Routines used for growing the Btree.
   */
  int    xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
-                                        xfs_dabuf_t **bpp, int whichfork);
+                                        struct xfs_buf **bpp, int whichfork);
  int    xfs_da_split(xfs_da_state_t *state);
  
  /*
@@ -243,21 +213,28 @@ int       xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
   */
  int    xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
                                        xfs_da_state_blk_t *new_blk);
+int    xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
+                        xfs_dablk_t bno, xfs_daddr_t mappedbno,
+                        struct xfs_buf **bpp, int which_fork);
  
  /*
   * Utility routines.
   */
  int    xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
+int    xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
+                             int count);
  int    xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
                               xfs_dablk_t bno, xfs_daddr_t mappedbno,
-                             xfs_dabuf_t **bp, int whichfork);
+                             struct xfs_buf **bp, int whichfork);
  int    xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
                                xfs_dablk_t bno, xfs_daddr_t mappedbno,
-                              xfs_dabuf_t **bpp, int whichfork);
+                              struct xfs_buf **bpp, int whichfork,
+                              const struct xfs_buf_ops *ops);
  xfs_daddr_t    xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
-                       xfs_dablk_t bno, int whichfork);
+                               xfs_dablk_t bno, xfs_daddr_t mapped_bno,
+                               int whichfork, const struct xfs_buf_ops *ops);
  int    xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
-                                         xfs_dabuf_t *dead_buf);
+                                         struct xfs_buf *dead_buf);
  
  uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
  enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
@@ -267,15 +244,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
  xfs_da_state_t *xfs_da_state_alloc(void);
  void xfs_da_state_free(xfs_da_state_t *state);
  
-void xfs_da_buf_done(xfs_dabuf_t *dabuf);
-void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
-                          uint last);
-void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
-void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
-xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
-
  extern struct kmem_zone *xfs_da_state_zone;
-extern struct kmem_zone *xfs_dabuf_zone;
  extern const struct xfs_nameops xfs_default_nameops;
  
  #endif /* __XFS_DA_BTREE_H__ */
diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h

index dffba9ba0db634c29b1bc012b4f54923cda76c55..1d9643b3dce656123cb591379bb07ee235143f11 100644 (file)
--- a/include/xfs_dinode.h
+++ b/include/xfs_dinode.h
@@ -33,7 +33,7 @@ typedef struct xfs_timestamp {
   * variable size the leftover area split into a data and an attribute fork.
   * The format of the data and attribute fork depends on the format of the
   * inode as indicated by di_format and di_aformat.  To access the data and
- * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros
+ * attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros
   * below.
   *
   * There is a very similar struct icdinode in xfs_inode which matches the
@@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt {
                 be32_to_cpu((dip)->di_nextents) : \
                 be16_to_cpu((dip)->di_anextents))
  
-#define        XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)((bp)->b_addr))
  
  /*
   * For block and character special files the 32bit dev_t is stored at the
diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h

index 74a3b1057685c8d93db3494669d762ccc4da8a43..8ab59b5c8dc614d9f4836453f0bef585f4052fba 100644 (file)
--- a/include/xfs_dir2.h
+++ b/include/xfs_dir2.h
@@ -16,37 +16,20 @@
   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   */
  #ifndef __XFS_DIR2_H__
-#define        __XFS_DIR2_H__
+#define __XFS_DIR2_H__
  
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_put_args;
  struct xfs_bmap_free;
+struct xfs_da_args;
  struct xfs_inode;
  struct xfs_mount;
  struct xfs_trans;
+struct xfs_dir2_sf_hdr;
+struct xfs_dir2_sf_entry;
+struct xfs_dir2_data_hdr;
+struct xfs_dir2_data_entry;
+struct xfs_dir2_data_unused;
+struct xfs_dir2_data_free;
  
-/*
- * Directory version 2.
- * There are 4 possible formats:
- *     shortform
- *     single block - data with embedded leaf at the end
- *     multiple data blocks, single leaf+freeindex block
- *     data blocks, node&leaf blocks (btree), freeindex blocks
- *
- *     The shortform format is in xfs_dir2_sf.h.
- *     The single block format is in xfs_dir2_block.h.
- *     The data block format is in xfs_dir2_data.h.
- *     The leaf and freeindex block formats are in xfs_dir2_leaf.h.
- *     Node blocks are the same as the other version, in xfs_da_btree.h.
- */
-
-/*
- * Byte offset in data block and shortform entry.
- */
-typedef        __uint16_t      xfs_dir2_data_off_t;
-#define        NULLDATAOFF     0xffffU
  typedef uint           xfs_dir2_data_aoff_t;   /* argument form */
  
  /*
@@ -54,11 +37,6 @@ typedef uint         xfs_dir2_data_aoff_t;   /* argument form */
   */
  typedef        __uint32_t      xfs_dir2_db_t;
  
-/*
- * Byte offset in a directory.
- */
-typedef        xfs_off_t       xfs_dir2_off_t;
-
  extern struct xfs_name xfs_name_dotdot;
  
  /*
@@ -86,21 +64,44 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_bmap_free *flist, xfs_extlen_t tot);
  extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_name *name, uint resblks);
-extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
  
  /*
- * Utility routines for v2 directories.
+ * Direct call from the bmap code, bypassing the generic directory layer.
+ */
+extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
+
+/*
+ * used by db
   */
-extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
-                               xfs_dir2_db_t *dbp);
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
-                               int *vp);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
-                               int *vp);
+extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
+extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp,
+               xfs_ino_t ino);
+extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
+               struct xfs_dir2_sf_entry *sfep);
+extern void xfs_dir2_sfe_put_ino( struct xfs_dir2_sf_hdr *,
+               struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
+
+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
  extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
-                               struct xfs_dabuf *bp);
+                               struct xfs_buf *bp);
+
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
+               struct xfs_dir2_data_hdr *hdr, int *loghead);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
+               struct xfs_dir2_data_entry *dep);
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
+               struct xfs_buf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
+               struct xfs_dir2_data_unused *dup);
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
+               xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
+               int *needlogp, int *needscanp);
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
+               struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
+               xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
  
-extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
-                               const unsigned char *name, int len);
+extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
+               struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup);
  
  #endif /* __XFS_DIR2_H__ */
diff --git a/include/xfs_dir2_block.h b/include/xfs_dir2_block.h

deleted file mode 100644 (file)

index 10e6896..0000000
--- a/include/xfs_dir2_block.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_BLOCK_H__
-#define        __XFS_DIR2_BLOCK_H__
-
-/*
- * xfs_dir2_block.h
- * Directory version 2, single block format structures
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_data_hdr;
-struct xfs_dir2_leaf_entry;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The single block format is as follows:
- * xfs_dir2_data_hdr_t structure
- * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
- * xfs_dir2_leaf_entry_t structures
- * xfs_dir2_block_tail_t structure
- */
-
-#define        XFS_DIR2_BLOCK_MAGIC    0x58443242      /* XD2B: for one block dirs */
-
-typedef struct xfs_dir2_block_tail {
-       __be32          count;                  /* count of leaf entries */
-       __be32          stale;                  /* count of stale lf entries */
-} xfs_dir2_block_tail_t;
-
-/*
- * Generic single-block structure, for xfs_db.
- */
-typedef struct xfs_dir2_block {
-       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_BLOCK_MAGIC */
-       xfs_dir2_data_union_t   u[1];
-       xfs_dir2_leaf_entry_t   leaf[1];
-       xfs_dir2_block_tail_t   tail;
-} xfs_dir2_block_t;
-
-/*
- * Pointer to the leaf header embedded in a data block (1-block format)
- */
-static inline xfs_dir2_block_tail_t *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
-{
-       return (((xfs_dir2_block_tail_t *)
-               ((char *)(block) + (mp)->m_dirblksize)) - 1);
-}
-
-/*
- * Pointer to the leaf entries embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_leaf_entry *
-xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
-{
-       return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
-}
-
-/*
- * Function declarations.
- */
-extern int xfs_dir2_block_addname(struct xfs_da_args *args);
-extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
-                                  xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_block_removename(struct xfs_da_args *args);
-extern int xfs_dir2_block_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
-                                 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
-extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_BLOCK_H__ */
diff --git a/include/xfs_dir2_data.h b/include/xfs_dir2_data.h

deleted file mode 100644 (file)

index efbc290..0000000
--- a/include/xfs_dir2_data.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_DATA_H__
-#define        __XFS_DIR2_DATA_H__
-
-/*
- * Directory format 2, data block structures.
- */
-
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_inode;
-struct xfs_trans;
-
-/*
- * Constants.
- */
-#define        XFS_DIR2_DATA_MAGIC     0x58443244      /* XD2D: for multiblock dirs */
-#define        XFS_DIR2_DATA_ALIGN_LOG 3               /* i.e., 8 bytes */
-#define        XFS_DIR2_DATA_ALIGN     (1 << XFS_DIR2_DATA_ALIGN_LOG)
-#define        XFS_DIR2_DATA_FREE_TAG  0xffff
-#define        XFS_DIR2_DATA_FD_COUNT  3
-
-/*
- * Directory address space divided into sections,
- * spaces separated by 32GB.
- */
-#define        XFS_DIR2_SPACE_SIZE     (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
-#define        XFS_DIR2_DATA_SPACE     0
-#define        XFS_DIR2_DATA_OFFSET    (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
-#define        XFS_DIR2_DATA_FIRSTDB(mp)       \
-       xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
-
-/*
- * Offsets of . and .. in data space (always block 0)
- */
-#define        XFS_DIR2_DATA_DOT_OFFSET        \
-       ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
-#define        XFS_DIR2_DATA_DOTDOT_OFFSET     \
-       (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
-#define        XFS_DIR2_DATA_FIRST_OFFSET              \
-       (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
-
-/*
- * Structures.
- */
-
-/*
- * Describe a free area in the data block.
- * The freespace will be formatted as a xfs_dir2_data_unused_t.
- */
-typedef struct xfs_dir2_data_free {
-       __be16                  offset;         /* start of freespace */
-       __be16                  length;         /* length of freespace */
-} xfs_dir2_data_free_t;
-
-/*
- * Header for the data blocks.
- * Always at the beginning of a directory-sized block.
- * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
- */
-typedef struct xfs_dir2_data_hdr {
-       __be32                  magic;          /* XFS_DIR2_DATA_MAGIC */
-                                               /* or XFS_DIR2_BLOCK_MAGIC */
-       xfs_dir2_data_free_t    bestfree[XFS_DIR2_DATA_FD_COUNT];
-} xfs_dir2_data_hdr_t;
-
-/*
- * Active entry in a data block.  Aligned to 8 bytes.
- * Tag appears as the last 2 bytes.
- */
-typedef struct xfs_dir2_data_entry {
-       __be64                  inumber;        /* inode number */
-       __u8                    namelen;        /* name length */
-       __u8                    name[1];        /* name bytes, no null */
-                                               /* variable offset */
-       __be16                  tag;            /* starting offset of us */
-} xfs_dir2_data_entry_t;
-
-/*
- * Unused entry in a data block.  Aligned to 8 bytes.
- * Tag appears as the last 2 bytes.
- */
-typedef struct xfs_dir2_data_unused {
-       __be16                  freetag;        /* XFS_DIR2_DATA_FREE_TAG */
-       __be16                  length;         /* total free length */
-                                               /* variable offset */
-       __be16                  tag;            /* starting offset of us */
-} xfs_dir2_data_unused_t;
-
-typedef union {
-       xfs_dir2_data_entry_t   entry;
-       xfs_dir2_data_unused_t  unused;
-} xfs_dir2_data_union_t;
-
-/*
- * Generic data block structure, for xfs_db.
- */
-typedef struct xfs_dir2_data {
-       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_DATA_MAGIC */
-       xfs_dir2_data_union_t   u[1];
-} xfs_dir2_data_t;
-
-/*
- * Macros.
- */
-
-/*
- * Size of a data entry.
- */
-static inline int xfs_dir2_data_entsize(int n)
-{
-       return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
-                (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
-{
-       return (__be16 *)((char *)dep +
-               xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * Pointer to a freespace's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
-{
-       return (__be16 *)((char *)dup +
-                       be16_to_cpu(dup->length) - sizeof(__be16));
-}
-
-/*
- * Function declarations.
- */
-#ifdef DEBUG
-extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
-#else
-#define        xfs_dir2_data_check(dp,bp)
-#endif
-extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
-                               xfs_dir2_data_unused_t *dup);
-extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
-                               xfs_dir2_data_unused_t *dup, int *loghead);
-extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
-                               int *loghead);
-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
-                               struct xfs_dabuf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
-                               xfs_dir2_data_entry_t *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
-                               struct xfs_dabuf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
-                               xfs_dir2_data_unused_t *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
-                               xfs_dir2_data_aoff_t offset,
-                               xfs_dir2_data_aoff_t len, int *needlogp,
-                               int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
-                              xfs_dir2_data_unused_t *dup,
-                              xfs_dir2_data_aoff_t offset,
-                              xfs_dir2_data_aoff_t len, int *needlogp,
-                              int *needscanp);
-
-#endif /* __XFS_DIR2_DATA_H__ */
diff --git a/include/xfs_dir2_format.h b/include/xfs_dir2_format.h

new file mode 100644 (file)

index 0000000..f5c264a
--- /dev/null
+++ b/include/xfs_dir2_format.h
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_FORMAT_H__
+#define __XFS_DIR2_FORMAT_H__
+
+/*
+ * Directory version 2.
+ *
+ * There are 4 possible formats:
+ *  - shortform - embedded into the inode
+ *  - single block - data with embedded leaf at the end
+ *  - multiple data blocks, single leaf+freeindex block
+ *  - data blocks, node and leaf blocks (btree), freeindex blocks
+ *
+ * Note: many node block structures and constants are shared with the attr
+ * code and defined in xfs_da_btree.h.
+ */
+
+#define        XFS_DIR2_BLOCK_MAGIC    0x58443242      /* XD2B: single block dirs */
+#define        XFS_DIR2_DATA_MAGIC     0x58443244      /* XD2D: multiblock dirs */
+#define        XFS_DIR2_FREE_MAGIC     0x58443246      /* XD2F: free index blocks */
+
+/*
+ * Byte offset in data block and shortform entry.
+ */
+typedef        __uint16_t      xfs_dir2_data_off_t;
+#define        NULLDATAOFF     0xffffU
+
+/*
+ * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
+ * Only need 16 bits, this is the byte offset into the single block form.
+ */
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
+
+/*
+ * Offset in data space of a data entry.
+ */
+typedef        __uint32_t      xfs_dir2_dataptr_t;
+#define        XFS_DIR2_MAX_DATAPTR    ((xfs_dir2_dataptr_t)0xffffffff)
+#define        XFS_DIR2_NULL_DATAPTR   ((xfs_dir2_dataptr_t)0)
+
+/*
+ * Byte offset in a directory.
+ */
+typedef        xfs_off_t       xfs_dir2_off_t;
+
+/*
+ * Inode number stored as 8 8-bit values.
+ */
+typedef        struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
+
+/*
+ * Inode number stored as 4 8-bit values.
+ * Works a lot of the time, when all the inode numbers in a directory
+ * fit in 32 bits.
+ */
+typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
+
+typedef union {
+       xfs_dir2_ino8_t i8;
+       xfs_dir2_ino4_t i4;
+} xfs_dir2_inou_t;
+#define        XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to fit into the
+ * literal area of the inode.  These "shortform" directories consist of a
+ * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
+ * structures.  Due to the different inode number storage size and the variable
+ * length name field in the xfs_dir2_sf_entry all these structures are
+ * variable length, and the accessors in this file should be used to iterate
+ * over them.
+ */
+typedef struct xfs_dir2_sf_hdr {
+       __uint8_t               count;          /* count of entries */
+       __uint8_t               i8count;        /* count of 8-byte inode #s */
+       xfs_dir2_inou_t         parent;         /* parent dir inode number */
+} __arch_pack xfs_dir2_sf_hdr_t;
+
+typedef struct xfs_dir2_sf_entry {
+       __u8                    namelen;        /* actual name length */
+       xfs_dir2_sf_off_t       offset;         /* saved offset */
+       __u8                    name[];         /* name, variable size */
+       /*
+        * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
+        * variable offset after the name.
+        */
+} __arch_pack xfs_dir2_sf_entry_t;
+
+static inline int xfs_dir2_sf_hdr_size(int i8count)
+{
+       return sizeof(struct xfs_dir2_sf_hdr) -
+               (i8count == 0) *
+               (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
+}
+
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
+{
+       return get_unaligned_be16(&sfep->offset.i);
+}
+
+static inline void
+xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
+{
+       put_unaligned_be16(off, &sfep->offset.i);
+}
+
+static inline int
+xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
+{
+       return sizeof(struct xfs_dir2_sf_entry) +       /* namelen + offset */
+               len +                                   /* name */
+               (hdr->i8count ?                         /* ino */
+                sizeof(xfs_dir2_ino8_t) :
+                sizeof(xfs_dir2_ino4_t));
+}
+
+static inline struct xfs_dir2_sf_entry *
+xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
+{
+       return (struct xfs_dir2_sf_entry *)
+               ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
+}
+
+static inline struct xfs_dir2_sf_entry *
+xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
+               struct xfs_dir2_sf_entry *sfep)
+{
+       return (struct xfs_dir2_sf_entry *)
+               ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
+}
+
+
+/*
+ * Data block structures.
+ *
+ * A pure data block looks like the following drawing on disk:
+ *
+ *    +-------------------------------------------------+
+ *    | xfs_dir2_data_hdr_t                             |
+ *    +-------------------------------------------------+
+ *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ *    | ...                                             |
+ *    +-------------------------------------------------+
+ *    | unused space                                    |
+ *    +-------------------------------------------------+
+ *
+ * As all the entries are variable size structures the accessors below should
+ * be used to iterate over them.
+ *
+ * In addition to the pure data blocks for the data and node formats,
+ * most structures are also used for the combined data/freespace "block"
+ * format below.
+ */
+
+#define        XFS_DIR2_DATA_ALIGN_LOG 3               /* i.e., 8 bytes */
+#define        XFS_DIR2_DATA_ALIGN     (1 << XFS_DIR2_DATA_ALIGN_LOG)
+#define        XFS_DIR2_DATA_FREE_TAG  0xffff
+#define        XFS_DIR2_DATA_FD_COUNT  3
+
+/*
+ * Directory address space divided into sections,
+ * spaces separated by 32GB.
+ */
+#define        XFS_DIR2_SPACE_SIZE     (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
+#define        XFS_DIR2_DATA_SPACE     0
+#define        XFS_DIR2_DATA_OFFSET    (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
+#define        XFS_DIR2_DATA_FIRSTDB(mp)       \
+       xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
+
+/*
+ * Offsets of . and .. in data space (always block 0)
+ */
+#define        XFS_DIR2_DATA_DOT_OFFSET        \
+       ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
+#define        XFS_DIR2_DATA_DOTDOT_OFFSET     \
+       (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
+#define        XFS_DIR2_DATA_FIRST_OFFSET              \
+       (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
+
+/*
+ * Describe a free area in the data block.
+ *
+ * The freespace will be formatted as a xfs_dir2_data_unused_t.
+ */
+typedef struct xfs_dir2_data_free {
+       __be16                  offset;         /* start of freespace */
+       __be16                  length;         /* length of freespace */
+} xfs_dir2_data_free_t;
+
+/*
+ * Header for the data blocks.
+ *
+ * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
+ */
+typedef struct xfs_dir2_data_hdr {
+       __be32                  magic;          /* XFS_DIR2_DATA_MAGIC or */
+                                               /* XFS_DIR2_BLOCK_MAGIC */
+       xfs_dir2_data_free_t    bestfree[XFS_DIR2_DATA_FD_COUNT];
+} xfs_dir2_data_hdr_t;
+
+/*
+ * Active entry in a data block.
+ *
+ * Aligned to 8 bytes.  After the variable length name field there is a
+ * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
+ */
+typedef struct xfs_dir2_data_entry {
+       __be64                  inumber;        /* inode number */
+       __u8                    namelen;        /* name length */
+       __u8                    name[];         /* name bytes, no null */
+     /*        __be16                  tag; */         /* starting offset of us */
+} xfs_dir2_data_entry_t;
+
+/*
+ * Unused entry in a data block.
+ *
+ * Aligned to 8 bytes.  Tag appears as the last 2 bytes and must be accessed
+ * using xfs_dir2_data_unused_tag_p.
+ */
+typedef struct xfs_dir2_data_unused {
+       __be16                  freetag;        /* XFS_DIR2_DATA_FREE_TAG */
+       __be16                  length;         /* total free length */
+                                               /* variable offset */
+       __be16                  tag;            /* starting offset of us */
+} xfs_dir2_data_unused_t;
+
+/*
+ * Size of a data entry.
+ */
+static inline int xfs_dir2_data_entsize(int n)
+{
+       return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
+                (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
+}
+
+/*
+ * Pointer to an entry's tag word.
+ */
+static inline __be16 *
+xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
+{
+       return (__be16 *)((char *)dep +
+               xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
+}
+
+/*
+ * Pointer to a freespace's tag word.
+ */
+static inline __be16 *
+xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
+{
+       return (__be16 *)((char *)dup +
+                       be16_to_cpu(dup->length) - sizeof(__be16));
+}
+
+/*
+ * Leaf block structures.
+ *
+ * A pure leaf block looks like the following drawing on disk:
+ *
+ *    +---------------------------+
+ *    | xfs_dir2_leaf_hdr_t       |
+ *    +---------------------------+
+ *    | xfs_dir2_leaf_entry_t     |
+ *    | xfs_dir2_leaf_entry_t     |
+ *    | xfs_dir2_leaf_entry_t     |
+ *    | xfs_dir2_leaf_entry_t     |
+ *    | ...                       |
+ *    +---------------------------+
+ *    | xfs_dir2_data_off_t       |
+ *    | xfs_dir2_data_off_t       |
+ *    | xfs_dir2_data_off_t       |
+ *    | ...                       |
+ *    +---------------------------+
+ *    | xfs_dir2_leaf_tail_t      |
+ *    +---------------------------+
+ *
+ * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
+ * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
+ * for directories with separate leaf nodes and free space blocks
+ * (magic = XFS_DIR2_LEAFN_MAGIC).
+ *
+ * As all the entries are variable size structures the accessors below should
+ * be used to iterate over them.
+ */
+
+/*
+ * Offset of the leaf/node space.  First block in this space
+ * is the btree root.
+ */
+#define        XFS_DIR2_LEAF_SPACE     1
+#define        XFS_DIR2_LEAF_OFFSET    (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
+#define        XFS_DIR2_LEAF_FIRSTDB(mp)       \
+       xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
+
+/*
+ * Leaf block header.
+ */
+typedef struct xfs_dir2_leaf_hdr {
+       xfs_da_blkinfo_t        info;           /* header for da routines */
+       __be16                  count;          /* count of entries */
+       __be16                  stale;          /* count of stale entries */
+} xfs_dir2_leaf_hdr_t;
+
+/*
+ * Leaf block entry.
+ */
+typedef struct xfs_dir2_leaf_entry {
+       __be32                  hashval;        /* hash value of name */
+       __be32                  address;        /* address of data entry */
+} xfs_dir2_leaf_entry_t;
+
+/*
+ * Leaf block tail.
+ */
+typedef struct xfs_dir2_leaf_tail {
+       __be32                  bestcount;
+} xfs_dir2_leaf_tail_t;
+
+/*
+ * Leaf block.
+ */
+typedef struct xfs_dir2_leaf {
+       xfs_dir2_leaf_hdr_t     hdr;            /* leaf header */
+       xfs_dir2_leaf_entry_t   ents[];         /* entries */
+} xfs_dir2_leaf_t;
+
+/*
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
+ */
+
+static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
+{
+       return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) /
+               (uint)sizeof(struct xfs_dir2_leaf_entry);
+}
+
+/*
+ * Get address of the bestcount field in the single-leaf block.
+ */
+static inline struct xfs_dir2_leaf_tail *
+xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
+{
+       return (struct xfs_dir2_leaf_tail *)
+               ((char *)lp + mp->m_dirblksize -
+                 sizeof(struct xfs_dir2_leaf_tail));
+}
+
+/*
+ * Get address of the bests array in the single-leaf block.
+ */
+static inline __be16 *
+xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
+{
+       return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
+}
+
+/*
+ * Convert dataptr to byte in file space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+       return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
+}
+
+/*
+ * Convert byte in file space to dataptr.  It had better be aligned.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+       return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/*
+ * Convert byte in space to (DB) block
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+       return (xfs_dir2_db_t)
+               (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
+}
+
+/*
+ * Convert dataptr to a block number
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+       return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+}
+
+/*
+ * Convert byte in space to offset in a block
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+       return (xfs_dir2_data_aoff_t)(by &
+               ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
+}
+
+/*
+ * Convert dataptr to a byte offset in a block
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
+{
+       return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
+}
+
+/*
+ * Convert block and offset to byte in space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
+                       xfs_dir2_data_aoff_t o)
+{
+       return ((xfs_dir2_off_t)db <<
+               (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
+}
+
+/*
+ * Convert block (DB) to block (dablk)
+ */
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+       return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
+}
+
+/*
+ * Convert byte in space to (DA) block
+ */
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
+{
+       return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
+}
+
+/*
+ * Convert block and offset to dataptr
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
+                          xfs_dir2_data_aoff_t o)
+{
+       return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
+}
+
+/*
+ * Convert block (dablk) to block (DB)
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
+{
+       return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
+}
+
+/*
+ * Convert block (dablk) to byte offset in space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
+{
+       return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
+}
+
+/*
+ * Free space block definitions for the node format.
+ */
+
+/*
+ * Offset of the freespace index.
+ */
+#define        XFS_DIR2_FREE_SPACE     2
+#define        XFS_DIR2_FREE_OFFSET    (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
+#define        XFS_DIR2_FREE_FIRSTDB(mp)       \
+       xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
+
+typedef        struct xfs_dir2_free_hdr {
+       __be32                  magic;          /* XFS_DIR2_FREE_MAGIC */
+       __be32                  firstdb;        /* db of first entry */
+       __be32                  nvalid;         /* count of valid entries */
+       __be32                  nused;          /* count of used entries */
+} xfs_dir2_free_hdr_t;
+
+typedef struct xfs_dir2_free {
+       xfs_dir2_free_hdr_t     hdr;            /* block header */
+       __be16                  bests[];        /* best free counts */
+                                               /* unused entries are -1 */
+} xfs_dir2_free_t;
+
+static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp)
+{
+       return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
+               sizeof(xfs_dir2_data_off_t);
+}
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+       return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
+}
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+static inline int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+       return db % xfs_dir2_free_max_bests(mp);
+}
+
+/*
+ * Single block format.
+ *
+ * The single block format looks like the following drawing on disk:
+ *
+ *    +-------------------------------------------------+
+ *    | xfs_dir2_data_hdr_t                             |
+ *    +-------------------------------------------------+
+ *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
+ *    | ...                                             |
+ *    +-------------------------------------------------+
+ *    | unused space                                    |
+ *    +-------------------------------------------------+
+ *    | ...                                             |
+ *    | xfs_dir2_leaf_entry_t                           |
+ *    | xfs_dir2_leaf_entry_t                           |
+ *    +-------------------------------------------------+
+ *    | xfs_dir2_block_tail_t                           |
+ *    +-------------------------------------------------+
+ *
+ * As all the entries are variable size structures the accessors below should
+ * be used to iterate over them.
+ */
+
+typedef struct xfs_dir2_block_tail {
+       __be32          count;                  /* count of leaf entries */
+       __be32          stale;                  /* count of stale lf entries */
+} xfs_dir2_block_tail_t;
+
+/*
+ * Pointer to the block tail embedded in a data block (1-block format)
+ */
+static inline struct xfs_dir2_block_tail *
+xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
+{
+       return ((struct xfs_dir2_block_tail *)
+               ((char *)hdr + mp->m_dirblksize)) - 1;
+}
+
+/*
+ * Pointer to the leaf entries embedded in a data block (1-block format)
+ */
+static inline struct xfs_dir2_leaf_entry *
+xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
+{
+       return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
+}
+
+#endif /* __XFS_DIR2_FORMAT_H__ */
diff --git a/include/xfs_dir2_leaf.h b/include/xfs_dir2_leaf.h

deleted file mode 100644 (file)

index 6c9539f..0000000
--- a/include/xfs_dir2_leaf.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_LEAF_H__
-#define        __XFS_DIR2_LEAF_H__
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Offset of the leaf/node space.  First block in this space
- * is the btree root.
- */
-#define        XFS_DIR2_LEAF_SPACE     1
-#define        XFS_DIR2_LEAF_OFFSET    (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
-#define        XFS_DIR2_LEAF_FIRSTDB(mp)       \
-       xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
-
-/*
- * Offset in data space of a data entry.
- */
-typedef        __uint32_t      xfs_dir2_dataptr_t;
-#define        XFS_DIR2_MAX_DATAPTR    ((xfs_dir2_dataptr_t)0xffffffff)
-#define        XFS_DIR2_NULL_DATAPTR   ((xfs_dir2_dataptr_t)0)
-
-/*
- * Leaf block header.
- */
-typedef struct xfs_dir2_leaf_hdr {
-       xfs_da_blkinfo_t        info;           /* header for da routines */
-       __be16                  count;          /* count of entries */
-       __be16                  stale;          /* count of stale entries */
-} xfs_dir2_leaf_hdr_t;
-
-/*
- * Leaf block entry.
- */
-typedef struct xfs_dir2_leaf_entry {
-       __be32                  hashval;        /* hash value of name */
-       __be32                  address;        /* address of data entry */
-} xfs_dir2_leaf_entry_t;
-
-/*
- * Leaf block tail.
- */
-typedef struct xfs_dir2_leaf_tail {
-       __be32                  bestcount;
-} xfs_dir2_leaf_tail_t;
-
-/*
- * Leaf block.
- * bests and tail are at the end of the block for single-leaf only
- * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
- */
-typedef struct xfs_dir2_leaf {
-       xfs_dir2_leaf_hdr_t     hdr;            /* leaf header */
-       xfs_dir2_leaf_entry_t   ents[1];        /* entries */
-                                               /* ... */
-       xfs_dir2_data_off_t     bests[1];       /* best free counts */
-       xfs_dir2_leaf_tail_t    tail;           /* leaf tail */
-} xfs_dir2_leaf_t;
-
-/*
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
-{
-       return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
-              (uint)sizeof(xfs_dir2_leaf_entry_t));
-}
-
-/*
- * Get address of the bestcount field in the single-leaf block.
- */
-static inline xfs_dir2_leaf_tail_t *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
-{
-       return (xfs_dir2_leaf_tail_t *)
-               ((char *)(lp) + (mp)->m_dirblksize - 
-                 (uint)sizeof(xfs_dir2_leaf_tail_t));
-}
-
-/*
- * Get address of the bests array in the single-leaf block.
- */
-static inline __be16 *
-xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
-{
-       return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
-}
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-       return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr.  It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-       return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-       return (xfs_dir2_db_t)((by) >> \
-                ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog));
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-       return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-       return (xfs_dir2_data_aoff_t)((by) & \
-               ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
-       return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
-                       xfs_dir2_data_aoff_t o)
-{
-       return ((xfs_dir2_off_t)(db) << \
-               ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o);
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-       return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
-       return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
-                          xfs_dir2_data_aoff_t o)
-{
-       return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
-{
-       return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
-{
-       return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
-}
-
-/*
- * Function declarations.
- */
-extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
-                                 struct xfs_dabuf *dbp);
-extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
-extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
-                                 struct xfs_dabuf *bp);
-extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
-                                    int *lowstalep, int *highstalep,
-                                    int *lowlogp, int *highlogp);
-extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
-                                 size_t bufsize, xfs_off_t *offset,
-                                 filldir_t filldir);
-extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
-                             struct xfs_dabuf **bpp, int magic);
-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
-                                  int first, int last);
-extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
-                                    struct xfs_dabuf *bp);
-extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
-                                    struct xfs_dabuf *lbp);
-extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
-                                  struct xfs_dabuf *lbp, xfs_dir2_db_t db);
-extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-
-#endif /* __XFS_DIR2_LEAF_H__ */
diff --git a/include/xfs_dir2_node.h b/include/xfs_dir2_node.h

deleted file mode 100644 (file)

index 82dfe71..0000000
--- a/include/xfs_dir2_node.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_NODE_H__
-#define        __XFS_DIR2_NODE_H__
-
-/*
- * Directory version 2, btree node format structures
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_da_state;
-struct xfs_da_state_blk;
-struct xfs_inode;
-struct xfs_trans;
-
-/*
- * Offset of the freespace index.
- */
-#define        XFS_DIR2_FREE_SPACE     2
-#define        XFS_DIR2_FREE_OFFSET    (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
-#define        XFS_DIR2_FREE_FIRSTDB(mp)       \
-       xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
-
-#define        XFS_DIR2_FREE_MAGIC     0x58443246      /* XD2F */
-
-typedef        struct xfs_dir2_free_hdr {
-       __be32                  magic;          /* XFS_DIR2_FREE_MAGIC */
-       __be32                  firstdb;        /* db of first entry */
-       __be32                  nvalid;         /* count of valid entries */
-       __be32                  nused;          /* count of used entries */
-} xfs_dir2_free_hdr_t;
-
-typedef struct xfs_dir2_free {
-       xfs_dir2_free_hdr_t     hdr;            /* block header */
-       __be16                  bests[1];       /* best free counts */
-                                               /* unused entries are -1 */
-} xfs_dir2_free_t;
-
-#define        XFS_DIR2_MAX_FREE_BESTS(mp)     \
-       (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
-        (uint)sizeof(xfs_dir2_data_off_t))
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static inline xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-       return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static inline int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
-       return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
-}
-
-extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
-                                struct xfs_dabuf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
-                                    struct xfs_da_args *args, int *indexp,
-                                    struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
-                               struct xfs_dabuf *leaf2_bp);
-extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
-                               struct xfs_da_state_blk *oldblk,
-                               struct xfs_da_state_blk *newblk);
-extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
-extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
-                                    struct xfs_da_state_blk *drop_blk,
-                                    struct xfs_da_state_blk *save_blk);
-extern int xfs_dir2_node_addname(struct xfs_da_args *args);
-extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_node_removename(struct xfs_da_args *args);
-extern int xfs_dir2_node_replace(struct xfs_da_args *args);
-extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
-                                  int *rvalp);
-
-#endif /* __XFS_DIR2_NODE_H__ */
diff --git a/include/xfs_dir2_sf.h b/include/xfs_dir2_sf.h

deleted file mode 100644 (file)

index 6ac44b5..0000000
--- a/include/xfs_dir2_sf.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_SF_H__
-#define        __XFS_DIR2_SF_H__
-
-/*
- * Directory layout when stored internal to an inode.
- *
- * Small directories are packed as tightly as possible so as to
- * fit into the literal area of the inode.
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_block;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Inode number stored as 8 8-bit values.
- */
-typedef        struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
-
-/*
- * Inode number stored as 4 8-bit values.
- * Works a lot of the time, when all the inode numbers in a directory
- * fit in 32 bits.
- */
-typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
-
-typedef union {
-       xfs_dir2_ino8_t i8;
-       xfs_dir2_ino4_t i4;
-} xfs_dir2_inou_t;
-#define        XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
-
-/*
- * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
- * Only need 16 bits, this is the byte offset into the single block form.
- */
-typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
-
-/*
- * The parent directory has a dedicated field, and the self-pointer must
- * be calculated on the fly.
- *
- * Entries are packed toward the top as tightly as possible.  The header
- * and the elements must be memcpy'd out into a work area to get correct
- * alignment for the inode number fields.
- */
-typedef struct xfs_dir2_sf_hdr {
-       __uint8_t               count;          /* count of entries */
-       __uint8_t               i8count;        /* count of 8-byte inode #s */
-       xfs_dir2_inou_t         parent;         /* parent dir inode number */
-} __arch_pack xfs_dir2_sf_hdr_t;
-
-typedef struct xfs_dir2_sf_entry {
-       __uint8_t               namelen;        /* actual name length */
-       xfs_dir2_sf_off_t       offset;         /* saved offset */
-       __uint8_t               name[1];        /* name, variable size */
-       xfs_dir2_inou_t         inumber;        /* inode number, var. offset */
-} __arch_pack xfs_dir2_sf_entry_t; 
-
-typedef struct xfs_dir2_sf {
-       xfs_dir2_sf_hdr_t       hdr;            /* shortform header */
-       xfs_dir2_sf_entry_t     list[1];        /* shortform entries */
-} xfs_dir2_sf_t;
-
-static inline int xfs_dir2_sf_hdr_size(int i8count)
-{
-       return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
-               ((i8count) == 0) * \
-               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
-{
-       return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
-}
-
-static inline xfs_intino_t
-xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
-{
-       return ((sfp)->hdr.i8count == 0 ? \
-               (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \
-               (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
-}
-
-static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
-                                               xfs_dir2_inou_t *to)
-{
-       if ((sfp)->hdr.i8count == 0)
-               XFS_PUT_DIR_INO4(*(from), (to)->i4);
-       else
-               XFS_PUT_DIR_INO8(*(from), (to)->i8);
-}
-
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
-{
-       return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
-}
-
-static inline void
-xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
-{
-       INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
-}
-
-static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
-{
-       return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
-               ((sfp)->hdr.i8count == 0) * \
-               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline int
-xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
-       return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
-               ((sfp)->hdr.i8count == 0) * \
-               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
-{
-       return ((xfs_dir2_sf_entry_t *) \
-               ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
-}
-
-static inline xfs_dir2_sf_entry_t *
-xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
-       return ((xfs_dir2_sf_entry_t *) \
-               ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
-}
-
-/*
- * Functions.
- */
-extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
-                                struct xfs_dir2_block *block,
-                                xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
-                               int size, xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
-extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
-                               xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_SF_H__ */
diff --git a/include/xfs_fs.h b/include/xfs_fs.h

index faac5afd15ebf47377723417fb51eb9302bad82b..1cc1aa048fbf4b369934696fe133f94de1633ad4 100644 (file)
--- a/include/xfs_fs.h
+++ b/include/xfs_fs.h
@@ -233,7 +233,8 @@ typedef struct xfs_fsop_resblks {
  #define XFS_FSOP_GEOM_FLAGS_LOGV2      0x0100  /* log format version 2 */
  #define XFS_FSOP_GEOM_FLAGS_SECTOR     0x0200  /* sector sizes >1BB    */
  #define XFS_FSOP_GEOM_FLAGS_ATTR2      0x0400  /* inline attributes rework */
-#define XFS_FSOP_GEOM_FLAGS_DIRV2CI    0x1000  /* ASCII only CI names */
+#define XFS_FSOP_GEOM_FLAGS_PROJID32   0x0800  /* 32-bit project IDs   */
+#define XFS_FSOP_GEOM_FLAGS_DIRV2CI    0x1000  /* ASCII only CI names  */
  #define XFS_FSOP_GEOM_FLAGS_LAZYSB     0x4000  /* lazy superblock counters */
  
  
@@ -249,6 +250,11 @@ typedef struct xfs_fsop_resblks {
  #define XFS_MAX_LOG_BYTES \
         ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
  
+/* Used for sanity checks on superblock */
+#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
+#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) *     \
+                        (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
+
  /*
   * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
   */
@@ -333,6 +339,35 @@ typedef struct xfs_error_injection {
  } xfs_error_injection_t;
  
  
+/*
+ * Speculative preallocation trimming.
+ */
+#define XFS_EOFBLOCKS_VERSION          1
+struct xfs_eofblocks {
+       __u32           eof_version;
+       __u32           eof_flags;
+       uid_t           eof_uid;
+       gid_t           eof_gid;
+       prid_t          eof_prid;
+       __u32           pad32;
+       __u64           eof_min_file_size;
+       __u64           pad64[12];
+};
+
+/* eof_flags values */
+#define XFS_EOF_FLAGS_SYNC             (1 << 0) /* sync/wait mode scan */
+#define XFS_EOF_FLAGS_UID              (1 << 1) /* filter by uid */
+#define XFS_EOF_FLAGS_GID              (1 << 2) /* filter by gid */
+#define XFS_EOF_FLAGS_PRID             (1 << 3) /* filter by project id */
+#define XFS_EOF_FLAGS_MINFILESIZE      (1 << 4) /* filter by min file size */
+#define XFS_EOF_FLAGS_VALID    \
+       (XFS_EOF_FLAGS_SYNC |   \
+        XFS_EOF_FLAGS_UID |    \
+        XFS_EOF_FLAGS_GID |    \
+        XFS_EOF_FLAGS_PRID |   \
+        XFS_EOF_FLAGS_MINFILESIZE)
+
+
  /*
   * The user-level Handle Request interface structure.
   */
@@ -451,6 +486,7 @@ typedef struct xfs_handle {
  /*     XFS_IOC_GETBIOSIZE ---- deprecated 47      */
  #define XFS_IOC_GETBMAPX       _IOWR('X', 56, struct getbmap)
  #define XFS_IOC_ZERO_RANGE     _IOW ('X', 57, struct xfs_flock64)
+#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
  
  /*
   * ioctl commands that replace IRIX syssgi()'s
diff --git a/include/xfs_ialloc.h b/include/xfs_ialloc.h

index bb5385475e1f904b74bf24ec533b2640e96b867e..c8da3df271e6b94c97bc76d627ad1c1052ad03b7 100644 (file)
--- a/include/xfs_ialloc.h
+++ b/include/xfs_ialloc.h
@@ -46,15 +46,6 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
                 (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
  }
  
-/*
- * Find a free (set) bit in the inode bitmask.
- */
-static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
-{
-       return xfs_lowbit64(*fp);
-}
-
-
  /*
   * Allocate an inode on disk.
   * Mode is used to tell whether the new inode will need space, and whether
@@ -81,11 +72,9 @@ int                                  /* error */
  xfs_dialloc(
         struct xfs_trans *tp,           /* transaction pointer */
         xfs_ino_t       parent,         /* parent inode (directory) */
-       mode_t          mode,           /* mode bits for new inode */
+       umode_t         mode,           /* mode bits for new inode */
         int             okalloc,        /* ok to allocate more space */
         struct xfs_buf  **agbp,         /* buf for a.g. inode header */
-       boolean_t       *alloc_done,    /* an allocation was done to replenish
-                                          the free inodes */
         xfs_ino_t       *inop);         /* inode number allocated */
  
  /*
@@ -158,7 +147,9 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
  /*
   * Get the data from the pointed-to record.
   */
-extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
+int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
                 xfs_inobt_rec_incore_t *rec, int *stat);
  
+extern const struct xfs_buf_ops xfs_agi_buf_ops;
+
  #endif /* __XFS_IALLOC_H__ */
diff --git a/include/xfs_ialloc_btree.h b/include/xfs_ialloc_btree.h

index f782ad0c4769483ae23ab25dade35ee737ea4c63..25c0239a8eab78f7ba60be5ee02c4d85cd1649aa 100644 (file)
--- a/include/xfs_ialloc_btree.h
+++ b/include/xfs_ialloc_btree.h
@@ -109,4 +109,6 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
                 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
  extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
  
+extern const struct xfs_buf_ops xfs_inobt_buf_ops;
+
  #endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/include/xfs_inode.h b/include/xfs_inode.h

index ca5654424dae43f2e2f093121e921018209d5ebf..437b3af99b1fb6f4da07141d40494da76a57b5f4 100644 (file)
--- a/include/xfs_inode.h
+++ b/include/xfs_inode.h
@@ -66,8 +66,6 @@ typedef struct xfs_ifork {
         struct xfs_btree_block  *if_broot;      /* file's incore btree root */
         short                   if_broot_bytes; /* bytes allocated for root */
         unsigned char           if_flags;       /* per-fork flags */
-       unsigned char           if_ext_max;     /* max # of extent records */
-       xfs_extnum_t            if_lastex;      /* last if_extents used */
         union {
                 xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
                 xfs_ext_irec_t  *if_ext_irec;   /* irec map file exts */
@@ -111,7 +109,7 @@ struct xfs_imap {
   * Generally, we do not want to hold the i_rlock while holding the
   * i_ilock. Hierarchy is i_iolock followed by i_rlock.
   *
- * xfs_iptr_t contains all the inode fields upto and including the
+ * xfs_iptr_t contains all the inode fields up to and including the
   * i_mnext and i_mprev fields, it is used as a marker in the inode
   * chain off the mount structure by xfs_sync calls.
   */
@@ -207,29 +205,12 @@ typedef struct xfs_icdinode {
         ((w) == XFS_DATA_FORK ? \
                 ((ip)->i_d.di_nextents = (n)) : \
                 ((ip)->i_d.di_anextents = (n)))
+#define XFS_IFORK_MAXEXT(ip, w) \
+       (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
  
-/*
- * Project quota id helpers (previously projid was 16bit only
- * and using two 16bit values to hold new 32bit projid was choosen
- * to retain compatibility with "old" filesystems).
- */
-static inline __uint32_t
-xfs_get_projid(struct xfs_icdinode i_d)
-{
-       return (__uint32_t)i_d.di_projid_hi << 16 | i_d.di_projid_lo;
-}
-
-static inline void
-xfs_set_projid(struct xfs_icdinode *i_d,
-               __uint32_t projid)
-{
-       i_d->di_projid_hi = (__uint16_t) (projid >> 16);
-       i_d->di_projid_lo = (__uint16_t) (projid & 0xffff);
-}
  
  #ifdef __KERNEL__
  
-struct bhv_desc;
  struct xfs_buf;
  struct xfs_bmap_free;
  struct xfs_bmbt_irec;
@@ -238,12 +219,6 @@ struct xfs_mount;
  struct xfs_trans;
  struct xfs_dquot;
  
-typedef struct dm_attrs_s {
-       __uint32_t      da_dmevmask;    /* DMIG event mask */
-       __uint16_t      da_dmstate;     /* DMIG state info */
-       __uint16_t      da_pad;         /* DMIG extra padding */
-} dm_attrs_t;
-
  typedef struct xfs_inode {
         /* Inode linking and identification information. */
         struct xfs_mount        *i_mount;       /* fs mount struct ptr */
@@ -259,32 +234,21 @@ typedef struct xfs_inode {
         xfs_ifork_t             i_df;           /* data fork */
  
         /* Transaction and locking information. */
-       struct xfs_trans        *i_transp;      /* ptr to owning transaction*/
         struct xfs_inode_log_item *i_itemp;     /* logging information */
         mrlock_t                i_lock;         /* inode lock */
         mrlock_t                i_iolock;       /* inode IO lock */
-       struct completion       i_flush;        /* inode flush completion q */
         atomic_t                i_pincount;     /* inode pin count */
-       wait_queue_head_t       i_ipin_wait;    /* inode pinning wait queue */
         spinlock_t              i_flags_lock;   /* inode i_flags lock */
         /* Miscellaneous state. */
-       unsigned short          i_flags;        /* see defined flags below */
-       unsigned char           i_update_core;  /* timestamps/size is dirty */
+       unsigned long           i_flags;        /* see defined flags below */
         unsigned int            i_delayed_blks; /* count of delay alloc blks */
  
         xfs_icdinode_t          i_d;            /* most of ondisk inode */
  
-       xfs_fsize_t             i_size;         /* in-memory size */
-       xfs_fsize_t             i_new_size;     /* size when write completes */
-       atomic_t                i_iocount;      /* outstanding I/O count */
-
         /* VFS inode */
         struct inode            i_vnode;        /* embedded VFS inode */
  } xfs_inode_t;
  
-#define XFS_ISIZE(ip)  (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
-                               (ip)->i_size : (ip)->i_d.di_size;
-
  /* Convert from vfs inode to xfs inode */
  static inline struct xfs_inode *XFS_I(struct inode *inode)
  {
@@ -297,6 +261,32 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
         return &ip->i_vnode;
  }
  
+/*
+ * For regular files we only update the on-disk filesize when actually
+ * writing data back to disk.  Until then only the copy in the VFS inode
+ * is uptodate.
+ */
+static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
+{
+       if (S_ISREG(ip->i_d.di_mode))
+               return i_size_read(VFS_I(ip));
+       return ip->i_d.di_size;
+}
+
+/*
+ * If this I/O goes past the on-disk inode size update it unless it would
+ * be past the current in-core inode size.
+ */
+static inline xfs_fsize_t
+xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
+{
+       xfs_fsize_t i_size = i_size_read(VFS_I(ip));
+
+       if (new_size > i_size)
+               new_size = i_size;
+       return new_size > ip->i_d.di_size ? new_size : 0;
+}
+
  /*
   * i_flags helper functions
   */
@@ -351,9 +341,22 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
         return ret;
  }
  
+static inline int
+xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
+{
+       int ret;
+
+       spin_lock(&ip->i_flags_lock);
+       ret = ip->i_flags & flags;
+       if (!ret)
+               ip->i_flags |= flags;
+       spin_unlock(&ip->i_flags_lock);
+       return ret;
+}
+
  /*
   * Project quota id helpers (previously projid was 16bit only
- * and using two 16bit values to hold new 32bit projid was choosen
+ * and using two 16bit values to hold new 32bit projid was chosen
   * to retain compatibility with "old" filesystems).
   */
  static inline prid_t
@@ -371,35 +374,58 @@ xfs_set_projid(struct xfs_inode *ip,
  }
  
  /*
- * Manage the i_flush queue embedded in the inode.  This completion
- * queue synchronizes processes attempting to flush the in-core
- * inode back to disk.
+ * In-core inode flags.
+ */
+#define XFS_IRECLAIM           (1 << 0) /* started reclaiming this inode */
+#define XFS_ISTALE             (1 << 1) /* inode has been staled */
+#define XFS_IRECLAIMABLE       (1 << 2) /* inode can be reclaimed */
+#define XFS_INEW               (1 << 3) /* inode has just been allocated */
+#define XFS_IFILESTREAM                (1 << 4) /* inode is in a filestream dir. */
+#define XFS_ITRUNCATED         (1 << 5) /* truncated down so flush-on-close */
+#define XFS_IDIRTY_RELEASE     (1 << 6) /* dirty release already seen */
+#define __XFS_IFLOCK_BIT       7        /* inode is being flushed right now */
+#define XFS_IFLOCK             (1 << __XFS_IFLOCK_BIT)
+#define __XFS_IPINNED_BIT      8        /* wakeup key for zero pin count */
+#define XFS_IPINNED            (1 << __XFS_IPINNED_BIT)
+#define XFS_IDONTCACHE         (1 << 9) /* don't cache the inode long term */
+
+/*
+ * Per-lifetime flags need to be reset when re-using a reclaimable inode during
+ * inode lookup. This prevents unintended behaviour on the new inode from
+ * occurring.
   */
-static inline void xfs_iflock(xfs_inode_t *ip)
+#define XFS_IRECLAIM_RESET_FLAGS       \
+       (XFS_IRECLAIMABLE | XFS_IRECLAIM | \
+        XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
+        XFS_IFILESTREAM)
+
+/*
+ * Synchronize processes attempting to flush the in-core inode back to disk.
+ */
+
+extern void __xfs_iflock(struct xfs_inode *ip);
+
+static inline int xfs_iflock_nowait(struct xfs_inode *ip)
  {
-       wait_for_completion(&ip->i_flush);
+       return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
  }
  
-static inline int xfs_iflock_nowait(xfs_inode_t *ip)
+static inline void xfs_iflock(struct xfs_inode *ip)
  {
-       return try_wait_for_completion(&ip->i_flush);
+       if (!xfs_iflock_nowait(ip))
+               __xfs_iflock(ip);
  }
  
-static inline void xfs_ifunlock(xfs_inode_t *ip)
+static inline void xfs_ifunlock(struct xfs_inode *ip)
  {
-       complete(&ip->i_flush);
+       xfs_iflags_clear(ip, XFS_IFLOCK);
+       wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
  }
  
-/*
- * In-core inode flags.
- */
-#define XFS_IRECLAIM           0x0001  /* started reclaiming this inode */
-#define XFS_ISTALE             0x0002  /* inode has been staled */
-#define XFS_IRECLAIMABLE       0x0004  /* inode can be reclaimed */
-#define XFS_INEW               0x0008  /* inode has just been allocated */
-#define XFS_IFILESTREAM                0x0010  /* inode is in a filestream directory */
-#define XFS_ITRUNCATED         0x0020  /* truncated down so flush-on-close */
-#define XFS_IDIRTY_RELEASE     0x0040  /* dirty release already seen */
+static inline int xfs_isiflocked(struct xfs_inode *ip)
+{
+       return xfs_iflags_test(ip, XFS_IFLOCK);
+}
  
  /*
   * Flags for inode locking.
@@ -410,7 +436,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
  #define        XFS_IOLOCK_SHARED       (1<<1)
  #define        XFS_ILOCK_EXCL          (1<<2)
  #define        XFS_ILOCK_SHARED        (1<<3)
-#define        XFS_IUNLOCK_NONOTIFY    (1<<4)
  
  #define XFS_LOCK_MASK          (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
                                 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
@@ -419,35 +444,41 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
         { XFS_IOLOCK_EXCL,      "IOLOCK_EXCL" }, \
         { XFS_IOLOCK_SHARED,    "IOLOCK_SHARED" }, \
         { XFS_ILOCK_EXCL,       "ILOCK_EXCL" }, \
-       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }, \
-       { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
+       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }
  
  
  /*
   * Flags for lockdep annotations.
   *
- * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes
- * (ie directory operations that require locking a directory inode and
- * an entry inode).  The first inode gets locked with this flag so it
- * gets a lockdep subclass of 1 and the second lock will have a lockdep
- * subclass of 0.
+ * XFS_LOCK_PARENT - for directory operations that require locking a
+ * parent directory inode and a child entry inode.  The parent gets locked
+ * with this flag so it gets a lockdep subclass of 1 and the child entry
+ * lock will have a lockdep subclass of 0.
+ *
+ * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
+ * inodes do not participate in the normal lock order, and thus have their
+ * own subclasses.
   *
   * XFS_LOCK_INUMORDER - for locking several inodes at the some time
   * with xfs_lock_inodes().  This flag is used as the starting subclass
   * and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 2, the
- * second lock will have a lockdep subclass of 3, and so on. It is
+ * So the first lock acquired will have a lockdep subclass of 4, the
+ * second lock will have a lockdep subclass of 5, and so on. It is
   * the responsibility of the class builder to shift this to the correct
   * portion of the lock_mode lockdep mask.
   */
  #define XFS_LOCK_PARENT                1
-#define XFS_LOCK_INUMORDER     2
+#define XFS_LOCK_RTBITMAP      2
+#define XFS_LOCK_RTSUM         3
+#define XFS_LOCK_INUMORDER     4
  
  #define XFS_IOLOCK_SHIFT       16
  #define        XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
  
  #define XFS_ILOCK_SHIFT                24
  #define        XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
+#define        XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
+#define        XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
  
  #define XFS_IOLOCK_DEP_MASK    0x00ff0000
  #define XFS_ILOCK_DEP_MASK     0xff000000
@@ -456,18 +487,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
  #define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
  #define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
  
-extern struct lock_class_key xfs_iolock_reclaimable;
-
-/*
- * Flags for xfs_itruncate_start().
- */
-#define        XFS_ITRUNC_DEFINITE     0x1
-#define        XFS_ITRUNC_MAYBE        0x2
-
-#define XFS_ITRUNC_FLAGS \
-       { XFS_ITRUNC_DEFINITE,  "DEFINITE" }, \
-       { XFS_ITRUNC_MAYBE,     "MAYBE" }
-
  /*
   * For multiple groups support: if S_ISGID bit is set in the parent
   * directory, group of new file is set to that of the parent, and
@@ -477,11 +496,10 @@ extern struct lock_class_key xfs_iolock_reclaimable;
         (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
          ((pip)->i_d.di_mode & S_ISGID))
  
+
  /*
- * xfs_iget.c prototypes.
+ * xfs_inode.c prototypes.
   */
-int            xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
-                        uint, uint, xfs_inode_t **);
  void           xfs_ilock(xfs_inode_t *, uint);
  int            xfs_ilock_nowait(xfs_inode_t *, uint);
  void           xfs_iunlock(xfs_inode_t *, uint);
@@ -489,33 +507,25 @@ void              xfs_ilock_demote(xfs_inode_t *, uint);
  int            xfs_isilocked(xfs_inode_t *, uint);
  uint           xfs_ilock_map_shared(xfs_inode_t *);
  void           xfs_iunlock_map_shared(xfs_inode_t *, uint);
-void           xfs_inode_free(struct xfs_inode *ip);
-
-/*
- * xfs_inode.c prototypes.
- */
-int            xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
+int            xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
                            xfs_nlink_t, xfs_dev_t, prid_t, int,
-                          struct xfs_buf **, boolean_t *, xfs_inode_t **);
+                          struct xfs_buf **, xfs_inode_t **);
  
  uint           xfs_ip2xflags(struct xfs_inode *);
  uint           xfs_dic2xflags(struct xfs_dinode *);
  int            xfs_ifree(struct xfs_trans *, xfs_inode_t *,
                            struct xfs_bmap_free *);
-int            xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
-int            xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
-                                    xfs_fsize_t, int, int);
+int            xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
+                                     int, xfs_fsize_t);
  int            xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
  
  void           xfs_iext_realloc(xfs_inode_t *, int, int);
  void           xfs_iunpin_wait(xfs_inode_t *);
-int            xfs_iflush(xfs_inode_t *, uint);
+int            xfs_iflush(struct xfs_inode *, struct xfs_buf **);
  void           xfs_lock_inodes(xfs_inode_t **, int, uint);
  void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
  
-void           xfs_synchronize_times(xfs_inode_t *);
-void           xfs_mark_inode_dirty(xfs_inode_t *);
-void           xfs_mark_inode_dirty_sync(xfs_inode_t *);
+xfs_extlen_t   xfs_get_extsz_hint(struct xfs_inode *ip);
  
  #define IHOLD(ip) \
  do { \
@@ -537,19 +547,15 @@ do { \
   */
  #define XFS_IGET_CREATE                0x1
  #define XFS_IGET_UNTRUSTED     0x2
+#define XFS_IGET_DONTCACHE     0x4
  
-int            xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
-                           xfs_ino_t, struct xfs_dinode **,
-                           struct xfs_buf **, int *, uint);
-int            xfs_itobp(struct xfs_mount *, struct xfs_trans *,
-                         struct xfs_inode *, struct xfs_dinode **,
-                         struct xfs_buf **, uint);
+int            xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
+                              struct xfs_imap *, struct xfs_dinode **,
+                              struct xfs_buf **, uint, uint);
  int            xfs_iread(struct xfs_mount *, struct xfs_trans *,
                           struct xfs_inode *, uint);
  void           xfs_dinode_to_disk(struct xfs_dinode *,
                                    struct xfs_icdinode *);
-void           xfs_dinode_from_disk(struct xfs_icdinode *,
-                                    struct xfs_dinode *);
  void           xfs_idestroy_fork(struct xfs_inode *, int);
  void           xfs_idata_realloc(struct xfs_inode *, int, int);
  void           xfs_iroot_realloc(struct xfs_inode *, int, int);
@@ -579,16 +585,10 @@ void              xfs_iext_irec_compact(xfs_ifork_t *);
  void           xfs_iext_irec_compact_pages(xfs_ifork_t *);
  void           xfs_iext_irec_compact_full(xfs_ifork_t *);
  void           xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
+bool           xfs_can_free_eofblocks(struct xfs_inode *, bool);
  
  #define xfs_ipincount(ip)      ((unsigned int) atomic_read(&ip->i_pincount))
  
-#ifdef DEBUG
-void           xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
-                               xfs_fsize_t);
-#else  /* DEBUG */
-#define xfs_isize_check(mp, ip, isize)
-#endif /* DEBUG */
-
  #if defined(DEBUG)
  void           xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
  #else
@@ -598,5 +598,6 @@ void                xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
  extern struct kmem_zone        *xfs_ifork_zone;
  extern struct kmem_zone        *xfs_inode_zone;
  extern struct kmem_zone        *xfs_ili_zone;
+extern const struct xfs_buf_ops xfs_inode_buf_ops;
  
  #endif /* __XFS_INODE_H__ */
diff --git a/include/xfs_inode_item.h b/include/xfs_inode_item.h

index d3dee61e6d91fde1671157b3ee2deeee8df49fcd..779812fb3d80b27c94d97f288fea9195db8be4fb 100644 (file)
--- a/include/xfs_inode_item.h
+++ b/include/xfs_inode_item.h
@@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 {
  #define        XFS_ILOG_AEXT   0x080   /* log i_af.if_extents */
  #define        XFS_ILOG_ABROOT 0x100   /* log i_af.i_broot */
  
+
+/*
+ * The timestamps are dirty, but not necessarily anything else in the inode
+ * core.  Unlike the other fields above this one must never make it to disk
+ * in the ilf_fields of the inode_log_format, but is purely stored in-memory in
+ * ili_fields in the inode_log_item.
+ */
+#define XFS_ILOG_TIMESTAMP     0x4000
+
  #define        XFS_ILOG_NONCORE        (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
                                  XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
                                  XFS_ILOG_UUID | XFS_ILOG_ADATA | \
@@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 {
                                  XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
                                  XFS_ILOG_DEV | XFS_ILOG_UUID | \
                                  XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-                                XFS_ILOG_ABROOT)
+                                XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
  
  static inline int xfs_ilog_fbroot(int w)
  {
@@ -134,30 +143,25 @@ typedef struct xfs_inode_log_item {
         unsigned short          ili_lock_flags;    /* lock flags */
         unsigned short          ili_logged;        /* flushed logged data */
         unsigned int            ili_last_fields;   /* fields when flushed */
+       unsigned int            ili_fields;        /* fields to be logged */
         struct xfs_bmbt_rec     *ili_extents_buf;  /* array of logged
                                                       data exts */
         struct xfs_bmbt_rec     *ili_aextents_buf; /* array of logged
                                                       attr exts */
-#ifdef XFS_TRANS_DEBUG
-       int                     ili_root_size;
-       char                    *ili_orig_root;
-#endif
         xfs_inode_log_format_t  ili_format;        /* logged structure */
  } xfs_inode_log_item_t;
  
  
  static inline int xfs_inode_clean(xfs_inode_t *ip)
  {
-       return (!ip->i_itemp ||
-               !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-              !ip->i_update_core;
+       return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
  }
  
  extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
  extern void xfs_inode_item_destroy(struct xfs_inode *);
  extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
  extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
-extern void xfs_iflush_abort(struct xfs_inode *);
+extern void xfs_iflush_abort(struct xfs_inode *, bool);
  extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
                                          xfs_inode_log_format_t *);
  
diff --git a/include/xfs_inum.h b/include/xfs_inum.h

index b8e4ee4e89a431904f9e9aa4e6576f7db35d7924..90efdaf1706fee07f38c84324450feca17a581c2 100644 (file)
--- a/include/xfs_inum.h
+++ b/include/xfs_inum.h
@@ -26,22 +26,6 @@
   * high agno_log-agblklog-inopblog bits - 0
   */
  
-typedef        __uint32_t      xfs_agino_t;    /* within allocation grp inode number */
-
-/*
- * Useful inode bits for this kernel.
- * Used in some places where having 64-bits in the 32-bit kernels
- * costs too much.
- */
-#if XFS_BIG_INUMS
-typedef        xfs_ino_t       xfs_intino_t;
-#else
-typedef        __uint32_t      xfs_intino_t;
-#endif
-
-#define        NULLFSINO       ((xfs_ino_t)-1)
-#define        NULLAGINO       ((xfs_agino_t)-1)
-
  struct xfs_mount;
  
  #define        XFS_INO_MASK(k)                 (__uint32_t)((1ULL << (k)) - 1)
diff --git a/include/xfs_log.h b/include/xfs_log.h

index 916eb7db14d9a09ff541882bf12905d9fd5068ea..5caee96059dfb3a9fe5a1f03cd84c868674626b0 100644 (file)
--- a/include/xfs_log.h
+++ b/include/xfs_log.h
@@ -52,15 +52,6 @@ static inline xfs_lsn_t      _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
   */
  #define XFS_LOG_REL_PERM_RESERV        0x1
  
-/*
- * Flags to xfs_log_reserve()
- *
- *     XFS_LOG_PERM_RESERV: Permanent reservation.  When writes are
- *             performed against this type of reservation, the reservation
- *             is not decreased.  Long running transactions should use this.
- */
-#define XFS_LOG_PERM_RESERV    0x2
-
  /*
   * Flags to xfs_log_force()
   *
@@ -137,7 +128,7 @@ struct xfs_trans;
  void   xfs_log_item_init(struct xfs_mount      *mp,
                         struct xfs_log_item     *item,
                         int                     type,
-                       struct xfs_item_ops     *ops);
+                       const struct xfs_item_ops *ops);
  
  xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
                        struct xlog_ticket *ticket,
@@ -160,8 +151,9 @@ int   xfs_log_mount(struct xfs_mount        *mp,
                         xfs_daddr_t             start_block,
                         int                     num_bblocks);
  int      xfs_log_mount_finish(struct xfs_mount *mp);
-void     xfs_log_move_tail(struct xfs_mount    *mp,
-                           xfs_lsn_t           tail_lsn);
+xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
+xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
+void     xfs_log_space_wake(struct xfs_mount *mp);
  int      xfs_log_notify(struct xfs_mount       *mp,
                          struct xlog_in_core    *iclog,
                          xfs_log_callback_t     *callback_entry);
@@ -172,13 +164,9 @@ int          xfs_log_reserve(struct xfs_mount *mp,
                           int              count,
                           struct xlog_ticket **ticket,
                           __uint8_t        clientid,
-                         uint             flags,
+                         bool             permanent,
                           uint             t_type);
-int      xfs_log_write(struct xfs_mount *mp,
-                       xfs_log_iovec_t  region[],
-                       int              nentries,
-                       struct xlog_ticket *ticket,
-                       xfs_lsn_t        *start_lsn);
+int      xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
  int      xfs_log_unmount_write(struct xfs_mount *mp);
  void      xfs_log_unmount(struct xfs_mount *mp);
  int      xfs_log_force_umount(struct xfs_mount *mp, int logerror);
@@ -189,12 +177,13 @@ void        xlog_iodone(struct xfs_buf *);
  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
  void     xfs_log_ticket_put(struct xlog_ticket *ticket);
  
-xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
-
  int    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
-                               struct xfs_log_vec *log_vector,
                                 xfs_lsn_t *commit_lsn, int flags);
  bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
  
+void   xfs_log_work_queue(struct xfs_mount *mp);
+void   xfs_log_worker(struct work_struct *work);
+void   xfs_log_quiesce(struct xfs_mount *mp);
+
  #endif
  #endif /* __XFS_LOG_H__ */
diff --git a/include/xfs_log_priv.h b/include/xfs_log_priv.h

index d5f8be8f4bf603cac8c3f0394759e5fefdd8d42e..16d8d12ea3b472cbf57d714f2952f4782e4c8a41 100644 (file)
--- a/include/xfs_log_priv.h
+++ b/include/xfs_log_priv.h
@@ -19,7 +19,7 @@
  #define __XFS_LOG_PRIV_H__
  
  struct xfs_buf;
-struct log;
+struct xlog;
  struct xlog_ticket;
  struct xfs_mount;
  
@@ -87,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i)
         return be32_to_cpu(i) >> 24;
  }
  
-#define xlog_panic(args...)    cmn_err(CE_PANIC, ## args)
-#define xlog_exit(args...)     cmn_err(CE_PANIC, ## args)
-#define xlog_warn(args...)     cmn_err(CE_WARN, ## args)
-
  /*
   * In core log state
   */
@@ -143,11 +139,13 @@ static inline uint xlog_get_client_id(__be32 i)
  /*
   * Flags for log structure
   */
-#define XLOG_CHKSUM_MISMATCH   0x1     /* used only during recovery */
  #define XLOG_ACTIVE_RECOVERY   0x2     /* in the middle of recovery */
  #define        XLOG_RECOVERY_NEEDED    0x4     /* log was recovered */
  #define XLOG_IO_ERROR          0x8     /* log hit an I/O error, and being
                                            shutdown */
+#define XLOG_TAIL_WARN         0x10    /* log tail verify warning issued */
+
+typedef __uint32_t xlog_tid_t;
  
  #ifdef __KERNEL__
  /*
@@ -240,8 +238,8 @@ typedef struct xlog_res {
  } xlog_res_t;
  
  typedef struct xlog_ticket {
-       wait_queue_head_t  t_wait;       /* ticket wait queue */
         struct list_head   t_queue;      /* reserve/write queue */
+       struct task_struct *t_task;      /* task that owns this ticket */
         xlog_tid_t         t_tid;        /* transaction identifier       : 4  */
         atomic_t           t_ref;        /* ticket reference count       : 4  */
         int                t_curr_res;   /* current reservation in bytes : 4  */
@@ -292,7 +290,7 @@ typedef struct xlog_rec_header {
         __be32    h_len;        /* len in bytes; should be 64-bit aligned: 4 */
         __be64    h_lsn;        /* lsn of this LR                       :  8 */
         __be64    h_tail_lsn;   /* lsn of 1st LR w/ buffers not committed: 8 */
-       __be32    h_chksum;     /* may not be used; non-zero if used    :  4 */
+       __le32    h_crc;        /* crc of log record                    :  4 */
         __be32    h_prev_block; /* block number to previous LR          :  4 */
         __be32    h_num_logops; /* number of log operations in this LR  :  4 */
         __be32    h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
@@ -353,7 +351,7 @@ typedef struct xlog_in_core {
         struct xlog_in_core     *ic_next;
         struct xlog_in_core     *ic_prev;
         struct xfs_buf          *ic_bp;
-       struct log              *ic_log;
+       struct xlog             *ic_log;
         int                     ic_size;
         int                     ic_offset;
         int                     ic_bwritecnt;
@@ -410,7 +408,7 @@ struct xfs_cil_ctx {
   * operations almost as efficient as the old logging methods.
   */
  struct xfs_cil {
-       struct log              *xc_log;
+       struct xlog             *xc_log;
         struct list_head        xc_cil;
         spinlock_t              xc_cil_lock;
         struct xfs_cil_ctx      *xc_ctx;
@@ -418,6 +416,8 @@ struct xfs_cil {
         struct list_head        xc_committing;
         wait_queue_head_t       xc_commit_wait;
         xfs_lsn_t               xc_current_sequence;
+       struct work_struct      xc_push_work;
+       xfs_lsn_t               xc_push_seq;
  };
  
  /*
@@ -470,13 +470,23 @@ struct xfs_cil {
  #define XLOG_CIL_SPACE_LIMIT(log)      (log->l_logsize >> 3)
  #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
  
+/*
+ * ticket grant locks, queues and accounting have their own cachelines
+ * as these are quite hot and can be operated on concurrently.
+ */
+struct xlog_grant_head {
+       spinlock_t              lock ____cacheline_aligned_in_smp;
+       struct list_head        waiters;
+       atomic64_t              grant;
+};
+
  /*
   * The reservation head lsn is not made up of a cycle number and block number.
   * Instead, it uses a cycle number and byte number.  Logs don't expect to
   * overflow 31 bits worth of byte offset, so using a byte number will mean
   * that round off problems won't occur when releasing partial reservations.
   */
-typedef struct log {
+struct xlog {
         /* The following fields don't need locking */
         struct xfs_mount        *l_mp;          /* mount point */
         struct xfs_ail          *l_ailp;        /* AIL log is working with */
@@ -484,6 +494,7 @@ typedef struct log {
         struct xfs_buf          *l_xbuf;        /* extra buffer for log
                                                  * wrapping */
         struct xfs_buftarg      *l_targ;        /* buftarg of log */
+       struct delayed_work     l_work;         /* background flush work */
         uint                    l_flags;
         uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
         struct list_head        *l_buf_cancel_table;
@@ -521,24 +532,15 @@ typedef struct log {
         /* lsn of 1st LR with unflushed * buffers */
         atomic64_t              l_tail_lsn ____cacheline_aligned_in_smp;
  
-       /*
-        * ticket grant locks, queues and accounting have their own cachlines
-        * as these are quite hot and can be operated on concurrently.
-        */
-       spinlock_t              l_grant_reserve_lock ____cacheline_aligned_in_smp;
-       struct list_head        l_reserveq;
-       atomic64_t              l_grant_reserve_head;
-
-       spinlock_t              l_grant_write_lock ____cacheline_aligned_in_smp;
-       struct list_head        l_writeq;
-       atomic64_t              l_grant_write_head;
+       struct xlog_grant_head  l_reserve_head;
+       struct xlog_grant_head  l_write_head;
  
         /* The following field are used for debugging; need to hold icloglock */
  #ifdef DEBUG
         char                    *l_iclog_bak[XLOG_MAX_ICLOGS];
  #endif
  
-} xlog_t;
+};
  
  #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
         ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
@@ -546,15 +548,25 @@ typedef struct log {
  #define XLOG_FORCED_SHUTDOWN(log)      ((log)->l_flags & XLOG_IO_ERROR)
  
  /* common routines */
-extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
-extern int      xlog_recover(xlog_t *log);
-extern int      xlog_recover_finish(xlog_t *log);
-extern void     xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
+extern int
+xlog_recover(
+       struct xlog             *log);
+extern int
+xlog_recover_finish(
+       struct xlog             *log);
+
+extern __le32   xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+                           char *dp, int size);
  
  extern kmem_zone_t *xfs_log_ticket_zone;
-struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
-                               int count, char client, uint xflags,
-                               int alloc_flags);
+struct xlog_ticket *
+xlog_ticket_alloc(
+       struct xlog     *log,
+       int             unit_bytes,
+       int             count,
+       char            client,
+       bool            permanent,
+       xfs_km_flags_t  alloc_flags);
  
  
  static inline void
@@ -566,15 +578,20 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
  }
  
  void   xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
-int    xlog_write(struct log *log, struct xfs_log_vec *log_vector,
-                               struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
-                               xlog_in_core_t **commit_iclog, uint flags);
+int
+xlog_write(
+       struct xlog             *log,
+       struct xfs_log_vec      *log_vector,
+       struct xlog_ticket      *tic,
+       xfs_lsn_t               *start_lsn,
+       struct xlog_in_core     **commit_iclog,
+       uint                    flags);
  
  /*
   * When we crack an atomic LSN, we sample it first so that the value will not
   * change while we are cracking it into the component values. This means we
   * will always get consistent component values to work from. This should always
- * be used to smaple and crack LSNs taht are stored and updated in atomic
+ * be used to sample and crack LSNs that are stored and updated in atomic
   * variables.
   */
  static inline void
@@ -628,17 +645,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
  /*
   * Committed Item List interfaces
   */
-int    xlog_cil_init(struct log *log);
-void   xlog_cil_init_post_recovery(struct log *log);
-void   xlog_cil_destroy(struct log *log);
+int
+xlog_cil_init(struct xlog *log);
+void
+xlog_cil_init_post_recovery(struct xlog *log);
+void
+xlog_cil_destroy(struct xlog *log);
  
  /*
   * CIL force routines
   */
-xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+xfs_lsn_t
+xlog_cil_force_lsn(
+       struct xlog *log,
+       xfs_lsn_t sequence);
  
  static inline void
-xlog_cil_force(struct log *log)
+xlog_cil_force(struct xlog *log)
  {
         xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
  }
diff --git a/include/xfs_mount.h b/include/xfs_mount.h

index 9be9a050d913583d223dc35f18f1bbc240c92f02..c267379c080edc62a19782eb05dee48d4dcdb02d 100644 (file)
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -52,15 +52,8 @@ typedef struct xfs_trans_reservations {
  
  #else /* __KERNEL__ */
  
-#include "xfs_sync.h"
-
-struct log;
-struct xfs_mount_args;
+struct xlog;
  struct xfs_inode;
-struct xfs_bmbt_irec;
-struct xfs_bmap_free;
-struct xfs_extdelta;
-struct xfs_swapext;
  struct xfs_mru_cache;
  struct xfs_nameops;
  struct xfs_ail;
@@ -134,7 +127,7 @@ typedef struct xfs_mount {
         uint                    m_readio_blocks; /* min read size blocks */
         uint                    m_writeio_log;  /* min write size log bytes */
         uint                    m_writeio_blocks; /* min write size blocks */
-       struct log              *m_log;         /* log specific stuff */
+       struct xlog             *m_log;         /* log specific stuff */
         int                     m_logbufs;      /* number of log buffers */
         int                     m_logbsize;     /* size of each log buffer */
         uint                    m_rsumlevels;   /* rt summary levels */
@@ -177,7 +170,6 @@ typedef struct xfs_mount {
         uint                    m_qflags;       /* quota status flags */
         xfs_trans_reservations_t m_reservations;/* precomputed res values */
         __uint64_t              m_maxicount;    /* maximum inode count */
-       __uint64_t              m_maxioffset;   /* maximum inode offset */
         __uint64_t              m_resblks;      /* total reserved blocks */
         __uint64_t              m_resblks_avail;/* available reserved blocks */
         __uint64_t              m_resblks_save; /* reserved blks @ remount,ro */
@@ -204,17 +196,21 @@ typedef struct xfs_mount {
         struct mutex            m_icsb_mutex;   /* balancer sync lock */
  #endif
         struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
-       struct task_struct      *m_sync_task;   /* generalised sync thread */
-       xfs_sync_work_t         m_sync_work;    /* work item for VFS_SYNC */
-       struct list_head        m_sync_list;    /* sync thread work item list */
-       spinlock_t              m_sync_lock;    /* work item list lock */
-       int                     m_sync_seq;     /* sync thread generation no. */
-       wait_queue_head_t       m_wait_single_sync_task;
+       struct delayed_work     m_reclaim_work; /* background inode reclaim */
+       struct delayed_work     m_eofblocks_work; /* background eof blocks
+                                                    trimming */
         __int64_t               m_update_flags; /* sb flags we need to update
                                                    on the next remount,rw */
         struct shrinker         m_inode_shrink; /* inode reclaim shrinker */
         int64_t                 m_low_space[XFS_LOWSP_MAX];
                                                 /* low free space thresholds */
+
+       struct workqueue_struct *m_data_workqueue;
+       struct workqueue_struct *m_unwritten_workqueue;
+       struct workqueue_struct *m_cil_workqueue;
+       struct workqueue_struct *m_reclaim_workqueue;
+       struct workqueue_struct *m_log_workqueue;
+       struct workqueue_struct *m_eofblocks_workqueue;
  } xfs_mount_t;
  
  /*
@@ -223,11 +219,11 @@ typedef struct xfs_mount {
  #define XFS_MOUNT_WSYNC                (1ULL << 0)     /* for nfs - all metadata ops
                                                    must be synchronous except
                                                    for space allocations */
-#define XFS_MOUNT_DELAYLOG     (1ULL << 1)     /* delayed logging is enabled */
  #define XFS_MOUNT_WAS_CLEAN    (1ULL << 3)
  #define XFS_MOUNT_FS_SHUTDOWN  (1ULL << 4)     /* atomic stop of all filesystem
                                                    operations, typically for
                                                    disk errors in metadata */
+#define XFS_MOUNT_DISCARD      (1ULL << 5)     /* discard unused blocks */
  #define XFS_MOUNT_RETERR       (1ULL << 6)     /* return alignment errors to
                                                    user */
  #define XFS_MOUNT_NOALIGN      (1ULL << 7)     /* turn off stripe alignment
@@ -297,8 +293,6 @@ xfs_preferred_iosize(xfs_mount_t *mp)
                         PAGE_CACHE_SIZE));
  }
  
-#define XFS_MAXIOFFSET(mp)     ((mp)->m_maxioffset)
-
  #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
                                 ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
  #define XFS_FORCED_SHUTDOWN(mp)        ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
@@ -314,9 +308,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
  #define SHUTDOWN_REMOTE_REQ    0x0010  /* shutdown came from remote cell */
  #define SHUTDOWN_DEVICE_REQ    0x0020  /* failed all paths to the device */
  
-#define xfs_test_for_freeze(mp)                ((mp)->m_super->s_frozen)
-#define xfs_wait_for_freeze(mp,l)      vfs_check_frozen((mp)->m_super, (l))
-
  /*
   * Flags for xfs_mountfs
   */
@@ -366,12 +357,11 @@ typedef struct xfs_mod_sb {
         int64_t         msb_delta;      /* Change to make to specified field */
  } xfs_mod_sb_t;
  
-extern int     xfs_log_sbcount(xfs_mount_t *, uint);
+extern int     xfs_log_sbcount(xfs_mount_t *);
  extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
  extern int     xfs_mountfs(xfs_mount_t *mp);
  
  extern void    xfs_unmountfs(xfs_mount_t *);
-extern int     xfs_unmountfs_writesb(xfs_mount_t *);
  extern int     xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
  extern int     xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
                         uint, int);
@@ -402,4 +392,6 @@ extern int  xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
  extern void    xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
  extern void    xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
  
+extern const struct xfs_buf_ops xfs_sb_buf_ops;
+
  #endif /* __XFS_MOUNT_H__ */
diff --git a/include/xfs_quota.h b/include/xfs_quota.h

index 5d1f57d827a25672e50d0e4e2ca69c62538a657b..b50ec5b95d5a89fb4b0972c1761682c27fe8cdd9 100644 (file)
--- a/include/xfs_quota.h
+++ b/include/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
  #define XFS_DQ_PROJ            0x0002          /* project quota */
  #define XFS_DQ_GROUP           0x0004          /* a group quota */
  #define XFS_DQ_DIRTY           0x0008          /* dquot is dirty */
-#define XFS_DQ_WANT            0x0010          /* for lookup/reclaim race */
-#define XFS_DQ_INACTIVE                0x0020          /* dq off mplist & hashlist */
+#define XFS_DQ_FREEING         0x0010          /* dquot is being torn down */
  
  #define XFS_DQ_ALLTYPES                (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
  
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
         { XFS_DQ_PROJ,          "PROJ" }, \
         { XFS_DQ_GROUP,         "GROUP" }, \
         { XFS_DQ_DIRTY,         "DIRTY" }, \
-       { XFS_DQ_WANT,          "WANT" }, \
-       { XFS_DQ_INACTIVE,      "INACTIVE" }
+       { XFS_DQ_FREEING,       "FREEING" }
  
  /*
   * In the worst case, when both user and group quotas are on,
@@ -176,6 +174,8 @@ typedef struct xfs_qoff_logformat {
  #define XFS_UQUOTA_ACTIVE      0x0100  /* uquotas are being turned off */
  #define XFS_PQUOTA_ACTIVE      0x0200  /* pquotas are being turned off */
  #define XFS_GQUOTA_ACTIVE      0x0400  /* gquotas are being turned off */
+#define XFS_ALL_QUOTA_ACTIVE   \
+       (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
  
  /*
   * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
@@ -199,7 +199,6 @@ typedef struct xfs_qoff_logformat {
  #define XFS_QMOPT_UQUOTA       0x0000004 /* user dquot requested */
  #define XFS_QMOPT_PQUOTA       0x0000008 /* project dquot requested */
  #define XFS_QMOPT_FORCE_RES    0x0000010 /* ignore quota limits */
-#define XFS_QMOPT_DQSUSER      0x0000020 /* don't cache super users dquot */
  #define XFS_QMOPT_SBVERSION    0x0000040 /* change superblock version num */
  #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
  #define XFS_QMOPT_DQREPAIR     0x0001000 /* repair dquot if damaged */
@@ -299,11 +298,6 @@ typedef struct xfs_dqtrx {
         long            qt_delrtb_delta;  /* delayed RT blk count changes */
  } xfs_dqtrx_t;
  
-extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
-extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
-
-#endif /* __KERNEL__ */
-
  #ifdef CONFIG_XFS_QUOTA
  extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
  extern void xfs_trans_free_dqinfo(struct xfs_trans *);
@@ -331,25 +325,36 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
  extern void xfs_qm_dqdetach(struct xfs_inode *);
  extern void xfs_qm_dqrele(struct xfs_dquot *);
  extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
-extern int xfs_qm_sync(struct xfs_mount *, int);
  extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
  extern void xfs_qm_mount_quotas(struct xfs_mount *);
  extern void xfs_qm_unmount(struct xfs_mount *);
  extern void xfs_qm_unmount_quotas(struct xfs_mount *);
  
  #else
-#define xfs_qm_vop_dqalloc(ip, uid, gid, prid, flags, udqp, gdqp)      ({ \
-       *(udqp) = NULL;         \
-       *(gdqp) = NULL;         \
-       0;                      \
-})
+static inline int
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
+               uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
+{
+       *udqp = NULL;
+       *gdqp = NULL;
+       return 0;
+}
  #define xfs_trans_dup_dqinfo(tp, tp2)
  #define xfs_trans_free_dqinfo(tp)
  #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
  #define xfs_trans_apply_dquot_deltas(tp)
  #define xfs_trans_unreserve_and_mod_dquots(tp)
-#define xfs_trans_reserve_quota_nblks(tp, ip, blks, inos, flg)         (0)
-#define xfs_trans_reserve_quota_bydquots(tp, mp, uqp, gqp, blks, inos, flg) (0)
+static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
+               struct xfs_inode *ip, long nblks, long ninos, uint flags)
+{
+       return 0;
+}
+static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
+               struct xfs_mount *mp, struct xfs_dquot *udqp,
+               struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
+{
+       return 0;
+}
  #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
  #define xfs_qm_vop_rename_dqattach(it)                                 (0)
  #define xfs_qm_vop_chown(tp, ip, old, new)                             (NULL)
@@ -359,7 +364,6 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *);
  #define xfs_qm_dqdetach(ip)
  #define xfs_qm_dqrele(d)
  #define xfs_qm_statvfs(ip, s)
-#define xfs_qm_sync(mp, flags)                                         (0)
  #define xfs_qm_newmount(mp, a, b)                                      (0)
  #define xfs_qm_mount_quotas(mp)
  #define xfs_qm_unmount(mp)
@@ -372,4 +376,9 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *);
         xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
                                 f | XFS_QMOPT_RES_REGBLKS)
  
+extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
+                               xfs_dqid_t, uint, uint, char *);
+extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
+
+#endif /* __KERNEL__ */
  #endif /* __XFS_QUOTA_H__ */
diff --git a/include/xfs_rtalloc.h b/include/xfs_rtalloc.h

index ff614c29b44142da429bb33db904afdf843469ad..f7f3a359c1c5a238afd4884b19f4a74363e36287 100644 (file)
--- a/include/xfs_rtalloc.h
+++ b/include/xfs_rtalloc.h
@@ -47,7 +47,7 @@ struct xfs_trans;
  #define        XFS_SUMOFFSTOBLOCK(mp,s)        \
         (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
  #define        XFS_SUMPTR(mp,bp,so)    \
-       ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+       ((xfs_suminfo_t *)((bp)->b_addr + \
                 (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
  
  #define        XFS_BITTOBLOCK(mp,bi)   ((bi) >> (mp)->m_blkbit_log)
@@ -154,7 +154,7 @@ xfs_rtmount_init(
         if (mp->m_sb.sb_rblocks == 0)
                 return 0;
  
-       cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
+       xfs_warn(mp, "Not built with CONFIG_XFS_RT");
         return ENOSYS;
  }
  # define xfs_rtmount_inodes(m)  (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
diff --git a/include/xfs_sb.h b/include/xfs_sb.h

index 5dcc2d72ab158c213de91f732c8f8bf10d24bff8..6a7f8b0e49244c56c947817cde59eef6cf006acd 100644 (file)
--- a/include/xfs_sb.h
+++ b/include/xfs_sb.h
@@ -81,6 +81,7 @@ struct xfs_mount;
  #define XFS_SB_VERSION2_ATTR2BIT       0x00000008      /* Inline attr rework */
  #define XFS_SB_VERSION2_PARENTBIT      0x00000010      /* parent pointers */
  #define XFS_SB_VERSION2_PROJID32BIT    0x00000080      /* 32 bit project id */
+#define XFS_SB_VERSION2_CRCBIT         0x00000100      /* metadata CRCs */
  
  #define        XFS_SB_VERSION2_OKREALFBITS     \
         (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
@@ -510,13 +511,19 @@ static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp)
         sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT;
  }
  
+static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
+{
+       return (xfs_sb_version_hasmorebits(sbp) &&
+               (sbp->sb_features2 & XFS_SB_VERSION2_CRCBIT));
+}
+
  /*
   * end of superblock version macros
   */
  
  #define XFS_SB_DADDR           ((xfs_daddr_t)0) /* daddr in filesystem/ag */
  #define        XFS_SB_BLOCK(mp)        XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
-#define XFS_BUF_TO_SBP(bp)     ((xfs_dsb_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_SBP(bp)     ((xfs_dsb_t *)((bp)->b_addr))
  
  #define        XFS_HDR_BLOCK(mp,d)     ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
  #define        XFS_DADDR_TO_FSB(mp,d)  XFS_AGB_TO_FSB(mp, \
@@ -536,7 +543,6 @@ static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp)
  #define        XFS_BB_TO_FSB(mp,bb)    \
         (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
  #define        XFS_BB_TO_FSBT(mp,bb)   ((bb) >> (mp)->m_blkbb_log)
-#define        XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
  
  /*
   * File system block to byte conversions.
diff --git a/include/xfs_trace.h b/include/xfs_trace.h

index 1abcf28460fe13b7a0de056a7fd77e9c1bc44d17..5f32097353b0915f164c18300e4541acfa5f62c5 100644 (file)
--- a/include/xfs_trace.h
+++ b/include/xfs_trace.h
@@ -26,11 +26,14 @@
  #define trace_xfs_alloc_near_greater(a)                ((void) 0)
  #define trace_xfs_alloc_near_lesser(a)         ((void) 0)
  #define trace_xfs_alloc_near_error(a)          ((void) 0)
+#define trace_xfs_alloc_near_noentry(a)                ((void) 0)
+#define trace_xfs_alloc_near_busy(a)           ((void) 0)
  #define trace_xfs_alloc_size_neither(a)                ((void) 0)
  #define trace_xfs_alloc_size_noentry(a)                ((void) 0)
  #define trace_xfs_alloc_size_nominleft(a)      ((void) 0)
  #define trace_xfs_alloc_size_done(a)           ((void) 0)
  #define trace_xfs_alloc_size_error(a)          ((void) 0)
+#define trace_xfs_alloc_size_busy(a)           ((void) 0)
  #define trace_xfs_alloc_small_freelist(a)      ((void) 0)
  #define trace_xfs_alloc_small_notenough(a)     ((void) 0)
  #define trace_xfs_alloc_small_done(a)          ((void) 0)
@@ -91,12 +94,74 @@
  #define trace_xfs_dir2_sf_toino4(a)    ((void) 0)
  #define trace_xfs_dir2_sf_toino8(a)    ((void) 0)
  
+#define trace_xfs_da_node_create(a)            ((void) 0)
+#define trace_xfs_da_split(a)                  ((void) 0)
+#define trace_xfs_attr_leaf_split_before(a)    ((void) 0)
+#define trace_xfs_attr_leaf_split_after(a)     ((void) 0)
+#define trace_xfs_da_root_split(a)             ((void) 0)
+#define trace_xfs_da_node_split(a)             ((void) 0)
+#define trace_xfs_da_node_rebalance(a)         ((void) 0)
+#define trace_xfs_da_node_add(a)               ((void) 0)
+#define trace_xfs_da_join(a)                   ((void) 0)
+#define trace_xfs_da_root_join(a)              ((void) 0)
+#define trace_xfs_da_node_toosmall(a)          ((void) 0)
+#define trace_xfs_da_fixhashpath(a)            ((void) 0)
+#define trace_xfs_da_node_remove(a)            ((void) 0)
+#define trace_xfs_da_node_unbalance(a)         ((void) 0)
+#define trace_xfs_da_link_before(a)            ((void) 0)
+#define trace_xfs_da_link_after(a)             ((void) 0)
+#define trace_xfs_da_unlink_back(a)            ((void) 0)
+#define trace_xfs_da_unlink_forward(a)         ((void) 0)
+#define trace_xfs_da_path_shift(a)             ((void) 0)
+#define trace_xfs_da_grow_inode(a)             ((void) 0)
+#define trace_xfs_da_swap_lastblock(a)         ((void) 0)
+#define trace_xfs_da_shrink_inode(a)           ((void) 0)
+
+#define trace_xfs_attr_sf_create(a)            ((void) 0)
+#define trace_xfs_attr_sf_add(a)               ((void) 0)
+#define trace_xfs_attr_sf_remove(a)            ((void) 0)
+#define trace_xfs_attr_sf_lookup(a)            ((void) 0)
+#define trace_xfs_attr_sf_to_leaf(a)           ((void) 0)
+#define trace_xfs_attr_leaf_to_sf(a)           ((void) 0)
+#define trace_xfs_attr_leaf_to_node(a)         ((void) 0)
+#define trace_xfs_attr_leaf_create(a)          ((void) 0)
+#define trace_xfs_attr_leaf_split(a)           ((void) 0)
+#define trace_xfs_attr_leaf_add_old(a)         ((void) 0)
+#define trace_xfs_attr_leaf_add_new(a)         ((void) 0)
+#define trace_xfs_attr_leaf_add(a)             ((void) 0)
+#define trace_xfs_attr_leaf_add_work(a)                ((void) 0)
+#define trace_xfs_attr_leaf_compact(a)         ((void) 0)
+#define trace_xfs_attr_leaf_rebalance(a)       ((void) 0)
+#define trace_xfs_attr_leaf_toosmall(a)                ((void) 0)
+#define trace_xfs_attr_leaf_remove(a)          ((void) 0)
+#define trace_xfs_attr_leaf_unbalance(a)       ((void) 0)
+#define trace_xfs_attr_leaf_lookup(a)          ((void) 0)
+#define trace_xfs_attr_leaf_clearflag(a)       ((void) 0)
+#define trace_xfs_attr_leaf_setflag(a)         ((void) 0)
+#define trace_xfs_attr_leaf_flipflags(a)       ((void) 0)
+
+#define trace_xfs_attr_sf_addname(a)           ((void) 0)
+#define trace_xfs_attr_leaf_addname(a)         ((void) 0)
+#define trace_xfs_attr_leaf_replace(a)         ((void) 0)
+#define trace_xfs_attr_leaf_removename(a)      ((void) 0)
+#define trace_xfs_attr_leaf_get(a)             ((void) 0)
+#define trace_xfs_attr_node_addname(a)         ((void) 0)
+#define trace_xfs_attr_node_replace(a)         ((void) 0)
+#define trace_xfs_attr_node_removename(a)      ((void) 0)
+#define trace_xfs_attr_fillstate(a)            ((void) 0)
+#define trace_xfs_attr_refillstate(a)          ((void) 0)
+#define trace_xfs_attr_node_get(a)             ((void) 0)
+#define trace_xfs_attr_rmtval_get(a)           ((void) 0)
+#define trace_xfs_attr_rmtval_set(a)           ((void) 0)
+#define trace_xfs_attr_rmtval_remove(a)                ((void) 0)
+
  #define trace_xfs_bmap_pre_update(a,b,c,d)     ((void) 0)
  #define trace_xfs_bmap_post_update(a,b,c,d)    ((void) 0)
  #define trace_xfs_extlist(a,b,c,d)     ((void) 0)
  #define trace_xfs_bunmap(a,b,c,d,e)    ((void) 0)
  
-#define trace_xfs_perag_get(a,b,c,d)   ((void) 0)
-#define trace_xfs_perag_put(a,b,c,d)   ((void) 0)
+/* set c = c to avoid unused var warnings */
+#define trace_xfs_perag_get(a,b,c,d)   ((c) = (c))
+#define trace_xfs_perag_put(a,b,c,d)   ((c) = (c))
  
  #endif /* __TRACE_H__ */
diff --git a/include/xfs_trans.h b/include/xfs_trans.h

index 2870308deae730fd479cd161e34dd97eb4521929..acf1381f75074d08a8c2911a1da017e55bc4afac 100644 (file)
--- a/include/xfs_trans.h
+++ b/include/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
   */
  struct xfs_log_item_desc {
         struct xfs_log_item     *lid_item;
-       ushort                  lid_size;
-       unsigned char           lid_flags;
         struct list_head        lid_trans;
+       unsigned char           lid_flags;
  };
  
  #define XFS_LID_DIRTY          0x1
@@ -180,6 +179,8 @@ struct xfs_log_item_desc {
  #define        XFS_TRANS_SYNC          0x08    /* make commit synchronous */
  #define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
  #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
+#define XFS_TRANS_FREEZE_PROT  0x40    /* Transaction has elevated writer
+                                          count in superblock */
  
  /*
   * Values for call flags parameter.
@@ -323,7 +324,7 @@ typedef struct xfs_log_item {
                                                  struct xfs_log_item *);
                                                         /* buffer item iodone */
                                                         /* callback func */
-       struct xfs_item_ops             *li_ops;        /* function list */
+       const struct xfs_item_ops       *li_ops;        /* function list */
  
         /* delayed logging */
         struct list_head                li_cil;         /* CIL pointers */
@@ -338,37 +339,33 @@ typedef struct xfs_log_item {
         { XFS_LI_IN_AIL,        "IN_AIL" }, \
         { XFS_LI_ABORTED,       "ABORTED" }
  
-typedef struct xfs_item_ops {
+struct xfs_item_ops {
         uint (*iop_size)(xfs_log_item_t *);
         void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
         void (*iop_pin)(xfs_log_item_t *);
         void (*iop_unpin)(xfs_log_item_t *, int remove);
-       uint (*iop_trylock)(xfs_log_item_t *);
+       uint (*iop_push)(struct xfs_log_item *, struct list_head *);
         void (*iop_unlock)(xfs_log_item_t *);
         xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
-       void (*iop_push)(xfs_log_item_t *);
-       void (*iop_pushbuf)(xfs_log_item_t *);
         void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-} xfs_item_ops_t;
+};
  
  #define IOP_SIZE(ip)           (*(ip)->li_ops->iop_size)(ip)
  #define IOP_FORMAT(ip,vp)      (*(ip)->li_ops->iop_format)(ip, vp)
  #define IOP_PIN(ip)            (*(ip)->li_ops->iop_pin)(ip)
  #define IOP_UNPIN(ip, remove)  (*(ip)->li_ops->iop_unpin)(ip, remove)
-#define IOP_TRYLOCK(ip)                (*(ip)->li_ops->iop_trylock)(ip)
+#define IOP_PUSH(ip, list)     (*(ip)->li_ops->iop_push)(ip, list)
  #define IOP_UNLOCK(ip)         (*(ip)->li_ops->iop_unlock)(ip)
  #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
-#define IOP_PUSH(ip)           (*(ip)->li_ops->iop_push)(ip)
-#define IOP_PUSHBUF(ip)                (*(ip)->li_ops->iop_pushbuf)(ip)
  #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
  
  /*
- * Return values for the IOP_TRYLOCK() routines.
+ * Return values for the IOP_PUSH() routines.
   */
-#define        XFS_ITEM_SUCCESS        0
-#define        XFS_ITEM_PINNED         1
-#define        XFS_ITEM_LOCKED         2
-#define XFS_ITEM_PUSHBUF       3
+#define XFS_ITEM_SUCCESS       0
+#define XFS_ITEM_PINNED                1
+#define XFS_ITEM_LOCKED                2
+#define XFS_ITEM_FLUSHING      3
  
  /*
   * This is the type of function which can be given to xfs_trans_callback()
@@ -445,16 +442,53 @@ typedef struct xfs_trans {
   * XFS transaction mechanism exported interfaces.
   */
  xfs_trans_t    *xfs_trans_alloc(struct xfs_mount *, uint);
-xfs_trans_t    *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
+xfs_trans_t    *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
  xfs_trans_t    *xfs_trans_dup(xfs_trans_t *);
  int            xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
                                   uint, uint);
  void           xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
-struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
-                                  int, uint);
-int            xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
-                                  struct xfs_buftarg *, xfs_daddr_t, int, uint,
-                                  struct xfs_buf **);
+
+struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
+                                      struct xfs_buftarg *target,
+                                      struct xfs_buf_map *map, int nmaps,
+                                      uint flags);
+
+static inline struct xfs_buf *
+xfs_trans_get_buf(
+       struct xfs_trans        *tp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             blkno,
+       int                     numblks,
+       uint                    flags)
+{
+       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+       return xfs_trans_get_buf_map(tp, target, &map, 1, flags);
+}
+
+int            xfs_trans_read_buf_map(struct xfs_mount *mp,
+                                      struct xfs_trans *tp,
+                                      struct xfs_buftarg *target,
+                                      struct xfs_buf_map *map, int nmaps,
+                                      xfs_buf_flags_t flags,
+                                      struct xfs_buf **bpp,
+                                      const struct xfs_buf_ops *ops);
+
+static inline int
+xfs_trans_read_buf(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             blkno,
+       int                     numblks,
+       xfs_buf_flags_t         flags,
+       struct xfs_buf          **bpp,
+       const struct xfs_buf_ops *ops)
+{
+       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+       return xfs_trans_read_buf_map(mp, tp, target, &map, 1,
+                                     flags, bpp, ops);
+}
+
  struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
  
  void           xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
@@ -466,11 +500,8 @@ void               xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
  void           xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
  void           xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
  void           xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
-int            xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
-                              xfs_ino_t , uint, uint, struct xfs_inode **);
  void           xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
-void           xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
-void           xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
+void           xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
  void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
  void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
  struct xfs_efi_log_item        *xfs_trans_get_efi(xfs_trans_t *, uint);
@@ -486,10 +517,7 @@ void               xfs_trans_log_efd_extent(xfs_trans_t *,
                                          struct xfs_efd_log_item *,
                                          xfs_fsblock_t,
                                          xfs_extlen_t);
-int            _xfs_trans_commit(xfs_trans_t *,
-                                 uint flags,
-                                 int *);
-#define xfs_trans_commit(tp, flags)    _xfs_trans_commit(tp, flags, NULL)
+int            xfs_trans_commit(xfs_trans_t *, uint flags);
  void           xfs_trans_cancel(xfs_trans_t *, int);
  int            xfs_trans_ail_init(struct xfs_mount *);
  void           xfs_trans_ail_destroy(struct xfs_mount *);
diff --git a/include/xfs_types.h b/include/xfs_types.h

index 26d1867d8156b1488597ee0c2ef5f350b6b15cba..e9bd5c3b6a1c49309877ffe21b7fd799b106f154 100644 (file)
--- a/include/xfs_types.h
+++ b/include/xfs_types.h
@@ -33,7 +33,6 @@ typedef signed long long int  __int64_t;
  typedef unsigned long long int __uint64_t;
  
  typedef enum { B_FALSE,B_TRUE }        boolean_t;
-typedef __uint32_t             prid_t;         /* project ID */
  typedef __uint32_t             inst_t;         /* an instruction */
  
  typedef __s64                  xfs_off_t;      /* <file offset> type */
@@ -56,7 +55,10 @@ typedef __uint64_t __psunsigned_t;
  
  #endif /* __KERNEL__ */
  
+typedef __uint32_t     prid_t;         /* project ID */
+
  typedef __uint32_t     xfs_agblock_t;  /* blockno in alloc. group */
+typedef        __uint32_t      xfs_agino_t;    /* inode # within allocation grp */
  typedef        __uint32_t      xfs_extlen_t;   /* extent length in blocks */
  typedef        __uint32_t      xfs_agnumber_t; /* allocation group number */
  typedef __int32_t      xfs_extnum_t;   /* # of extents in a file */
@@ -73,8 +75,6 @@ typedef       __int32_t       xfs_tid_t;      /* transaction identifier */
  typedef        __uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
  typedef        __uint32_t      xfs_dahash_t;   /* dir/attr hash value */
  
-typedef __uint32_t     xlog_tid_t;     /* transaction ID type */
-
  /*
   * These types are 64 bits on disk but are either 32 or 64 bits in memory.
   * Disk based types:
@@ -103,6 +103,7 @@ typedef __uint64_t  xfs_fileoff_t;  /* block number in a file */
  typedef __int64_t      xfs_sfiloff_t;  /* signed block number in a file */
  typedef __uint64_t     xfs_filblks_t;  /* number of blocks in a file */
  
+
  /*
   * Null values for the types.
   */
@@ -122,6 +123,9 @@ typedef __uint64_t  xfs_filblks_t;  /* number of blocks in a file */
  
  #define NULLCOMMITLSN  ((xfs_lsn_t)-1)
  
+#define        NULLFSINO       ((xfs_ino_t)-1)
+#define        NULLAGINO       ((xfs_agino_t)-1)
+
  /*
   * Max values for extlen, extnum, aextnum.
   */
@@ -129,6 +133,20 @@ typedef __uint64_t xfs_filblks_t;  /* number of blocks in a file */
  #define        MAXEXTNUM       ((xfs_extnum_t)0x7fffffff)      /* signed int */
  #define        MAXAEXTNUM      ((xfs_aextnum_t)0x7fff)         /* signed short */
  
+/*
+ * Minimum and maximum blocksize and sectorsize.
+ * The blocksize upper limit is pretty much arbitrary.
+ * The sectorsize upper limit is due to sizeof(sb_sectsize).
+ */
+#define XFS_MIN_BLOCKSIZE_LOG  9       /* i.e. 512 bytes */
+#define XFS_MAX_BLOCKSIZE_LOG  16      /* i.e. 65536 bytes */
+#define XFS_MIN_BLOCKSIZE      (1 << XFS_MIN_BLOCKSIZE_LOG)
+#define XFS_MAX_BLOCKSIZE      (1 << XFS_MAX_BLOCKSIZE_LOG)
+#define XFS_MIN_SECTORSIZE_LOG 9       /* i.e. 512 bytes */
+#define XFS_MAX_SECTORSIZE_LOG 15      /* i.e. 32768 bytes */
+#define XFS_MIN_SECTORSIZE     (1 << XFS_MIN_SECTORSIZE_LOG)
+#define XFS_MAX_SECTORSIZE     (1 << XFS_MAX_SECTORSIZE_LOG)
+
  /*
   * Min numbers of data/attr fork btree root pointers.
   */
diff --git a/libxfs/Makefile b/libxfs/Makefile

index b3fd85cfdf8d295bdb34b0a46b0a3446bf9e01bf..dc9b22cef3638dac242d981464880d7e3dd53478 100644 (file)
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -10,7 +10,7 @@ LT_CURRENT = 0
  LT_REVISION = 0
  LT_AGE = 0
  
-HFILES = xfs.h init.h
+HFILES = xfs.h init.h xfs_dir2_priv.h
  CFILES = cache.c init.c kmem.c logitem.c radix-tree.c rdwr.c trans.c util.c \
         xfs_alloc.c xfs_ialloc.c xfs_inode.c xfs_btree.c xfs_alloc_btree.c \
         xfs_ialloc_btree.c xfs_bmap_btree.c xfs_da_btree.c \
diff --git a/libxfs/init.c b/libxfs/init.c

index fce344512c41f3a1b35fe4e59f9c5c13a3cf40ab..71da69bc32d7654241a536d8b2443e986421e5a8 100644 (file)
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -371,7 +371,6 @@ manage_zones(int release)
         extern kmem_zone_t      *xfs_ili_zone;
         extern kmem_zone_t      *xfs_inode_zone;
         extern kmem_zone_t      *xfs_ifork_zone;
-       extern kmem_zone_t      *xfs_dabuf_zone;
         extern kmem_zone_t      *xfs_buf_item_zone;
         extern kmem_zone_t      *xfs_da_state_zone;
         extern kmem_zone_t      *xfs_btree_cur_zone;
@@ -383,7 +382,6 @@ manage_zones(int release)
                 kmem_free(xfs_buf_zone);
                 kmem_free(xfs_inode_zone);
                 kmem_free(xfs_ifork_zone);
-               kmem_free(xfs_dabuf_zone);
                 kmem_free(xfs_buf_item_zone);
                 kmem_free(xfs_da_state_zone);
                 kmem_free(xfs_btree_cur_zone);
@@ -395,7 +393,6 @@ manage_zones(int release)
         xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buffer");
         xfs_inode_zone = kmem_zone_init(sizeof(xfs_inode_t), "xfs_inode");
         xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
         xfs_ili_zone = kmem_zone_init(
                         sizeof(xfs_inode_log_item_t), "xfs_inode_log_item");
         xfs_buf_item_zone = kmem_zone_init(
diff --git a/libxfs/logitem.c b/libxfs/logitem.c

index 116d3edfd6dd59d1fdbc269871f0763148a33c7a..84e4c1498950c46240ab269d67de749ae7bed4d6 100644 (file)
--- a/libxfs/logitem.c
+++ b/libxfs/logitem.c
@@ -32,21 +32,27 @@ kmem_zone_t *xfs_ili_zone;          /* inode log item zone */
  xfs_buf_t *
  xfs_trans_buf_item_match(
         xfs_trans_t             *tp,
-       xfs_buftarg_t           *target,
-       xfs_daddr_t             blkno,
-       int                     len)
+       dev_t                   dev,
+       struct xfs_buf_map      *map,
+       int                     nmaps)
  {
          struct xfs_log_item_desc *lidp;
          struct xfs_buf_log_item *blip;
+       int                     len = 0;
+       int                     i;
+
+       for (i = 0; i < nmaps; i++)
+               len += map[i].bm_len;
  
-        len = BBTOB(len);
          list_for_each_entry(lidp, &tp->t_items, lid_trans) {
                  blip = (struct xfs_buf_log_item *)lidp->lid_item;
                  if (blip->bli_item.li_type == XFS_LI_BUF &&
-                    XFS_BUF_TARGET(blip->bli_buf) == target->dev &&
-                    XFS_BUF_ADDR(blip->bli_buf) == blkno &&
-                    XFS_BUF_COUNT(blip->bli_buf) == len)
+                   blip->bli_buf->b_dev == dev &&
+                   XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
+                   blip->bli_buf->b_bcount == BBTOB(len)) {
+                       ASSERT(blip->bli_buf->b_map_count == nmaps);
                          return blip->bli_buf;
+               }
          }
  
          return NULL;
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c

index 432a1af5fe792148f0ecd4a704a265e563716031..e75edd0a78a3c4f8d325a002eca6b854cb708e4b 100644 (file)
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -159,7 +159,7 @@ libxfs_log_header(
                 head->h_len = cpu_to_be32(sunit - BBSIZE);
         else
                 head->h_len = cpu_to_be32(20);
-       head->h_chksum = cpu_to_be32(0);
+       head->h_crc = cpu_to_be32(0);
         head->h_prev_block = cpu_to_be32(-1);
         head->h_num_logops = cpu_to_be32(1);
         head->h_cycle_data[0] = cpu_to_be32(0xb0c0d0d0);
@@ -193,72 +193,86 @@ libxfs_log_header(
  #ifdef XFS_BUF_TRACING
  
  #undef libxfs_readbuf
+#undef libxfs_readbuf_map
  #undef libxfs_writebuf
  #undef libxfs_getbuf
+#undef libxfs_getbuf_map
  #undef libxfs_getbuf_flags
  #undef libxfs_putbuf
  
-xfs_buf_t      *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
+xfs_buf_t      *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
+xfs_buf_t      *libxfs_readbuf_map(dev_t, struct xfs_buf_map *, int, int);
  int            libxfs_writebuf(xfs_buf_t *, int);
-xfs_buf_t      *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+xfs_buf_t      *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+xfs_buf_t      *libxfs_getbuf_map(dev_t, struct xfs_buf_map *, int);
+xfs_buf_t      *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int);
  void           libxfs_putbuf (xfs_buf_t *);
  
+#define        __add_trace(bp, func, file, line)       \
+do {                                           \
+       if (bp) {                               \
+               (bp)->b_func = (func);          \
+               (bp)->b_file = (file);          \
+               (bp)->b_line = (line);          \
+       }                                       \
+} while (0)
+
  xfs_buf_t *
-libxfs_trace_readbuf(const char *func, const char *file, int line, dev_t dev, xfs_daddr_t blkno, int len, int flags)
+libxfs_trace_readbuf(const char *func, const char *file, int line,
+               dev_t dev, xfs_daddr_t blkno, int len, int flags)
  {
         xfs_buf_t       *bp = libxfs_readbuf(dev, blkno, len, flags);
+       __add_trace(bp, func, file, line);
+       return bp;
+}
  
-       if (bp){
-               bp->b_func = func;
-               bp->b_file = file;
-               bp->b_line = line;
-       }
-
+xfs_buf_t *
+libxfs_trace_readbuf_map(const char *func, const char *file, int line,
+               dev_t dev, struct xfs_buf_map *map, int nmaps, int flags)
+{
+       xfs_buf_t       *bp = libxfs_readbuf_map(dev, map, nmaps, flags);
+       __add_trace(bp, func, file, line);
         return bp;
  }
  
  int
  libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags)
  {
-       bp->b_func = func;
-       bp->b_file = file;
-       bp->b_line = line;
-
+       __add_trace(bp, func, file, line);
         return libxfs_writebuf(bp, flags);
  }
  
  xfs_buf_t *
-libxfs_trace_getbuf(const char *func, const char *file, int line, dev_t device, xfs_daddr_t blkno, int len)
+libxfs_trace_getbuf(const char *func, const char *file, int line,
+               dev_t device, xfs_daddr_t blkno, int len)
  {
         xfs_buf_t       *bp = libxfs_getbuf(device, blkno, len);
+       __add_trace(bp, func, file, line);
+       return bp;
+}
  
-       bp->b_func = func;
-       bp->b_file = file;
-       bp->b_line = line;
-
+xfs_buf_t *
+libxfs_trace_getbuf_map(const char *func, const char *file, int line,
+               dev_t device, struct xfs_buf_map *map, int nmaps)
+{
+       xfs_buf_t       *bp = libxfs_getbuf_map(device, map, nmaps);
+       __add_trace(bp, func, file, line);
         return bp;
  }
  
  xfs_buf_t *
  libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
-               dev_t device, xfs_daddr_t blkno, int len, unsigned long flags)
+               dev_t device, xfs_daddr_t blkno, int len, unsigned int flags)
  {
-       xfs_buf_t       *bp = libxfs_getbuf(device, blkno, len, flags);
-
-       bp->b_func = func;
-       bp->b_file = file;
-       bp->b_line = line;
-
+       xfs_buf_t       *bp = libxfs_getbuf_flags(device, blkno, len, flags);
+       __add_trace(bp, func, file, line);
         return bp;
  }
  
  void
  libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
  {
-       bp->b_func = func;
-       bp->b_file = file;
-       bp->b_line = line;
-
+       __add_trace(bp, func, file, line);
         libxfs_putbuf(bp);
  }
  
@@ -279,30 +293,40 @@ static struct cache_mru           xfs_buf_freelist =
         {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
          0, PTHREAD_MUTEX_INITIALIZER };
  
-typedef struct {
-       dev_t           device;
-       xfs_daddr_t     blkno;
-       unsigned int    bblen;
-} xfs_bufkey_t;
+/*
+ * The bufkey is used to pass the new buffer information to the cache object
+ * allocation routine. Because discontiguous buffers need to pass different
+ * information, we need fields to pass that information. However, because the
+ * blkno and bblen are needed for the initial cache entry lookup (i.e. for
+ * bcompare), they are always filled in; a non-null map is what switches the
+ * allocation to discontiguous buffer initialisation instead of contiguous.
+ */
+struct xfs_bufkey {
+       dev_t                   device;
+       xfs_daddr_t             blkno;
+       unsigned int            bblen;
+       struct xfs_buf_map      *map;
+       int                     nmaps;
+};
  
  static unsigned int
  libxfs_bhash(cache_key_t key, unsigned int hashsize)
  {
-       return (((unsigned int)((xfs_bufkey_t *)key)->blkno) >> 5) % hashsize;
+       return (((unsigned int)((struct xfs_bufkey *)key)->blkno) >> 5) % hashsize;
  }
  
  static int
  libxfs_bcompare(struct cache_node *node, cache_key_t key)
  {
-       xfs_buf_t       *bp = (xfs_buf_t *)node;
-       xfs_bufkey_t    *bkey = (xfs_bufkey_t *)key;
+       struct xfs_buf  *bp = (struct xfs_buf *)node;
+       struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;
  
  #ifdef IO_BCOMPARE_CHECK
         if (bp->b_dev == bkey->device &&
             bp->b_blkno == bkey->blkno &&
             bp->b_bcount != BBTOB(bkey->bblen))
                 fprintf(stderr, "%lx: Badness in key lookup (length)\n"
-                       "bp=(bno %llu, len %u bytes) key=(bno %llu, len %u bytes)\n",
+                       "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
                         pthread_self(),
                         (unsigned long long)bp->b_blkno, (int)bp->b_bcount,
                         (unsigned long long)bkey->blkno, BBTOB(bkey->bblen));
@@ -322,11 +346,12 @@ libxfs_bprint(xfs_buf_t *bp)
  }
  
  static void
-libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes)
+__initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes)
  {
         bp->b_flags = 0;
         bp->b_blkno = bno;
         bp->b_bcount = bytes;
+       bp->b_length = BTOBB(bytes);
         bp->b_dev = device;
         bp->b_error = 0;
         if (!bp->b_addr)
@@ -346,11 +371,44 @@ libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes)
         bp->b_recur = 0;
  }
  
+static void
+libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes)
+{
+       __initbuf(bp, device, bno, bytes);
+}
+
+static void
+libxfs_initbuf_map(xfs_buf_t *bp, dev_t device, struct xfs_buf_map *map, int nmaps)
+{
+       unsigned int bytes = 0;
+       int i;
+
+       bytes = sizeof(struct xfs_buf_map) * nmaps;
+       bp->b_map = malloc(bytes);
+       if (!bp->b_map) {
+               fprintf(stderr,
+                       _("%s: %s can't malloc %u bytes: %s\n"),
+                       progname, __FUNCTION__, bytes,
+                       strerror(errno));
+               exit(1);
+       }
+       bp->b_nmaps = nmaps;
+
+       bytes = 0;
+       for ( i = 0; i < nmaps; i++) {
+               bp->b_map[i].bm_bn = map[i].bm_bn;
+               bp->b_map[i].bm_len = map[i].bm_len;
+               bytes += BBTOB(map[i].bm_len);
+       }
+
+       __initbuf(bp, device, map[0].bm_bn, bytes);
+       bp->b_flags |= LIBXFS_B_DISCONTIG;
+}
+
  xfs_buf_t *
-libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen)
+__libxfs_getbufr(int blen)
  {
         xfs_buf_t       *bp;
-       int             blen = BBTOB(bblen);
  
         /*
          * first look for a buffer that can be used as-is,
@@ -372,15 +430,27 @@ libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen)
                         list_del_init(&bp->b_node.cn_mru);
                         free(bp->b_addr);
                         bp->b_addr = NULL;
+                       free(bp->b_map);
+                       bp->b_map = NULL;
                 }
         } else
                 bp = kmem_zone_zalloc(xfs_buf_zone, 0);
         pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
  
-       if (bp != NULL)
+       return bp;
+}
+
+xfs_buf_t *
+libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen)
+{
+       xfs_buf_t       *bp;
+       int             blen = BBTOB(bblen);
+
+       bp =__libxfs_getbufr(blen);
+       if (bp)
                 libxfs_initbuf(bp, device, blkno, blen);
  #ifdef IO_DEBUG
-       printf("%lx: %s: allocated %u bytes buffer, key=%llu(%llu), %p\n",
+       printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
                 pthread_self(), __FUNCTION__, blen,
                 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
  #endif
@@ -388,6 +458,38 @@ libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen)
         return bp;
  }
  
+/* Allocate a buffer for an extent list; exits if map is empty or disagrees with blkno. */
+xfs_buf_t *
+libxfs_getbufr_map(dev_t device, xfs_daddr_t blkno, int bblen,
+               struct xfs_buf_map *map, int nmaps)
+{
+       xfs_buf_t       *bp;
+       int             blen = BBTOB(bblen);    /* total length in bytes */
+
+       if (!map || !nmaps) {
+               fprintf(stderr, _("%s: %s invalid map %p or nmaps %d\n"),
+                       progname, __FUNCTION__, map, nmaps);
+               exit(1);
+       }
+
+       if (blkno != map[0].bm_bn) {    /* key must name the first extent */
+               fprintf(stderr,
+                       _("%s: %s map blkno %llx doesn't match key %llx\n"),
+                       progname, __FUNCTION__,
+                       (unsigned long long)map[0].bm_bn, (unsigned long long)blkno);
+               exit(1);
+       }
+
+       bp = __libxfs_getbufr(blen);
+       if (bp)
+               libxfs_initbuf_map(bp, device, map, nmaps);
+#ifdef IO_DEBUG
+       printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
+               pthread_self(), __FUNCTION__, blen,
+               (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
+#endif
+       return bp;
+}
  
  #ifdef XFS_BUF_TRACING
  struct list_head       lock_buf_list = {&lock_buf_list, &lock_buf_list};
@@ -396,18 +498,12 @@ int                       lock_buf_count = 0;
  
  extern int     use_xfs_buf_lock;
  
-struct xfs_buf *
-libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags)
+static struct xfs_buf *
+__cache_lookup(struct xfs_bufkey *key, unsigned int flags)
  {
-       xfs_buf_t       *bp;
-       xfs_bufkey_t    key;
-       int             miss;
-
-       key.device = device;
-       key.blkno = blkno;
-       key.bblen = len;
+       struct xfs_buf  *bp;
  
-       miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp);
+       cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
         if (!bp)
                 return NULL;
  
@@ -423,7 +519,7 @@ libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags
                         if (pthread_equal(bp->b_holder, pthread_self())) {
                                 fprintf(stderr,
         _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
-                                       blkno);
+                                       key->blkno);
                                 bp->b_recur++;
                                 return bp;
                         } else {
@@ -444,9 +540,9 @@ libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags
         pthread_mutex_unlock(&libxfs_bcache->c_mutex);
  #endif
  #ifdef IO_DEBUG
-       printf("%lx %s: %s buffer %p for bno = %llu\n",
-               pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
-               bp, (long long)LIBXFS_BBTOOFF64(blkno));
+       printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
+               pthread_self(), __FUNCTION__,
+               bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
  #endif
  
         return bp;
@@ -455,12 +551,41 @@ out_put:
         return NULL;
  }
  
+struct xfs_buf *
+libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags)
+{
+       struct xfs_bufkey key = {0};
+
+       key.device = device;
+       key.blkno = blkno;
+       key.bblen = len;
+
+       return __cache_lookup(&key, flags);
+}
+
  struct xfs_buf *
  libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
  {
         return libxfs_getbuf_flags(device, blkno, len, 0);
  }
  
+struct xfs_buf *
+libxfs_getbuf_map(dev_t device, struct xfs_buf_map *map, int nmaps)
+{
+       struct xfs_bufkey key = {0};
+       int i;
+
+       key.device = device;
+       key.blkno = map[0].bm_bn;
+       for (i = 0; i < nmaps; i++) {
+               key.bblen += map[i].bm_len;
+       }
+       key.map = map;
+       key.nmaps = nmaps;
+
+       return __cache_lookup(&key, 0);
+}
+
  void
  libxfs_putbuf(xfs_buf_t *bp)
  {
@@ -485,7 +610,7 @@ libxfs_putbuf(xfs_buf_t *bp)
  void
  libxfs_purgebuf(xfs_buf_t *bp)
  {
-       xfs_bufkey_t    key;
+       struct xfs_bufkey key = {0};
  
         key.device = bp->b_dev;
         key.blkno = bp->b_blkno;
@@ -497,47 +622,62 @@ libxfs_purgebuf(xfs_buf_t *bp)
  static struct cache_node *
  libxfs_balloc(cache_key_t key)
  {
-       xfs_bufkey_t    *bufkey = (xfs_bufkey_t *)key;
+       struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;
  
+       if (bufkey->map)
+               return (struct cache_node *)
+                      libxfs_getbufr_map(bufkey->device,
+                                         bufkey->blkno, bufkey->bblen,
+                                         bufkey->map, bufkey->nmaps);
         return (struct cache_node *)libxfs_getbufr(bufkey->device,
-                                       bufkey->blkno, bufkey->bblen);
+                                         bufkey->blkno, bufkey->bblen);
  }
  
-int
-libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *bp, int len, int flags)
+
+static int
+__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
  {
-       int     fd = libxfs_device_to_fd(dev);
-       int     bytes = BBTOB(len);
-       int     error;
         int     sts;
  
-       ASSERT(BBTOB(len) <= bp->b_bcount);
-
-       sts = pread64(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno));
+       sts = pread64(fd, buf, len, offset);
         if (sts < 0) {
-               error = errno;
+               int error = errno;
                 fprintf(stderr, _("%s: read failed: %s\n"),
                         progname, strerror(error));
                 if (flags & LIBXFS_EXIT_ON_FAILURE)
                         exit(1);
                 return error;
-       } else if (sts != bytes) {
+       } else if (sts != len) {
                 fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
-                       progname, sts, bytes);
+                       progname, sts, len);
                 if (flags & LIBXFS_EXIT_ON_FAILURE)
                         exit(1);
                 return EIO;
         }
-#ifdef IO_DEBUG
-       printf("%lx: %s: read %u bytes, blkno=%llu(%llu), %p\n",
-               pthread_self(), __FUNCTION__, bytes,
-               (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
-#endif
-       if (bp->b_dev == dev &&
+       return 0;
+}
+
+int
+libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *bp, int len, int flags)
+{
+       int     fd = libxfs_device_to_fd(dev);
+       int     bytes = BBTOB(len);
+       int     error;
+
+       ASSERT(BBTOB(len) <= bp->b_bcount);
+
+       error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
+       if (!error &&
+           bp->b_dev == dev &&
             bp->b_blkno == blkno &&
             bp->b_bcount == bytes)
                 bp->b_flags |= LIBXFS_B_UPTODATE;
-       return 0;
+#ifdef IO_DEBUG
+       printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
+               pthread_self(), __FUNCTION__, bytes, error,
+               (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
+#endif
+       return error;
  }
  
  xfs_buf_t *
@@ -555,37 +695,113 @@ libxfs_readbuf(dev_t dev, xfs_daddr_t blkno, int len, int flags)
         return bp;
  }
  
-int
-libxfs_writebufr(xfs_buf_t *bp)
+/* Read all nmaps extents of map[] into one buffer; sets b_error on a failed read. */
+struct xfs_buf *
+libxfs_readbuf_map(dev_t dev, struct xfs_buf_map *map, int nmaps, int flags)
+{
+       xfs_buf_t       *bp;
+       int             error = 0;
+       int             fd;
+       int             i;
+       char            *buf;
+
+       if (nmaps == 1) /* single extent: plain contiguous read */
+               return libxfs_readbuf(dev, map[0].bm_bn, map[0].bm_len, flags);
+
+       bp = libxfs_getbuf_map(dev, map, nmaps);
+       if (!bp || (bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
+               return bp;      /* no buffer, or contents already valid */
+
+       ASSERT(bp->b_nmaps == nmaps);
+
+       fd = libxfs_device_to_fd(dev);
+       buf = bp->b_addr;
+       for (i = 0; i < bp->b_nmaps; i++) {
+               off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn);
+               int len = BBTOB(bp->b_map[i].bm_len);
+
+               ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn);
+               ASSERT(bp->b_map[i].bm_len == map[i].bm_len);
+
+               error = __read_buf(fd, buf, len, offset, flags);
+               if (error) {
+                       bp->b_error = error;
+                       break;
+               }
+               buf += len;     /* next extent lands right after this one */
+       }
+
+       if (!error)
+               bp->b_flags |= LIBXFS_B_UPTODATE;
+#ifdef IO_DEBUG
+       printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
+               pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
+               (long long)LIBXFS_BBTOOFF64(bp->b_blkno), (long long)bp->b_blkno, bp);
+#endif
+       return bp;
+}
+
+static int
+__write_buf(int fd, void *buf, int len, off64_t offset, int flags)
  {
         int     sts;
-       int     fd = libxfs_device_to_fd(bp->b_dev);
-       int     error;
  
-       sts = pwrite64(fd, bp->b_addr, bp->b_bcount, LIBXFS_BBTOOFF64(bp->b_blkno));
+       sts = pwrite64(fd, buf, len, offset);
         if (sts < 0) {
-               error = errno;
+               int error = errno;
                 fprintf(stderr, _("%s: pwrite64 failed: %s\n"),
                         progname, strerror(error));
-               if (bp->b_flags & LIBXFS_B_EXIT)
+               if (flags & LIBXFS_B_EXIT)
                         exit(1);
                 return error;
-       } else if (sts != bp->b_bcount) {
-               fprintf(stderr, _("%s: error - wrote only %d of %d bytes\n"),
-                       progname, sts, bp->b_bcount);
-               if (bp->b_flags & LIBXFS_B_EXIT)
+       } else if (sts != len) {
+               fprintf(stderr, _("%s: error - pwrite64 only %d of %d bytes\n"),
+                       progname, sts, len);
+               if (flags & LIBXFS_B_EXIT)
                         exit(1);
                 return EIO;
         }
+       return 0;
+}
+
+int
+libxfs_writebufr(xfs_buf_t *bp)
+{
+       int     fd = libxfs_device_to_fd(bp->b_dev);
+       int     error = 0;
+
+       if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
+               error = __write_buf(fd, bp->b_addr, bp->b_bcount,
+                                   LIBXFS_BBTOOFF64(bp->b_blkno), bp->b_flags);
+       } else {
+               int     i;
+               char    *buf = bp->b_addr;
+
+               for (i = 0; i < bp->b_nmaps; i++) {
+                       off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn);
+                       int len = BBTOB(bp->b_map[i].bm_len);
+
+                       error = __write_buf(fd, buf, len, offset, bp->b_flags);
+                       if (error) {
+                               bp->b_error = error;
+                               break;
+                       }
+                       buf += len;
+                       offset += len;
+               }
+       }
+
  #ifdef IO_DEBUG
         printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p\n",
                         pthread_self(), __FUNCTION__, bp->b_bcount,
                         (long long)LIBXFS_BBTOOFF64(bp->b_blkno),
                         (long long)bp->b_blkno, bp);
  #endif
-       bp->b_flags |= LIBXFS_B_UPTODATE;
-       bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
-       return 0;
+       if (!error) {
+               bp->b_flags |= LIBXFS_B_UPTODATE;
+               bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
+       }
+       return error;
  }
  
  int
@@ -609,7 +825,7 @@ libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
  #ifdef IO_DEBUG
         if (boff + len > bp->b_bcount) {
                 printf("Badness, iomove out of range!\n"
-                       "bp=(bno %llu, bytes %u) range=(boff %u, bytes %u)\n",
+                       "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
                         (long long)bp->b_blkno, bp->b_bcount, boff, len);
                 abort();
         }
@@ -742,7 +958,10 @@ libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
                 fprintf(stderr, "%s: allocated inode, ino=%llu(%llu), %p\n",
                         __FUNCTION__, (unsigned long long)ino, bno, ip);
  #endif
-               if ((error = libxfs_iread(mp, tp, ino, ip, bno))) {
+               ip->i_ino = ino;
+               ip->i_mount = mp;
+               error = xfs_iread(mp, tp, ip, bno);
+               if (error) {
                         cache_node_purge(libxfs_icache, &ino,
                                         (struct cache_node *)ip);
                         ip = NULL;
diff --git a/libxfs/trans.c b/libxfs/trans.c

index a745d515796dca142e522c069c918c5e9c9df1b0..7cb3c8c57d9f1a8b107efcd683bd2cc97526fb33 100644 (file)
--- a/libxfs/trans.c
+++ b/libxfs/trans.c
@@ -252,7 +252,7 @@ xfs_trans_log_inode(
          * this coordination mechanism.
          */
         flags |= ip->i_itemp->ili_last_fields;
-       ip->i_itemp->ili_format.ilf_fields |= flags;
+       ip->i_itemp->ili_fields |= flags;
  }
  
  /*
@@ -338,7 +338,7 @@ libxfs_trans_binval(
         if (bip->bli_flags & XFS_BLI_STALE)
                 return;
         XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_STALE(bp);
+       xfs_buf_stale(bp);
         bip->bli_flags |= XFS_BLI_STALE;
         bip->bli_flags &= ~XFS_BLI_DIRTY;
         bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
@@ -383,22 +383,20 @@ libxfs_trans_bhold(
  }
  
  xfs_buf_t *
-libxfs_trans_get_buf(
+libxfs_trans_get_buf_map(
         xfs_trans_t             *tp,
         dev_t                   dev,
-       xfs_daddr_t             d,
-       int                     len,
+       struct xfs_buf_map      *map,
+       int                     nmaps,
         uint                    f)
  {
         xfs_buf_t               *bp;
         xfs_buf_log_item_t      *bip;
-       xfs_buftarg_t           bdev;
  
         if (tp == NULL)
-               return libxfs_getbuf(dev, d, len);
+               return libxfs_getbuf_map(dev, map, nmaps);
  
-       bdev.dev = dev;
-       bp = xfs_trans_buf_item_match(tp, &bdev, d, len);
+       bp = xfs_trans_buf_item_match(tp, dev, map, nmaps);
         if (bp != NULL) {
                 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
@@ -407,7 +405,7 @@ libxfs_trans_get_buf(
                 return bp;
         }
  
-       bp = libxfs_getbuf(dev, d, len);
+       bp = libxfs_getbuf_map(dev, map, nmaps);
         if (bp == NULL)
                 return NULL;
  #ifdef XACT_DEBUG
@@ -432,15 +430,13 @@ libxfs_trans_getsb(
  {
         xfs_buf_t               *bp;
         xfs_buf_log_item_t      *bip;
-       xfs_buftarg_t           bdev;
-       int                     len;
+       int                     len = XFS_FSS_TO_BB(mp, 1);
+       DEFINE_SINGLE_BUF_MAP(map, XFS_SB_DADDR, len);
  
         if (tp == NULL)
                 return libxfs_getsb(mp, flags);
  
-       bdev.dev = mp->m_dev;
-       len = XFS_FSS_TO_BB(mp, 1);
-       bp = xfs_trans_buf_item_match(tp, &bdev, XFS_SB_DADDR, len);
+       bp = xfs_trans_buf_item_match(tp, mp->m_dev, &map, 1);
         if (bp != NULL) {
                 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
@@ -465,24 +461,24 @@ libxfs_trans_getsb(
  }
  
  int
-libxfs_trans_read_buf(
+libxfs_trans_read_buf_map(
         xfs_mount_t             *mp,
         xfs_trans_t             *tp,
         dev_t                   dev,
-       xfs_daddr_t             blkno,
-       int                     len,
+       struct xfs_buf_map      *map,
+       int                     nmaps,
         uint                    flags,
-       xfs_buf_t               **bpp)
+       xfs_buf_t               **bpp,
+       const struct xfs_buf_ops *ops)
  {
         xfs_buf_t               *bp;
         xfs_buf_log_item_t      *bip;
-       xfs_buftarg_t           bdev;
         int                     error;
  
         *bpp = NULL;
  
         if (tp == NULL) {
-               bp = libxfs_readbuf(dev, blkno, len, flags);
+               bp = libxfs_readbuf_map(dev, map, nmaps, flags);
                 if (!bp) {
                         return (flags & XBF_TRYLOCK) ?
                                 EAGAIN : XFS_ERROR(ENOMEM);
@@ -492,8 +488,7 @@ libxfs_trans_read_buf(
                 goto done;
         }
  
-       bdev.dev = dev;
-       bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
+       bp = xfs_trans_buf_item_match(tp, dev, map, nmaps);
         if (bp != NULL) {
                 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
@@ -502,7 +497,7 @@ libxfs_trans_read_buf(
                 goto done;
         }
  
-       bp = libxfs_readbuf(dev, blkno, len, flags);
+       bp = libxfs_readbuf_map(dev, map, nmaps, flags);
         if (!bp) {
                 return (flags & XBF_TRYLOCK) ?
                         EAGAIN : XFS_ERROR(ENOMEM);
@@ -588,7 +583,7 @@ inode_item_done(
         mp = iip->ili_item.li_mountp;
         ASSERT(ip != NULL);
  
-       if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) {
+       if (!(iip->ili_fields & XFS_ILOG_ALL)) {
                 ip->i_transp = NULL;    /* disassociate from transaction */
                 iip->ili_flags = 0;     /* reset all flags */
                 goto ili_done;
@@ -597,9 +592,9 @@ inode_item_done(
         /*
          * Get the buffer containing the on-disk inode.
          */
-       error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0);
+       error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, 0, 0);
         if (error) {
-               fprintf(stderr, _("%s: warning - itobp failed (%d)\n"),
+               fprintf(stderr, _("%s: warning - imap_to_bp failed (%d)\n"),
                         progname, error);
                 goto ili_done;
         }
@@ -674,6 +669,7 @@ trans_committed(
                 struct xfs_log_item *lip = lidp->lid_item;
  
                  xfs_trans_del_item(lip);
+
                 if (lip->li_type == XFS_LI_BUF)
                         buf_item_done((xfs_buf_log_item_t *)lip);
                 else if (lip->li_type == XFS_LI_INODE)
diff --git a/libxfs/util.c b/libxfs/util.c

index bffbac0578c78b2017cc92150333af1da216ab80..2ad4bfd19ef042969154d09f28bf5799e1353e6a 100644 (file)
--- a/libxfs/util.c
+++ b/libxfs/util.c
@@ -49,130 +49,6 @@ libxfs_trans_ichgtime(
         }
  }
  
-/*
- * Given a mount structure and an inode number, return a pointer
- * to a newly allocated in-core inode coresponding to the given
- * inode number.
- *
- * Initialize the inode's attributes and extent pointers if it
- * already has them (it will not if the inode has no links).
- *
- * NOTE: this has slightly different behaviour to the kernel in
- * that this version requires the already allocated *ip being 
- * passed in while the kernel version does the allocation and 
- * returns it in **ip.
- */
-int
-libxfs_iread(
-       xfs_mount_t     *mp,
-       xfs_trans_t     *tp,
-       xfs_ino_t       ino,
-       xfs_inode_t     *ip,
-       xfs_daddr_t     bno)
-{
-       xfs_buf_t       *bp;
-       xfs_dinode_t    *dip;
-       int             error;
-
-       ip->i_ino = ino;
-       ip->i_mount = mp;
-
-        /*
-         * Fill in the location information in the in-core inode.
-         */
-        error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, 0);
-        if (error)
-                return error;
-
-        /*
-         * Get pointers to the on-disk inode and the buffer containing it.
-         */
-        error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, XBF_LOCK, 0);
-        if (error)
-                return error;
-        dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
-
-       /*
-        * If we got something that isn't an inode it means someone
-        * (nfs or dmi) has a stale handle.
-        */
-       if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
-               xfs_trans_brelse(tp, bp);
-               return EINVAL;
-       }
-
-       /*
-        * If the on-disk inode is already linked to a directory
-        * entry, copy all of the inode into the in-core inode.
-        * xfs_iformat() handles copying in the inode format
-        * specific information.
-        * Otherwise, just get the truly permanent information.
-        */
-       if (dip->di_mode) {
-               xfs_dinode_from_disk(&ip->i_d, dip);
-               error = xfs_iformat(ip, dip);
-               if (error)  {
-                       xfs_trans_brelse(tp, bp);
-                       return error;
-               }
-       } else {
-               ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
-               ip->i_d.di_version = dip->di_version;
-               ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
-               ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
-               /*
-                * Make sure to pull in the mode here as well in
-                * case the inode is released without being used.
-                * This ensures that xfs_inactive() will see that
-                * the inode is already free and not try to mess
-                * with the uninitialized part of it.
-                */
-               ip->i_d.di_mode = 0;
-               /*
-                * Initialize the per-fork minima and maxima for a new
-                * inode here.  xfs_iformat will do it for old inodes.
-                */
-               ip->i_df.if_ext_max =
-                       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
-       }
-
-       /*
-        * The inode format changed when we moved the link count and
-        * made it 32 bits long.  If this is an old format inode,
-        * convert it in memory to look like a new one.  If it gets
-        * flushed to disk we will convert back before flushing or
-        * logging it.  We zero out the new projid_lo/hi field and the old link
-        * count field.  We'll handle clearing the pad field (the remains
-        * of the old uuid field) when we actually convert the inode to
-        * the new format. We don't change the version number so that we
-        * can distinguish this from a real new format inode.
-        */
-       if (ip->i_d.di_version == 1) {
-               ip->i_d.di_nlink = ip->i_d.di_onlink;
-               ip->i_d.di_onlink = 0;
-               xfs_set_projid(&ip->i_d, 0);
-       }
-
-       ip->i_delayed_blks = 0;
-       ip->i_size = ip->i_d.di_size;
-
-       /*
-        * Use xfs_trans_brelse() to release the buffer containing the
-        * on-disk inode, because it was acquired with xfs_trans_read_buf()
-        * in xfs_itobp() above.  If tp is NULL, this is just a normal
-        * brelse().  If we're within a transaction, then xfs_trans_brelse()
-        * will only release the buffer if it is not dirty within the
-        * transaction.  It will be OK to release the buffer in this case,
-        * because inodes on disk are never destroyed and we will be
-        * locking the new in-core inode before putting it in the hash
-        * table where other processes can find it.  Thus we don't have
-        * to worry about the inode being changed just because we released
-        * the buffer.
-        */
-       xfs_trans_brelse(tp, bp);
-       return 0;
-}
-
  /*
   * Allocate an inode on disk and return a copy of its in-core version.
   * Set mode, nlink, and rdev appropriately within the inode.
@@ -193,7 +69,6 @@ libxfs_ialloc(
         struct fsxattr  *fsx,
         int             okalloc,
         xfs_buf_t       **ialloc_context,
-       boolean_t       *call_again,
         xfs_inode_t     **ipp)
  {
         xfs_ino_t       ino;
@@ -206,10 +81,10 @@ libxfs_ialloc(
          * the on-disk inode to be allocated.
          */
         error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
-                           ialloc_context, call_again, &ino);
+                           ialloc_context, &ino);
         if (error != 0)
                 return error;
-       if (*call_again || ino == NULLFSINO) {
+       if (*ialloc_context || ino == NULLFSINO) {
                 *ipp = NULL;
                 return 0;
         }
@@ -455,7 +330,7 @@ libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp)
                         dip->di_onlink = 0;
                         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
                         memset(&(dip->di_pad[0]), 0, sizeof(dip->di_pad));
-                       ASSERT(xfs_get_projid(ip->i_d) == 0);
+                       ASSERT(xfs_get_projid(&ip->i_d) == 0);
                 }
         }
  
@@ -560,7 +435,7 @@ libxfs_alloc_file_space(
         error = 0;
         imapp = &imaps[0];
         reccount = 1;
-       xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+       xfs_bmapi_flags = alloc_type ? XFS_BMAPI_PREALLOC : 0;
         mp = ip->i_mount;
         startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
         allocatesize_fsb = XFS_B_TO_FSB(mp, count);
@@ -578,7 +453,7 @@ libxfs_alloc_file_space(
                 xfs_trans_ihold(tp, ip);
  
                 xfs_bmap_init(&free_list, &firstfsb);
-               error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb,
+               error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb,
                                 xfs_bmapi_flags, &firstfsb, 0, imapp,
                                 &reccount, &free_list);
  
@@ -616,56 +491,6 @@ libxfs_log2_roundup(unsigned int i)
         return rval;
  }
  
-/*
- * Get a buffer for the dir/attr block, fill in the contents.
- * Don't check magic number, the caller will (it's xfs_repair).
- *
- * Originally from xfs_da_btree.c in the kernel, but only used
- * in userspace so it now resides here.
- */
-int
-libxfs_da_read_bufr(
-       xfs_trans_t     *trans,
-       xfs_inode_t     *dp,
-       xfs_dablk_t     bno,
-       xfs_daddr_t     mappedbno,
-       xfs_dabuf_t     **bpp,
-       int             whichfork)
-{
-       return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 2,
-               (inst_t *)__return_address);
-}
-
-/*
- * Hold dabuf at transaction commit.
- *
- * Originally from xfs_da_btree.c in the kernel, but only used
- * in userspace so it now resides here.
- */
-void
-libxfs_da_bhold(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
-       int     i;
-
-       for (i = 0; i < dabuf->nbuf; i++)
-               xfs_trans_bhold(tp, dabuf->bps[i]);
-}
-
-/*
- * Join dabuf to transaction.
- *
- * Originally from xfs_da_btree.c in the kernel, but only used
- * in userspace so it now resides here.
- */
-void
-libxfs_da_bjoin(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
-       int     i;
-
-       for (i = 0; i < dabuf->nbuf; i++)
-               xfs_trans_bjoin(tp, dabuf->bps[i]);
-}
-
  /*
   * Wrapper around call to libxfs_ialloc. Takes care of committing and
   * allocating a new transaction as needed.
@@ -684,21 +509,25 @@ libxfs_inode_alloc(
         struct fsxattr  *fsx,
         xfs_inode_t     **ipp)
  {
-       boolean_t       call_again;
         int             i;
         xfs_buf_t       *ialloc_context;
         xfs_inode_t     *ip;
         xfs_trans_t     *ntp;
         int             error;
  
-       call_again = B_FALSE;
         ialloc_context = (xfs_buf_t *)0;
         error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, fsx,
-                          1, &ialloc_context, &call_again, &ip);
-       if (error)
+                          1, &ialloc_context, &ip);
+       if (error) {
+               *ipp = NULL;
                 return error;
+       }
+       if (!ialloc_context && !ip) {
+               *ipp = NULL;
+               return XFS_ERROR(ENOSPC);
+       }
  
-       if (call_again) {
+       if (ialloc_context) {
                 xfs_trans_bhold(*tp, ialloc_context);
                 ntp = xfs_trans_dup(*tp);
                 xfs_trans_commit(*tp, 0);
@@ -710,8 +539,7 @@ libxfs_inode_alloc(
                 }
                 xfs_trans_bjoin(*tp, ialloc_context);
                 error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
-                                  fsx, 1, &ialloc_context,
-                                  &call_again, &ip);
+                                  fsx, 1, &ialloc_context, &ip);
                 if (!ip)
                         error = ENOSPC;
                 if (error)
diff --git a/libxfs/xfs.h b/libxfs/xfs.h

index b3f837838b0265c02d416d33a275723781ad41f0..62132843dd86a1c95505cd2d89d19dbd6584efc3 100644 (file)
--- a/libxfs/xfs.h
+++ b/libxfs/xfs.h
@@ -43,14 +43,21 @@
   */
  
  #include <xfs/libxfs.h>
+#include "xfs_dir2_priv.h"
  
-typedef struct { dev_t dev; }  xfs_buftarg_t;
+#undef ASSERT
+#define ASSERT(ex) assert(ex)
  
-typedef __uint32_t             uint_t;
+typedef __uint32_t             uint_t;
  typedef __uint32_t             inst_t;         /* an instruction */
  
+#ifndef EWRONGFS
+#define EWRONGFS       EINVAL
+#endif
  
-#define m_ddev_targp                   m_dev
+#define m_ddev_targp                   m_dev
+#define m_logdev_targp                 m_logdev
+#define m_rtdev_targp                  m_rtdev
  #define xfs_error_level                        0
  
  #define STATIC                         static
@@ -64,10 +71,11 @@ typedef __uint32_t          inst_t;         /* an instruction */
  
  #define IHOLD(ip)                      ((void) 0)
  
-#define XFS_CORRUPTION_ERROR(e,l,mp,m) ((void) 0)
+/* stop unused var warnings by assigning mp to itself */
+#define XFS_CORRUPTION_ERROR(e,l,mp,m) do { (mp) = (mp); } while (0)
+#define XFS_ERROR_REPORT(e,l,mp)       do { (mp) = (mp); } while (0)
  #define XFS_QM_DQATTACH(mp,ip,flags)   0
  #define XFS_ERROR(e)                   (e)
-#define XFS_ERROR_REPORT(e,l,mp)       ((void) 0)
  #define XFS_ERRLEVEL_LOW               1
  #define XFS_FORCED_SHUTDOWN(mp)                0
  #define XFS_ILOCK_EXCL                 0
@@ -124,35 +132,6 @@ static inline int __do_div(unsigned long long *n, unsigned base)
         ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
  
  
-static inline __uint32_t __get_unaligned_be32(const __uint8_t *p)
-{
-        return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
-}
-
-static inline __uint64_t get_unaligned_be64(void *p)
-{
-       return (__uint64_t)__get_unaligned_be32(p) << 32 |
-                          __get_unaligned_be32(p + 4);
-}
-
-static inline void __put_unaligned_be16(__uint16_t val, __uint8_t *p)
-{
-       *p++ = val >> 8;
-       *p++ = val;
-}
-
-static inline void __put_unaligned_be32(__uint32_t val, __uint8_t *p)
-{
-       __put_unaligned_be16(val >> 16, p);
-       __put_unaligned_be16(val, p + 2);
-}
-
-static inline void put_unaligned_be64(__uint64_t val, void *p)
-{
-       __put_unaligned_be32(val >> 32, p);
-       __put_unaligned_be32(val, p + 4);
-}
-
  
  static inline __attribute__((const))
  int is_power_of_2(unsigned long n)
@@ -191,10 +170,11 @@ roundup_pow_of_two(uint v)
  #define XBF_LOCK                       XFS_BUF_LOCK
  #define XBF_TRYLOCK                    XFS_BUF_TRYLOCK
  #define XBF_DONT_BLOCK                 0
+#define XBF_UNMAPPED                   0
  #define XFS_BUF_GETERROR(bp)           0
  #define XFS_BUF_DONE(bp)               ((bp)->b_flags |= LIBXFS_B_UPTODATE)
  #define XFS_BUF_ISDONE(bp)             ((bp)->b_flags & LIBXFS_B_UPTODATE)
-#define XFS_BUF_STALE(bp)              ((bp)->b_flags |= LIBXFS_B_STALE)
+#define xfs_buf_stale(bp)              ((bp)->b_flags |= LIBXFS_B_STALE)
  #define XFS_BUF_UNDELAYWRITE(bp)       ((bp)->b_flags &= ~LIBXFS_B_DIRTY)
  #define XFS_BUF_SET_VTYPE(a,b)         ((void) 0)
  #define XFS_BUF_SET_VTYPE_REF(a,b,c)   ((void) 0)
@@ -260,6 +240,8 @@ roundup_pow_of_two(uint v)
  #define xfs_trans_log_inode            libxfs_trans_log_inode
  #define xfs_trans_mod_sb               libxfs_trans_mod_sb
  #define xfs_trans_read_buf             libxfs_trans_read_buf
+#define xfs_trans_read_buf_map         libxfs_trans_read_buf_map
+#define xfs_trans_get_buf_map          libxfs_trans_get_buf_map
  #define xfs_trans_reserve              libxfs_trans_reserve
  
  #define xfs_trans_get_block_res(tp)    1
@@ -268,12 +250,16 @@ roundup_pow_of_two(uint v)
  #define        xfs_trans_agflist_delta(tp, d)
  #define        xfs_trans_agbtree_delta(tp, d)
  
-#define xfs_buf_readahead(a,b,c)       ((void) 0)      /* no readahead */
-#define xfs_btree_reada_bufl(m,fsb,c)  ((void) 0)
-#define xfs_btree_reada_bufs(m,fsb,c,x)        ((void) 0)
-#define xfs_buftrace(x,y)              ((void) 0)      /* debug only */
+#define xfs_buf_readahead(a,b,c,ops)           ((void) 0)      /* no readahead */
+#define xfs_buf_readahead_map(a,b,c,ops)       ((void) 0)      /* no readahead */
+#define xfs_btree_reada_bufl(m,fsb,c,ops)      ((void) 0)
+#define xfs_btree_reada_bufs(m,fsb,c,x,ops)    ((void) 0)
+#define xfs_buftrace(x,y)                      ((void) 0)      /* debug only */
  
  #define xfs_cmn_err(tag,level,mp,fmt,args...)  cmn_err(level,fmt, ## args)
+#define xfs_warn(mp,fmt,args...)               cmn_err(CE_WARN,fmt, ## args)
+#define xfs_alert(mp,fmt,args...)              cmn_err(CE_ALERT,fmt, ## args)
+#define xfs_alert_tag(mp,tag,fmt,args...)      cmn_err(CE_ALERT,fmt, ## args)
  
  #define xfs_dir2_trace_args(where, args)               ((void) 0)
  #define xfs_dir2_trace_args_b(where, args, bp)         ((void) 0)
@@ -289,15 +275,28 @@ roundup_pow_of_two(uint v)
  #define xfs_initialize_perag_icache(pag)               ((void) 0)
  
  #define xfs_ilock(ip,mode)                             ((void) 0)
+#define xfs_ilock_nowait(ip,mode)                      ((void) 0)
+#define xfs_ilock_demote(ip,mode)                      ((void) 0)
  #define xfs_iunlock(ip,mode)                           ((void) 0)
+#define xfs_ilock_map_shared(ip,mode)                  ((void) 0)
+#define xfs_iunlock_map_shared(ip,mode)                        ((void) 0)
+#define __xfs_flock(ip)                                        ((void) 0)
  
  /* space allocation */
-#define xfs_alloc_busy_search(tp,ag,b,len)     0
+#define xfs_extent_busy_reuse(mp,ag,bno,len,user)      ((void) 0)
+#define xfs_extent_busy_insert(tp,ag,bno,len,flags)    ((void) 0)
+#define xfs_extent_busy_trim(args,fbno,flen,bno,len) \
+do { \
+       *(bno) = (fbno); \
+       *(len) = (flen); \
+} while (0)
+
  /* avoid unused variable warning */
  #define xfs_alloc_busy_insert(tp,ag,b,len)     ({      \
         xfs_agnumber_t __foo = ag;                      \
         __foo = 0;                                      \
  })
+
  #define xfs_rotorstep                          1
  #define xfs_bmap_rtalloc(a)                    (ENOSYS)
  #define xfs_rtpick_extent(mp,tp,len,p)         (ENOSYS)
@@ -306,6 +305,15 @@ roundup_pow_of_two(uint v)
  #define xfs_filestream_lookup_ag(ip)           (0)
  #define xfs_filestream_new_ag(ip,ag)           (0)
  
+#define xfs_log_force(mp,flags)                        ((void) 0)
+#define XFS_LOG_SYNC                           1
+
+/* quota bits */
+#define xfs_trans_mod_dquot_byino(t,i,f,d)             ((void) 0)
+#define xfs_trans_reserve_quota_nblks(t,i,b,n,f)       (0)
+#define xfs_trans_unreserve_quota_nblks(t,i,b,n,f)     ((void) 0)
+#define xfs_qm_dqattach(i,f)                           (0)
+
  /*
   * Prototypes for kernel static functions that are aren't in their
   * associated header files
@@ -318,14 +326,20 @@ int xfs_attr_rmtval_get(struct xfs_da_args *);
  void xfs_bmap_del_free(xfs_bmap_free_t *, xfs_bmap_free_item_t *,
                         xfs_bmap_free_item_t *);
  
-/* xfs_da_btree.c */
-int  xfs_da_do_buf(xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, xfs_daddr_t *,
-                       xfs_dabuf_t **, int, int, inst_t *);
-
  /* xfs_inode.c */
  void xfs_iflush_fork(xfs_inode_t *, xfs_dinode_t *, xfs_inode_log_item_t *,
                         int, xfs_buf_t *);
-int xfs_iformat(xfs_inode_t *, xfs_dinode_t *);
+/*
+ * For regular files we only update the on-disk filesize when actually
+ * writing data back to disk.  Until then only the copy in the VFS inode
+ * is uptodate.
+ */
+static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
+{
+       if (S_ISREG(ip->i_d.di_mode))
+               return ip->i_size;
+       return ip->i_d.di_size;
+}
  
  /* xfs_mount.c */
  int xfs_initialize_perag_data(xfs_mount_t *, xfs_agnumber_t);
@@ -348,8 +362,8 @@ void xfs_buf_item_init (xfs_buf_t *, xfs_mount_t *);
  void xfs_buf_item_log (xfs_buf_log_item_t *, uint, uint);
  
  /* xfs_trans_buf.c */
-xfs_buf_t *xfs_trans_buf_item_match (xfs_trans_t *, xfs_buftarg_t *,
-                       xfs_daddr_t, int);
+xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, dev_t,
+                       struct xfs_buf_map *, int);
  
  /* local source files */
  int  xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c

index a76512dc832304bde3f6842f393db16449ef0e46..61cdc6c04dfb947a038cdb3cd372cdae2c23e08b 100644 (file)
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -22,19 +22,11 @@
  #define        XFSA_FIXUP_BNO_OK       1
  #define        XFSA_FIXUP_CNT_OK       2
  
-/*
- * Prototypes for per-ag allocation routines
- */
-
  STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
-       xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-
-/*
- * Internal functions.
- */
+               xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  
  /*
   * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -55,7 +47,7 @@ xfs_alloc_lookup_eq(
   * Lookup the first record greater than or equal to [bno, len]
   * in the btree given by cur.
   */
-STATIC int                             /* error */
+int                            /* error */
  xfs_alloc_lookup_ge(
         struct xfs_btree_cur    *cur,   /* btree cursor */
         xfs_agblock_t           bno,    /* starting block of extent */
@@ -71,7 +63,7 @@ xfs_alloc_lookup_ge(
   * Lookup the first record less than or equal to [bno, len]
   * in the btree given by cur.
   */
-STATIC int                             /* error */
+int                                    /* error */
  xfs_alloc_lookup_le(
         struct xfs_btree_cur    *cur,   /* btree cursor */
         xfs_agblock_t           bno,    /* starting block of extent */
@@ -104,7 +96,7 @@ xfs_alloc_update(
  /*
   * Get the data from the pointed-to record.
   */
-STATIC int                             /* error */
+int                                    /* error */
  xfs_alloc_get_rec(
         struct xfs_btree_cur    *cur,   /* btree cursor */
         xfs_agblock_t           *bno,   /* output: starting block of extent */
@@ -128,27 +120,28 @@ xfs_alloc_get_rec(
   */
  STATIC void
  xfs_alloc_compute_aligned(
+       xfs_alloc_arg_t *args,          /* allocation argument structure */
         xfs_agblock_t   foundbno,       /* starting block in found extent */
         xfs_extlen_t    foundlen,       /* length in found extent */
-       xfs_extlen_t    alignment,      /* alignment for allocation */
-       xfs_extlen_t    minlen,         /* minimum length for allocation */
         xfs_agblock_t   *resbno,        /* result block number */
         xfs_extlen_t    *reslen)        /* result length */
  {
         xfs_agblock_t   bno;
-       xfs_extlen_t    diff;
         xfs_extlen_t    len;
  
-       if (alignment > 1 && foundlen >= minlen) {
-               bno = roundup(foundbno, alignment);
-               diff = bno - foundbno;
-               len = diff >= foundlen ? 0 : foundlen - diff;
+       /* Trim busy sections out of found extent */
+       xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+
+       if (args->alignment > 1 && len >= args->minlen) {
+               xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
+               xfs_extlen_t    diff = aligned_bno - bno;
+
+               *resbno = aligned_bno;
+               *reslen = diff >= len ? 0 : len - diff;
         } else {
-               bno = foundbno;
-               len = foundlen;
+               *resbno = bno;
+               *reslen = len;
         }
-       *resbno = bno;
-       *reslen = len;
  }
  
  /*
@@ -262,7 +255,6 @@ xfs_alloc_fix_minleft(
                 return 1;
         agf = XFS_BUF_TO_AGF(args->agbp);
         diff = be32_to_cpu(agf->agf_freeblks)
-               + be32_to_cpu(agf->agf_flcount)
                 - args->len - args->minleft;
         if (diff >= 0)
                 return 1;
@@ -418,6 +410,60 @@ xfs_alloc_fixup_trees(
         return 0;
  }
  
+static void
+xfs_agfl_verify(
+       struct xfs_buf  *bp)
+{
+#ifdef WHEN_CRCS_COME_ALONG
+       /*
+        * we cannot actually do any verification of the AGFL because mkfs does
+        * not initialise the AGFL to zero or NULL. Hence the only valid part of
+        * the AGFL is what the AGF says is active. We can't get to the AGF, so
+        * we can't verify just those entries are valid.
+        *
+        * This problem goes away when the CRC format change comes along as that
+        * requires the AGFL to be initialised by mkfs. At that point, we can
+        * verify the blocks in the agfl -active or not- lie within the bounds
+        * of the AG. Until then, just leave this check ifdef'd out.
+        */
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
+       int             agfl_ok = 1;
+
+       int             i;
+
+       for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+               if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK ||
+                   be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
+                       agfl_ok = 0;
+       }
+
+       if (!agfl_ok) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+#endif
+}
+
+static void
+xfs_agfl_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_agfl_verify(bp);
+}
+
+static void
+xfs_agfl_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_agfl_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_agfl_buf_ops = {
+       .verify_read = xfs_agfl_read_verify,
+       .verify_write = xfs_agfl_write_verify,
+};
+
  /*
   * Read in the allocation group free block array.
   */
@@ -435,16 +481,36 @@ xfs_alloc_read_agfl(
         error = xfs_trans_read_buf(
                         mp, tp, mp->m_ddev_targp,
                         XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), 0, &bp);
+                       XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
         if (error)
                 return error;
-       ASSERT(bp);
-       ASSERT(!XFS_BUF_GETERROR(bp));
-       XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
+       ASSERT(!xfs_buf_geterror(bp));
+       xfs_buf_set_ref(bp, XFS_AGFL_REF);
         *bpp = bp;
         return 0;
  }
  
+STATIC int
+xfs_alloc_update_counters(
+       struct xfs_trans        *tp,
+       struct xfs_perag        *pag,
+       struct xfs_buf          *agbp,
+       long                    len)
+{
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+
+       pag->pagf_freeblks += len;
+       be32_add_cpu(&agf->agf_freeblks, len);
+
+       xfs_trans_agblocks_delta(tp, len);
+       if (unlikely(be32_to_cpu(agf->agf_freeblks) >
+                    be32_to_cpu(agf->agf_length)))
+               return EFSCORRUPTED;
+
+       xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
+       return 0;
+}
+
  /*
   * Allocation group level functions.
   */
@@ -486,49 +552,36 @@ xfs_alloc_ag_vextent(
                 ASSERT(0);
                 /* NOTREACHED */
         }
-       if (error)
+
+       if (error || args->agbno == NULLAGBLOCK)
                 return error;
-       /*
-        * If the allocation worked, need to change the agf structure
-        * (and log it), and the superblock.
-        */
-       if (args->agbno != NULLAGBLOCK) {
-               xfs_agf_t       *agf;   /* allocation group freelist header */
-               long            slen = (long)args->len;
  
-               ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
-               ASSERT(!(args->wasfromfl) || !args->isfl);
-               ASSERT(args->agbno % args->alignment == 0);
-               if (!(args->wasfromfl)) {
-
-                       agf = XFS_BUF_TO_AGF(args->agbp);
-                       be32_add_cpu(&agf->agf_freeblks, -(args->len));
-                       xfs_trans_agblocks_delta(args->tp,
-                                                -((long)(args->len)));
-                       args->pag->pagf_freeblks -= args->len;
-                       ASSERT(be32_to_cpu(agf->agf_freeblks) <=
-                               be32_to_cpu(agf->agf_length));
-                       xfs_alloc_log_agf(args->tp, args->agbp,
-                                               XFS_AGF_FREEBLKS);
-                       /*
-                        * Search the busylist for these blocks and mark the
-                        * transaction as synchronous if blocks are found. This
-                        * avoids the need to block due to a synchronous log
-                        * force to ensure correct ordering as the synchronous
-                        * transaction will guarantee that for us.
-                        */
-                       if (xfs_alloc_busy_search(args->mp, args->agno,
-                                               args->agbno, args->len))
-                               xfs_trans_set_sync(args->tp);
-               }
-               if (!args->isfl)
-                       xfs_trans_mod_sb(args->tp,
-                               args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
-                                       XFS_TRANS_SB_FDBLOCKS, -slen);
-               XFS_STATS_INC(xs_allocx);
-               XFS_STATS_ADD(xs_allocb, args->len);
+       ASSERT(args->len >= args->minlen);
+       ASSERT(args->len <= args->maxlen);
+       ASSERT(!args->wasfromfl || !args->isfl);
+       ASSERT(args->agbno % args->alignment == 0);
+
+       if (!args->wasfromfl) {
+               error = xfs_alloc_update_counters(args->tp, args->pag,
+                                                 args->agbp,
+                                                 -((long)(args->len)));
+               if (error)
+                       return error;
+
+               ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
+                                             args->agbno, args->len));
         }
-       return 0;
+
+       if (!args->isfl) {
+               xfs_trans_mod_sb(args->tp, args->wasdel ?
+                                XFS_TRANS_SB_RES_FDBLOCKS :
+                                XFS_TRANS_SB_FDBLOCKS,
+                                -((long)(args->len)));
+       }
+
+       XFS_STATS_INC(xs_allocx);
+       XFS_STATS_ADD(xs_allocb, args->len);
+       return error;
  }
  
  /*
@@ -543,17 +596,16 @@ xfs_alloc_ag_vextent_exact(
  {
         xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
         xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
-       xfs_agblock_t   end;    /* end of allocated extent */
         int             error;
         xfs_agblock_t   fbno;   /* start block of found extent */
-       xfs_agblock_t   fend;   /* end block of found extent */
         xfs_extlen_t    flen;   /* length of found extent */
+       xfs_agblock_t   tbno;   /* start block of trimmed extent */
+       xfs_extlen_t    tlen;   /* length of trimmed extent */
+       xfs_agblock_t   tend;   /* end block of trimmed extent */
         int             i;      /* success/failure of operation */
-       xfs_agblock_t   maxend; /* end of maximal extent */
-       xfs_agblock_t   minend; /* end of minimal extent */
-       xfs_extlen_t    rlen;   /* length of returned extent */
  
         ASSERT(args->alignment == 1);
+
         /*
          * Allocate/initialize a cursor for the by-number freespace btree.
          */
@@ -579,14 +631,22 @@ xfs_alloc_ag_vextent_exact(
                 goto error0;
         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
         ASSERT(fbno <= args->agbno);
-       minend = args->agbno + args->minlen;
-       maxend = args->agbno + args->maxlen;
-       fend = fbno + flen;
  
         /*
-        * Give up if the freespace isn't long enough for the minimum request.
+        * Check for overlapping busy extents.
+        */
+       xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+
+       /*
+        * Give up if the start of the extent is busy, or the freespace isn't
+        * long enough for the minimum request.
          */
-       if (fend < minend)
+       if (tbno > args->agbno)
+               goto not_found;
+       if (tlen < args->minlen)
+               goto not_found;
+       tend = tbno + tlen;
+       if (tend < args->agbno + args->minlen)
                 goto not_found;
  
         /*
@@ -595,18 +655,16 @@ xfs_alloc_ag_vextent_exact(
          *
          * Fix the length according to mod and prod if given.
          */
-       end = XFS_AGBLOCK_MIN(fend, maxend);
-       args->len = end - args->agbno;
+       args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
+                                               - args->agbno;
         xfs_alloc_fix_len(args);
         if (!xfs_alloc_fix_minleft(args))
                 goto not_found;
  
-       rlen = args->len;
-       ASSERT(args->agbno + rlen <= fend);
-       end = args->agbno + rlen;
+       ASSERT(args->agbno + args->len <= tend);
  
         /*
-        * We are allocating agbno for rlen [agbno .. end]
+        * We are allocating agbno for args->len
          * Allocate/initialize a cursor for the by-size btree.
          */
         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
@@ -619,8 +677,10 @@ xfs_alloc_ag_vextent_exact(
                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
                 goto error0;
         }
+
         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
         args->wasfromfl = 0;
         trace_xfs_alloc_exact_done(args);
         return 0;
@@ -649,11 +709,11 @@ xfs_alloc_find_best_extent(
         struct xfs_btree_cur    **scur, /* searching cursor */
         xfs_agblock_t           gdiff,  /* difference for search comparison */
         xfs_agblock_t           *sbno,  /* extent found by search */
-       xfs_extlen_t            *slen,
-       xfs_extlen_t            *slena, /* aligned length */
+       xfs_extlen_t            *slen,  /* extent length */
+       xfs_agblock_t           *sbnoa, /* aligned extent found by search */
+       xfs_extlen_t            *slena, /* aligned extent length */
         int                     dir)    /* 0 = search right, 1 = search left */
  {
-       xfs_agblock_t           bno;
         xfs_agblock_t           new;
         xfs_agblock_t           sdiff;
         int                     error;
@@ -671,17 +731,16 @@ xfs_alloc_find_best_extent(
                 if (error)
                         goto error0;
                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               xfs_alloc_compute_aligned(*sbno, *slen, args->alignment,
-                                         args->minlen, &bno, slena);
+               xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
  
                 /*
                  * The good extent is closer than this one.
                  */
                 if (!dir) {
-                       if (bno >= args->agbno + gdiff)
+                       if (*sbnoa >= args->agbno + gdiff)
                                 goto out_use_good;
                 } else {
-                       if (bno <= args->agbno - gdiff)
+                       if (*sbnoa <= args->agbno - gdiff)
                                 goto out_use_good;
                 }
  
@@ -693,8 +752,8 @@ xfs_alloc_find_best_extent(
                         xfs_alloc_fix_len(args);
  
                         sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                                                      args->alignment, *sbno,
-                                                      *slen, &new);
+                                                      args->alignment, *sbnoa,
+                                                      *slena, &new);
  
                         /*
                          * Choose closer size and invalidate other cursor.
@@ -744,7 +803,7 @@ xfs_alloc_ag_vextent_near(
         xfs_agblock_t   gtbnoa;         /* aligned ... */
         xfs_extlen_t    gtdiff;         /* difference to right side entry */
         xfs_extlen_t    gtlen;          /* length of right side entry */
-       xfs_extlen_t    gtlena = 0;     /* aligned ... */
+       xfs_extlen_t    gtlena;         /* aligned ... */
         xfs_agblock_t   gtnew;          /* useful start bno of right side */
         int             error;          /* error code */
         int             i;              /* result code, temporary */
@@ -753,9 +812,10 @@ xfs_alloc_ag_vextent_near(
         xfs_agblock_t   ltbnoa;         /* aligned ... */
         xfs_extlen_t    ltdiff;         /* difference to left side entry */
         xfs_extlen_t    ltlen;          /* length of left side entry */
-       xfs_extlen_t    ltlena = 0;     /* aligned ... */
+       xfs_extlen_t    ltlena;         /* aligned ... */
         xfs_agblock_t   ltnew;          /* useful start bno of left side */
         xfs_extlen_t    rlen;           /* length of returned extent */
+       int             forced = 0;
  #if defined(DEBUG) && defined(__KERNEL__)
         /*
          * Randomly don't execute the first algorithm.
@@ -764,13 +824,20 @@ xfs_alloc_ag_vextent_near(
  
         dofirst = random32() & 1;
  #endif
+
+restart:
+       bno_cur_lt = NULL;
+       bno_cur_gt = NULL;
+       ltlen = 0;
+       gtlena = 0;
+       ltlena = 0;
+
         /*
          * Get a cursor for the by-size btree.
          */
         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                 args->agno, XFS_BTNUM_CNT);
-       ltlen = 0;
-       bno_cur_lt = bno_cur_gt = NULL;
+
         /*
          * See if there are any free extents as big as maxlen.
          */
@@ -786,11 +853,13 @@ xfs_alloc_ag_vextent_near(
                         goto error0;
                 if (i == 0 || ltlen == 0) {
                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       trace_xfs_alloc_near_noentry(args);
                         return 0;
                 }
                 ASSERT(i == 1);
         }
         args->wasfromfl = 0;
+
         /*
          * First algorithm.
          * If the requested extent is large wrt the freespaces available
@@ -844,8 +913,8 @@ xfs_alloc_ag_vextent_near(
                         if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                                 goto error0;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
-                                       args->minlen, &ltbnoa, &ltlena);
+                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
+                                                 &ltbnoa, &ltlena);
                         if (ltlena < args->minlen)
                                 continue;
                         args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
@@ -854,7 +923,7 @@ xfs_alloc_ag_vextent_near(
                         if (args->len < blen)
                                 continue;
                         ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                               args->alignment, ltbno, ltlen, &ltnew);
+                               args->alignment, ltbnoa, ltlena, &ltnew);
                         if (ltnew != NULLAGBLOCK &&
                             (args->len > blen || ltdiff < bdiff)) {
                                 bdiff = ltdiff;
@@ -965,8 +1034,8 @@ xfs_alloc_ag_vextent_near(
                         if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
                                 goto error0;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
-                                       args->minlen, &ltbnoa, &ltlena);
+                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
+                                                 &ltbnoa, &ltlena);
                         if (ltlena >= args->minlen)
                                 break;
                         if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -981,8 +1050,8 @@ xfs_alloc_ag_vextent_near(
                         if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
                                 goto error0;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment,
-                                       args->minlen, &gtbnoa, &gtlena);
+                       xfs_alloc_compute_aligned(args, gtbno, gtlen,
+                                                 &gtbnoa, &gtlena);
                         if (gtlena >= args->minlen)
                                 break;
                         if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1005,13 +1074,13 @@ xfs_alloc_ag_vextent_near(
                          */
                         args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
                         xfs_alloc_fix_len(args);
-                       rlen = args->len;
                         ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                               args->alignment, ltbno, ltlen, &ltnew);
+                               args->alignment, ltbnoa, ltlena, &ltnew);
  
                         error = xfs_alloc_find_best_extent(args,
                                                 &bno_cur_lt, &bno_cur_gt,
-                                               ltdiff, &gtbno, &gtlen, &gtlena,
+                                               ltdiff, &gtbno, &gtlen,
+                                               &gtbnoa, &gtlena,
                                                 0 /* search right */);
                 } else {
                         ASSERT(gtlena >= args->minlen);
@@ -1022,11 +1091,12 @@ xfs_alloc_ag_vextent_near(
                         args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
                         xfs_alloc_fix_len(args);
                         gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                               args->alignment, gtbno, gtlen, &gtnew);
+                               args->alignment, gtbnoa, gtlena, &gtnew);
  
                         error = xfs_alloc_find_best_extent(args,
                                                 &bno_cur_gt, &bno_cur_lt,
-                                               gtdiff, &ltbno, &ltlen, &ltlena,
+                                               gtdiff, &ltbno, &ltlen,
+                                               &ltbnoa, &ltlena,
                                                 1 /* search left */);
                 }
  
@@ -1038,6 +1108,13 @@ xfs_alloc_ag_vextent_near(
          * If we couldn't get anything, give up.
          */
         if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
+               if (!forced++) {
+                       trace_xfs_alloc_near_busy(args);
+                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                       goto restart;
+               }
                 trace_xfs_alloc_size_neither(args);
                 args->agbno = NULLAGBLOCK;
                 return 0;
@@ -1072,12 +1149,13 @@ xfs_alloc_ag_vextent_near(
                 return 0;
         }
         rlen = args->len;
-       (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
-               ltlen, &ltnew);
+       (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
+                                    ltbnoa, ltlena, &ltnew);
         ASSERT(ltnew >= ltbno);
-       ASSERT(ltnew + rlen <= ltbno + ltlen);
+       ASSERT(ltnew + rlen <= ltbnoa + ltlena);
         ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
         args->agbno = ltnew;
+
         if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
                         ltnew, rlen, XFSA_FIXUP_BNO_OK)))
                 goto error0;
@@ -1120,26 +1198,35 @@ xfs_alloc_ag_vextent_size(
         int             i;              /* temp status variable */
         xfs_agblock_t   rbno;           /* returned block number */
         xfs_extlen_t    rlen;           /* length of returned extent */
+       int             forced = 0;
  
+restart:
         /*
          * Allocate and initialize a cursor for the by-size btree.
          */
         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                 args->agno, XFS_BTNUM_CNT);
         bno_cur = NULL;
+
         /*
          * Look for an entry >= maxlen+alignment-1 blocks.
          */
         if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
                         args->maxlen + args->alignment - 1, &i)))
                 goto error0;
+
         /*
-        * If none, then pick up the last entry in the tree unless the
-        * tree is empty.
+        * If none or we have busy extents that we cannot allocate from, then
+        * we have to settle for a smaller extent. In the case that there are
+        * no large extents, this will return the last entry in the tree unless
+        * the tree is empty. In the case that there are only busy large
+        * extents, this will return the largest small extent unless there
+        * are no smaller extents available.
          */
-       if (!i) {
-               if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
-                               &flen, &i)))
+       if (!i || forced > 1) {
+               error = xfs_alloc_ag_vextent_small(args, cnt_cur,
+                                                  &fbno, &flen, &i);
+               if (error)
                         goto error0;
                 if (i == 0 || flen == 0) {
                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1147,23 +1234,56 @@ xfs_alloc_ag_vextent_size(
                         return 0;
                 }
                 ASSERT(i == 1);
+               xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+       } else {
+               /*
+                * Search for a non-busy extent that is large enough.
+                * If we are at low space, don't check, or if we fall off
+                * the end of the btree, turn off the busy check and
+                * restart.
+                */
+               for (;;) {
+                       error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
+                       if (error)
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+                       xfs_alloc_compute_aligned(args, fbno, flen,
+                                                 &rbno, &rlen);
+
+                       if (rlen >= args->maxlen)
+                               break;
+
+                       error = xfs_btree_increment(cnt_cur, 0, &i);
+                       if (error)
+                               goto error0;
+                       if (i == 0) {
+                               /*
+                                * Our only valid extents must have been busy.
+                                * Make it unbusy by forcing the log out and
+                                * retrying. If we've been here before, forcing
+                                * the log isn't making the extents available,
+                                * which means they have probably been freed in
+                                * this transaction.  In that case, we have to
+                                * give up on them and we'll attempt a minlen
+                                * allocation the next time around.
+                                */
+                               xfs_btree_del_cursor(cnt_cur,
+                                                    XFS_BTREE_NOERROR);
+                               trace_xfs_alloc_size_busy(args);
+                               if (!forced++)
+                                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                               goto restart;
+                       }
+               }
         }
-       /*
-        * There's a freespace as big as maxlen+alignment-1, get it.
-        */
-       else {
-               if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       }
+
         /*
          * In the first case above, we got the last entry in the
          * by-size btree.  Now we check to see if the space hits maxlen
          * once aligned; if not, we search left for something better.
          * This can't happen in the second case above.
          */
-       xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
-               &rbno, &rlen);
         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
         XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
                         (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1188,8 +1308,8 @@ xfs_alloc_ag_vextent_size(
                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                         if (flen < bestrlen)
                                 break;
-                       xfs_alloc_compute_aligned(fbno, flen, args->alignment,
-                               args->minlen, &rbno, &rlen);
+                       xfs_alloc_compute_aligned(args, fbno, flen,
+                                                 &rbno, &rlen);
                         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
                         XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
                                 (rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1217,13 +1337,19 @@ xfs_alloc_ag_vextent_size(
          * Fix up the length.
          */
         args->len = rlen;
-       xfs_alloc_fix_len(args);
-       if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-               trace_xfs_alloc_size_nominleft(args);
-               args->agbno = NULLAGBLOCK;
-               return 0;
+       if (rlen < args->minlen) {
+               if (!forced++) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       trace_xfs_alloc_size_busy(args);
+                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                       goto restart;
+               }
+               goto out_nominleft;
         }
+       xfs_alloc_fix_len(args);
+
+       if (!xfs_alloc_fix_minleft(args))
+               goto out_nominleft;
         rlen = args->len;
         XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
         /*
@@ -1253,6 +1379,12 @@ error0:
         if (bno_cur)
                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
         return error;
+
+out_nominleft:
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       trace_xfs_alloc_size_nominleft(args);
+       args->agbno = NULLAGBLOCK;
+       return 0;
  }
  
  /*
@@ -1292,6 +1424,9 @@ xfs_alloc_ag_vextent_small(
                 if (error)
                         goto error0;
                 if (fbno != NULLAGBLOCK) {
+                       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+                                            args->userdata);
+
                         if (args->userdata) {
                                 xfs_buf_t       *bp;
  
@@ -1367,6 +1502,7 @@ xfs_free_ag_extent(
         xfs_mount_t     *mp;            /* mount point struct for filesystem */
         xfs_agblock_t   nbno;           /* new starting block of freespace */
         xfs_extlen_t    nlen;           /* new length of freespace */
+       xfs_perag_t     *pag;           /* per allocation group data */
  
         mp = tp->t_mountp;
         /*
@@ -1565,45 +1701,23 @@ xfs_free_ag_extent(
         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
         cnt_cur = NULL;
+
         /*
          * Update the freespace totals in the ag and superblock.
          */
-       {
-               xfs_agf_t       *agf;
-               xfs_perag_t     *pag;           /* per allocation group data */
-
-               pag = xfs_perag_get(mp, agno);
-               pag->pagf_freeblks += len;
-               xfs_perag_put(pag);
-
-               agf = XFS_BUF_TO_AGF(agbp);
-               be32_add_cpu(&agf->agf_freeblks, len);
-               xfs_trans_agblocks_delta(tp, len);
-               XFS_WANT_CORRUPTED_GOTO(
-                       be32_to_cpu(agf->agf_freeblks) <=
-                       be32_to_cpu(agf->agf_length),
-                       error0);
-               xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
-               if (!isfl)
-                       xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
-               XFS_STATS_INC(xs_freex);
-               XFS_STATS_ADD(xs_freeb, len);
-       }
+       pag = xfs_perag_get(mp, agno);
+       error = xfs_alloc_update_counters(tp, pag, agbp, len);
+       xfs_perag_put(pag);
+       if (error)
+               goto error0;
+
+       if (!isfl)
+               xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
+       XFS_STATS_INC(xs_freex);
+       XFS_STATS_ADD(xs_freeb, len);
  
         trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
  
-       /*
-        * Since blocks move to the free list without the coordination
-        * used in xfs_bmap_finish, we can't allow block to be available
-        * for reallocation and non-transaction writing (user data)
-        * until we know that the transaction that moved it to the free
-        * list is permanently on disk.  We track the blocks by declaring
-        * these blocks as "busy"; the busy list is maintained on a per-ag
-        * basis and each transaction records which entries should be removed
-        * when the iclog commits to disk.  If a busy block is allocated,
-        * the iclog is pushed up to the LSN that freed the block.
-        */
-       xfs_alloc_busy_insert(tp, agno, bno, len);
         return 0;
  
   error0:
@@ -1788,12 +1902,11 @@ xfs_alloc_fix_freelist(
         /*
          * Initialize the args structure.
          */
+       memset(&targs, 0, sizeof(targs));
         targs.tp = tp;
         targs.mp = mp;
         targs.agbp = agbp;
         targs.agno = args->agno;
-       targs.mod = targs.minleft = targs.wasdel = targs.userdata =
-               targs.minalignslop = 0;
         targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
         targs.type = XFS_ALLOCTYPE_THIS_AG;
         targs.pag = pag;
@@ -1900,21 +2013,6 @@ xfs_alloc_get_freelist(
         xfs_alloc_log_agf(tp, agbp, logflags);
         *bnop = bno;
  
-       /*
-        * As blocks are freed, they are added to the per-ag busy list and
-        * remain there until the freeing transaction is committed to disk.
-        * Now that we have allocated blocks, this list must be searched to see
-        * if a block is being reused.  If one is, then the freeing transaction
-        * must be pushed to disk before this transaction.
-        *
-        * We do this by setting the current transaction to a sync transaction
-        * which guarantees that the freeing transaction is on disk before this
-        * transaction. This is done instead of a synchronous log force here so
-        * that we don't sit and wait with the AGF locked in the transaction
-        * during the log force.
-        */
-       if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
-               xfs_trans_set_sync(tp);
         return 0;
  }
  
@@ -2027,6 +2125,63 @@ xfs_alloc_put_freelist(
         return 0;
  }
  
+static void
+xfs_agf_verify(
+       struct xfs_buf  *bp)
+ {
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_agf  *agf;
+       int             agf_ok;
+
+       agf = XFS_BUF_TO_AGF(bp);
+
+       agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
+               XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
+               be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
+               be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
+               be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
+               be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
+
+       /*
+        * during growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs ensures we can't
+        * use it by using uncached buffers that don't have the perag attached
+        * so we can detect and avoid this problem.
+        */
+       if (bp->b_pag)
+               agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) ==
+                                               bp->b_pag->pag_agno;
+
+       if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+               agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
+                                               be32_to_cpu(agf->agf_length);
+
+       if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
+                       XFS_RANDOM_ALLOC_READ_AGF))) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+static void
+xfs_agf_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_agf_verify(bp);
+}
+
+static void
+xfs_agf_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_agf_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_agf_buf_ops = {
+       .verify_read = xfs_agf_read_verify,
+       .verify_write = xfs_agf_write_verify,
+};
+
  /*
   * Read in the allocation group header (free/alloc section).
   */
@@ -2038,45 +2193,20 @@ xfs_read_agf(
         int                     flags,  /* XFS_BUF_ */
         struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
  {
-       struct xfs_agf  *agf;           /* ag freelist header */
-       int             agf_ok;         /* set if agf is consistent */
         int             error;
  
         ASSERT(agno != NULLAGNUMBER);
         error = xfs_trans_read_buf(
                         mp, tp, mp->m_ddev_targp,
                         XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), flags, bpp);
+                       XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
         if (error)
                 return error;
         if (!*bpp)
                 return 0;
  
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
-       agf = XFS_BUF_TO_AGF(*bpp);
-
-       /*
-        * Validate the magic number of the agf block.
-        */
-       agf_ok =
-               be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
-               XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
-               be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
-               be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
-               be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
-               be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
-               be32_to_cpu(agf->agf_seqno) == agno;
-       if (xfs_sb_version_haslazysbcount(&mp->m_sb))
-               agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
-                                               be32_to_cpu(agf->agf_length);
-       if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
-                       XFS_RANDOM_ALLOC_READ_AGF))) {
-               XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
-                                    XFS_ERRLEVEL_LOW, mp, agf);
-               xfs_trans_brelse(tp, *bpp);
-               return XFS_ERROR(EFSCORRUPTED);
-       }
-       XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
+       ASSERT(!(*bpp)->b_error);
+       xfs_buf_set_ref(*bpp, XFS_AGF_REF);
         return 0;
  }
  
@@ -2104,7 +2234,7 @@ xfs_alloc_read_agf(
                 return error;
         if (!*bpp)
                 return 0;
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
+       ASSERT(!(*bpp)->b_error);
  
         agf = XFS_BUF_TO_AGF(*bpp);
         pag = xfs_perag_get(mp, agno);
@@ -2371,18 +2501,36 @@ xfs_free_extent(
         memset(&args, 0, sizeof(xfs_alloc_arg_t));
         args.tp = tp;
         args.mp = tp->t_mountp;
+
+       /*
+        * validate that the block number is legal - this enables us to detect
+        * and handle a silent filesystem corruption rather than crashing.
+        */
         args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
-       ASSERT(args.agno < args.mp->m_sb.sb_agcount);
+       if (args.agno >= args.mp->m_sb.sb_agcount)
+               return EFSCORRUPTED;
+
         args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+       if (args.agbno >= args.mp->m_sb.sb_agblocks)
+               return EFSCORRUPTED;
+
         args.pag = xfs_perag_get(args.mp, args.agno);
-       if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
+       ASSERT(args.pag);
+
+       error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+       if (error)
                 goto error0;
-#ifdef DEBUG
-       ASSERT(args.agbp != NULL);
-       ASSERT((args.agbno + len) <=
-               be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
-#endif
+
+       /* validate the extent size is legal now we have the agf locked */
+       if (args.agbno + len >
+                       be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
+               error = EFSCORRUPTED;
+               goto error0;
+       }
+
         error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+       if (!error)
+               xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
  error0:
         xfs_perag_put(args.pag);
         return error;
diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c

index b782d9df94aa08cb8ba29608db456296f9e190a5..dc9ed48ce91909a097474c8597e1e25426e05cfc 100644 (file)
--- a/libxfs/xfs_alloc_btree.c
+++ b/libxfs/xfs_alloc_btree.c
@@ -75,6 +75,8 @@ xfs_allocbt_alloc_block(
                 return 0;
         }
  
+       xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
         xfs_trans_agbtree_delta(cur->bc_tp, 1);
         new->s = cpu_to_be32(bno);
  
@@ -98,19 +100,11 @@ xfs_allocbt_free_block(
         if (error)
                 return error;
  
-       /*
-        * Since blocks move to the free list without the coordination used in
-        * xfs_bmap_finish, we can't allow block to be available for
-        * reallocation and non-transaction writing (user data) until we know
-        * that the transaction that moved it to the free list is permanently
-        * on disk. We track the blocks by declaring these blocks as "busy";
-        * the busy list is maintained on a per-ag basis and each transaction
-        * records which entries should be removed when the iclog commits to
-        * disk. If a busy block is allocated, the iclog is pushed up to the
-        * LSN that freed the block.
-        */
-       xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
+       xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+                             XFS_EXTENT_BUSY_SKIP_DISCARD);
         xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+       xfs_trans_binval(cur->bc_tp, bp);
         return 0;
  }
  
@@ -260,6 +254,82 @@ xfs_allocbt_key_diff(
         return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
  }
  
+static void
+xfs_allocbt_verify(
+       struct xfs_buf          *bp)    /* buffer holding the btree block */
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_perag        *pag = bp->b_pag;
+       unsigned int            level;
+       int                     sblock_ok; /* block passes checks */
+
+       /*
+        * Check magic, level, numrecs and siblings; any failure ends in
+        * xfs_buf_ioerror(bp, EFSCORRUPTED).  During growfs operations, we
+        * can't verify the exact level as the perag is not fully initialised
+        * and hence not attached to the buffer.  In this case, check the level
+        * against the maximum tree depth.
+        */
+       level = be16_to_cpu(block->bb_level);
+       switch (be32_to_cpu(block->bb_magic)) { /* big-endian field -> cpu */
+       case XFS_ABTB_MAGIC:
+               if (pag)
+                       sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
+               else
+                       sblock_ok = level < mp->m_ag_maxlevels;
+               break;
+       case XFS_ABTC_MAGIC:
+               if (pag)
+                       sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
+               else
+                       sblock_ok = level < mp->m_ag_maxlevels;
+               break;
+       default:
+               sblock_ok = 0;  /* neither ABTB nor ABTC magic */
+               break;
+       }
+
+       /* numrecs verification: m_alloc_mxr[0] for level 0, [1] otherwise */
+       sblock_ok = sblock_ok &&
+               be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];
+
+       /* sibling pointers: NULLAGBLOCK or below sb_agblocks, and never zero */
+       sblock_ok = sblock_ok &&
+               (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
+                be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
+               block->bb_u.s.bb_leftsib &&
+               (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
+                be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
+               block->bb_u.s.bb_rightsib;
+
+       if (!sblock_ok) {       /* at least one check above failed */
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+static void
+xfs_allocbt_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_allocbt_verify(bp); /* .verify_read hook: run the common checks */
+}
+
+static void
+xfs_allocbt_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_allocbt_verify(bp); /* .verify_write hook: run the common checks */
+}
+
+const struct xfs_buf_ops xfs_allocbt_buf_ops = {       /* used as xfs_allocbt_ops.buf_ops */
+       .verify_read = xfs_allocbt_read_verify,         /* calls xfs_allocbt_verify() */
+       .verify_write = xfs_allocbt_write_verify,       /* calls xfs_allocbt_verify() */
+};
+
+
  #ifdef DEBUG
  STATIC int
  xfs_allocbt_keys_inorder(
@@ -381,7 +451,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
         .init_rec_from_cur      = xfs_allocbt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_allocbt_init_ptr_from_cur,
         .key_diff               = xfs_allocbt_key_diff,
-
+       .buf_ops                = &xfs_allocbt_buf_ops,
  #ifdef DEBUG
         .keys_inorder           = xfs_allocbt_keys_inorder,
         .recs_inorder           = xfs_allocbt_recs_inorder,
@@ -415,13 +485,16 @@ xfs_allocbt_init_cursor(
  
         cur->bc_tp = tp;
         cur->bc_mp = mp;
-       cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
         cur->bc_btnum = btnum;
         cur->bc_blocklog = mp->m_sb.sb_blocklog;
-
         cur->bc_ops = &xfs_allocbt_ops;
-       if (btnum == XFS_BTNUM_CNT)
+
+       if (btnum == XFS_BTNUM_CNT) {
+               cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
                 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+       } else {
+               cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+       }
  
         cur->bc_private.a.agbp = agbp;
         cur->bc_private.a.agno = agno;
diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c

index eec7f8fc900ea765241ebfbfadcd95f90c32faa9..42546a947cadede5c249fd41bc3df3bb3636bc77 100644 (file)
--- a/libxfs/xfs_attr.c
+++ b/libxfs/xfs_attr.c
@@ -295,8 +295,7 @@ xfs_attr_set_int(
                 return (error);
         }
  
-       xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
-       xfs_trans_ihold(args.trans, dp);
+       xfs_trans_ijoin(args.trans, dp, 0);
  
         /*
          * If the attribute list is non-existent or a shortform list,
@@ -365,10 +364,8 @@ xfs_attr_set_int(
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args.trans, dp);
-               }
+               if (committed)
+                       xfs_trans_ijoin(args.trans, dp, 0);
  
                 /*
                  * Commit the leaf transformation.  We'll need another (linked)
@@ -468,6 +465,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
         args.total = 0;
         args.whichfork = XFS_ATTR_FORK;
  
+       /*
+        * we have no control over the attribute names that userspace passes us
+        * to remove, so we have to allow the name lookup prior to attribute
+        * removal to fail.
+        */
+       args.op_flags = XFS_DA_OP_OKNOENT;
+
         /*
          * Attach the dquots to the inode.
          */
@@ -509,8 +513,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
          * No need to make quota reservations here. We expect to release some
          * blocks not allocate in the common case.
          */
-       xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
-       xfs_trans_ihold(args.trans, dp);
+       xfs_trans_ijoin(args.trans, dp, 0);
  
         /*
          * Decide on what work routines to call based on the inode size.
@@ -603,6 +606,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
  {
         int newsize, forkoff, retval;
  
+       trace_xfs_attr_sf_addname(args);
+
         retval = xfs_attr_shortform_lookup(args);
         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
                 return(retval);
@@ -643,19 +648,19 @@ STATIC int
  xfs_attr_leaf_addname(xfs_da_args_t *args)
  {
         xfs_inode_t *dp;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int retval, error, committed, forkoff;
  
+       trace_xfs_attr_leaf_addname(args);
+
         /*
          * Read the (only) block in the attribute list in.
          */
         dp = args->dp;
         args->blkno = 0;
-       error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
-                                            XFS_ATTR_FORK);
+       error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
         if (error)
-               return(error);
-       ASSERT(bp != NULL);
+               return error;
  
         /*
          * Look up the given attribute in the leaf block.  Figure out if
@@ -663,13 +668,16 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
          */
         retval = xfs_attr_leaf_lookup_int(bp, args);
         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
-               xfs_da_brelse(args->trans, bp);
+               xfs_trans_brelse(args->trans, bp);
                 return(retval);
         } else if (retval == EEXIST) {
                 if (args->flags & ATTR_CREATE) {        /* pure create op */
-                       xfs_da_brelse(args->trans, bp);
+                       xfs_trans_brelse(args->trans, bp);
                         return(retval);
                 }
+
+               trace_xfs_attr_leaf_replace(args);
+
                 args->op_flags |= XFS_DA_OP_RENAME;     /* an atomic rename */
                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
                 args->index2 = args->index;
@@ -682,7 +690,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
          * if required.
          */
         retval = xfs_attr_leaf_add(bp, args);
-       xfs_da_buf_done(bp);
         if (retval == ENOSPC) {
                 /*
                  * Promote the attribute list to the Btree format, then
@@ -706,10 +713,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args->trans, dp);
-               }
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
  
                 /*
                  * Commit the current trans (including the inode) and start
@@ -779,12 +784,12 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                  * Read in the block containing the "old" attr, then
                  * remove the "old" attr from that block (neat, huh!)
                  */
-               error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
-                                                    &bp, XFS_ATTR_FORK);
+               error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno,
+                                          -1, &bp);
                 if (error)
-                       return(error);
-               ASSERT(bp != NULL);
-               (void)xfs_attr_leaf_remove(bp, args);
+                       return error;
+
+               xfs_attr_leaf_remove(bp, args);
  
                 /*
                  * If the result is small enough, shrink it all into the inode.
@@ -810,12 +815,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                          * and started a new one.  We need the inode to be
                          * in all transactions.
                          */
-                       if (committed) {
-                               xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                               xfs_trans_ihold(args->trans, dp);
-                       }
-               } else
-                       xfs_da_buf_done(bp);
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
+               }
  
                 /*
                  * Commit the remove and start the next trans in series.
@@ -841,28 +843,27 @@ STATIC int
  xfs_attr_leaf_removename(xfs_da_args_t *args)
  {
         xfs_inode_t *dp;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error, committed, forkoff;
  
+       trace_xfs_attr_leaf_removename(args);
+
         /*
          * Remove the attribute.
          */
         dp = args->dp;
         args->blkno = 0;
-       error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
-                                            XFS_ATTR_FORK);
-       if (error) {
-               return(error);
-       }
+       error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
+               return error;
  
-       ASSERT(bp != NULL);
         error = xfs_attr_leaf_lookup_int(bp, args);
         if (error == ENOATTR) {
-               xfs_da_brelse(args->trans, bp);
+               xfs_trans_brelse(args->trans, bp);
                 return(error);
         }
  
-       (void)xfs_attr_leaf_remove(bp, args);
+       xfs_attr_leaf_remove(bp, args);
  
         /*
          * If the result is small enough, shrink it all into the inode.
@@ -886,12 +887,9 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args->trans, dp);
-               }
-       } else
-               xfs_da_buf_done(bp);
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
+       }
         return(0);
  }
  
@@ -904,23 +902,23 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
  STATIC int
  xfs_attr_leaf_get(xfs_da_args_t *args)
  {
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
  
+       trace_xfs_attr_leaf_get(args);
+
         args->blkno = 0;
-       error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
-                                            XFS_ATTR_FORK);
+       error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
         if (error)
-               return(error);
-       ASSERT(bp != NULL);
+               return error;
  
         error = xfs_attr_leaf_lookup_int(bp, args);
         if (error != EEXIST)  {
-               xfs_da_brelse(args->trans, bp);
+               xfs_trans_brelse(args->trans, bp);
                 return(error);
         }
         error = xfs_attr_leaf_getvalue(bp, args);
-       xfs_da_brelse(args->trans, bp);
+       xfs_trans_brelse(args->trans, bp);
         if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
                 error = xfs_attr_rmtval_get(args);
         }
@@ -950,6 +948,8 @@ xfs_attr_node_addname(xfs_da_args_t *args)
         xfs_mount_t *mp;
         int committed, retval, error;
  
+       trace_xfs_attr_node_addname(args);
+
         /*
          * Fill in bucket of arguments/results/context to carry around.
          */
@@ -976,6 +976,9 @@ restart:
         } else if (retval == EEXIST) {
                 if (args->flags & ATTR_CREATE)
                         goto out;
+
+               trace_xfs_attr_node_replace(args);
+
                 args->op_flags |= XFS_DA_OP_RENAME;     /* atomic rename op */
                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
                 args->index2 = args->index;
@@ -1013,10 +1016,8 @@ restart:
                          * and started a new one.  We need the inode to be
                          * in all transactions.
                          */
-                       if (committed) {
-                               xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                               xfs_trans_ihold(args->trans, dp);
-                       }
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
  
                         /*
                          * Commit the node conversion and start the next
@@ -1052,10 +1053,8 @@ restart:
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args->trans, dp);
-               }
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
         } else {
                 /*
                  * Addition succeeded, update Btree hashvals.
@@ -1166,10 +1165,8 @@ restart:
                          * and started a new one.  We need the inode to be
                          * in all transactions.
                          */
-                       if (committed) {
-                               xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                               xfs_trans_ihold(args->trans, dp);
-                       }
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
                 }
  
                 /*
@@ -1210,9 +1207,11 @@ xfs_attr_node_removename(xfs_da_args_t *args)
         xfs_da_state_t *state;
         xfs_da_state_blk_t *blk;
         xfs_inode_t *dp;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int retval, error, committed, forkoff;
  
+       trace_xfs_attr_node_removename(args);
+
         /*
          * Tie a string around our finger to remind us where we are.
          */
@@ -1300,10 +1299,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args->trans, dp);
-               }
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
  
                 /*
                  * Commit the Btree join operation and start a new trans.
@@ -1322,16 +1319,11 @@ xfs_attr_node_removename(xfs_da_args_t *args)
                  */
                 ASSERT(state->path.active == 1);
                 ASSERT(state->path.blk[0].bp);
-               xfs_da_buf_done(state->path.blk[0].bp);
                 state->path.blk[0].bp = NULL;
  
-               error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
-                                                    XFS_ATTR_FORK);
+               error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp);
                 if (error)
                         goto out;
-               ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
-                                     bp->data)->hdr.info.magic)
-                                                      == XFS_ATTR_LEAF_MAGIC);
  
                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
                         xfs_bmap_init(args->flist, args->firstblock);
@@ -1354,12 +1346,10 @@ xfs_attr_node_removename(xfs_da_args_t *args)
                          * and started a new one.  We need the inode to be
                          * in all transactions.
                          */
-                       if (committed) {
-                               xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                               xfs_trans_ihold(args->trans, dp);
-                       }
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
                 } else
-                       xfs_da_brelse(args->trans, bp);
+                       xfs_trans_brelse(args->trans, bp);
         }
         error = 0;
  
@@ -1381,6 +1371,8 @@ xfs_attr_fillstate(xfs_da_state_t *state)
         xfs_da_state_blk_t *blk;
         int level;
  
+       trace_xfs_attr_fillstate(state->args);
+
         /*
          * Roll down the "path" in the state structure, storing the on-disk
          * block number for those buffers in the "path".
@@ -1389,8 +1381,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
                 if (blk->bp) {
-                       blk->disk_blkno = xfs_da_blkno(blk->bp);
-                       xfs_da_buf_done(blk->bp);
+                       blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
                         blk->bp = NULL;
                 } else {
                         blk->disk_blkno = 0;
@@ -1405,8 +1396,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
                 if (blk->bp) {
-                       blk->disk_blkno = xfs_da_blkno(blk->bp);
-                       xfs_da_buf_done(blk->bp);
+                       blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
                         blk->bp = NULL;
                 } else {
                         blk->disk_blkno = 0;
@@ -1429,6 +1419,8 @@ xfs_attr_refillstate(xfs_da_state_t *state)
         xfs_da_state_blk_t *blk;
         int level, error;
  
+       trace_xfs_attr_refillstate(state->args);
+
         /*
          * Roll down the "path" in the state structure, storing the on-disk
          * block number for those buffers in the "path".
@@ -1437,7 +1429,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
                 if (blk->disk_blkno) {
-                       error = xfs_da_read_buf(state->args->trans,
+                       error = xfs_da_node_read(state->args->trans,
                                                 state->args->dp,
                                                 blk->blkno, blk->disk_blkno,
                                                 &blk->bp, XFS_ATTR_FORK);
@@ -1456,7 +1448,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
                 if (blk->disk_blkno) {
-                       error = xfs_da_read_buf(state->args->trans,
+                       error = xfs_da_node_read(state->args->trans,
                                                 state->args->dp,
                                                 blk->blkno, blk->disk_blkno,
                                                 &blk->bp, XFS_ATTR_FORK);
@@ -1485,6 +1477,8 @@ xfs_attr_node_get(xfs_da_args_t *args)
         int error, retval;
         int i;
  
+       trace_xfs_attr_node_get(args);
+
         state = xfs_da_state_alloc();
         state->args = args;
         state->mp = args->dp->i_mount;
@@ -1516,7 +1510,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
          * If not in a transaction, we have to release all the buffers.
          */
         for (i = 0; i < state->path.active; i++) {
-               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
                 state->path.blk[i].bp = NULL;
         }
  
@@ -1543,6 +1537,8 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
         int nmap, error, tmp, valuelen, blkcnt, i;
         xfs_dablk_t lblkno;
  
+       trace_xfs_attr_rmtval_get(args);
+
         ASSERT(!(args->flags & ATTR_KERNOVAL));
  
         mp = args->dp->i_mount;
@@ -1551,10 +1547,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
         lblkno = args->rmtblkno;
         while (valuelen > 0) {
                 nmap = ATTR_RMTVALUE_MAPSIZE;
-               error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
-                                 args->rmtblkcnt,
-                                 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-                                 NULL, 0, map, &nmap, NULL);
+               error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
+                                      args->rmtblkcnt, map, &nmap,
+                                      XFS_BMAPI_ATTRFORK);
                 if (error)
                         return(error);
                 ASSERT(nmap >= 1);
@@ -1564,14 +1559,12 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
                                (map[i].br_startblock != HOLESTARTBLOCK));
                         dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
                         blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-                       error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
-                                            blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
-                                            &bp);
+                       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                                                  dblkno, blkcnt, 0, &bp, NULL);
                         if (error)
                                 return(error);
  
-                       tmp = (valuelen < XFS_BUF_SIZE(bp))
-                               ? valuelen : XFS_BUF_SIZE(bp);
+                       tmp = min_t(int, valuelen, BBTOB(bp->b_length));
                         xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
                         xfs_buf_relse(bp);
                         dst += tmp;
@@ -1601,6 +1594,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
         xfs_dablk_t lblkno;
         int blkcnt, valuelen, nmap, error, tmp, committed;
  
+       trace_xfs_attr_rmtval_set(args);
+
         dp = args->dp;
         mp = dp->i_mount;
         src = args->value;
@@ -1628,10 +1623,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
                  */
                 xfs_bmap_init(args->flist, args->firstblock);
                 nmap = 1;
-               error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
+               error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
                                   blkcnt,
-                                 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
-                                                       XFS_BMAPI_WRITE,
+                                 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
                                   args->firstblock, args->total, &map, &nmap,
                                   args->flist);
                 if (!error) {
@@ -1649,10 +1643,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args->trans, dp);
-               }
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
  
                 ASSERT(nmap == 1);
                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -1677,19 +1669,18 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
         lblkno = args->rmtblkno;
         valuelen = args->valuelen;
         while (valuelen > 0) {
+               int buflen;
+
                 /*
                  * Try to remember where we decided to put the value.
                  */
                 xfs_bmap_init(args->flist, args->firstblock);
                 nmap = 1;
-               error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
-                                 args->rmtblkcnt,
-                                 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-                                 args->firstblock, 0, &map, &nmap,
-                                 NULL);
-               if (error) {
+               error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
+                                      args->rmtblkcnt, &map, &nmap,
+                                      XFS_BMAPI_ATTRFORK);
+               if (error)
                         return(error);
-               }
                 ASSERT(nmap == 1);
                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
                        (map.br_startblock != HOLESTARTBLOCK));
@@ -1697,19 +1688,20 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
  
-               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
-                                XBF_LOCK | XBF_DONT_BLOCK);
-               ASSERT(bp);
-               ASSERT(!XFS_BUF_GETERROR(bp));
+               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
+               if (!bp)
+                       return ENOMEM;
  
-               tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
-                                                       XFS_BUF_SIZE(bp);
+               buflen = BBTOB(bp->b_length);
+               tmp = min_t(int, valuelen, buflen);
                 xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
-               if (tmp < XFS_BUF_SIZE(bp))
-                       xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
-               if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
-                       return (error);
-               }
+               if (tmp < buflen)
+                       xfs_buf_zero(bp, tmp, buflen - tmp);
+
+               error = xfs_bwrite(mp, bp);     /* GROT: NOTE: synchronous write */
+               xfs_buf_relse(bp);
+               if (error)
+                       return error;
                 src += tmp;
                 valuelen -= tmp;
  
@@ -1733,6 +1725,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
         xfs_dablk_t lblkno;
         int valuelen, blkcnt, nmap, error, done, committed;
  
+       trace_xfs_attr_rmtval_remove(args);
+
         mp = args->dp->i_mount;
  
         /*
@@ -1745,16 +1739,12 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
                 /*
                  * Try to remember where we decided to put the value.
                  */
-               xfs_bmap_init(args->flist, args->firstblock);
                 nmap = 1;
-               error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt,
-                                       XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-                                       args->firstblock, 0, &map, &nmap,
-                                       args->flist);
-               if (error) {
+               error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
+                                      args->rmtblkcnt, &map, &nmap,
+                                      XFS_BMAPI_ATTRFORK);
+               if (error)
                         return(error);
-               }
                 ASSERT(nmap == 1);
                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
                        (map.br_startblock != HOLESTARTBLOCK));
@@ -1767,8 +1757,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
                  */
                 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
                 if (bp) {
-                       XFS_BUF_STALE(bp);
-                       XFS_BUF_UNDELAYWRITE(bp);
+                       xfs_buf_stale(bp);
                         xfs_buf_relse(bp);
                         bp = NULL;
                 }
@@ -1805,10 +1794,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
                  * bmap_finish() may have committed the last trans and started
                  * a new one.  We need the inode to be in all transactions.
                  */
-               if (committed) {
-                       xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
-                       xfs_trans_ihold(args->trans, args->dp);
-               }
+               if (committed)
+                       xfs_trans_ijoin(args->trans, args->dp, 0);
  
                 /*
                  * Close out trans and start the next one in the chain.
diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c

index d1f7a2097761224240d2fe7f08232b373628d365..824c122956a11e6b56e197ff8d6b43cfe3306ad6 100644 (file)
--- a/libxfs/xfs_attr_leaf.c
+++ b/libxfs/xfs_attr_leaf.c
@@ -32,10 +32,11 @@
   * Routines used for growing the Btree.
   */
  STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block,
-                                   xfs_dabuf_t **bpp);
-STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
-                                             int freemap_index);
-STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer);
+                               struct xfs_buf **bpp);
+STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer,
+                                 xfs_da_args_t *args, int freemap_index);
+STATIC void xfs_attr_leaf_compact(struct xfs_da_args *args,
+                                 struct xfs_buf *leaf_buffer);
  STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state,
                                                    xfs_da_state_blk_t *blk1,
                                                    xfs_da_state_blk_t *blk2);
@@ -56,6 +57,52 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
                                          xfs_mount_t *mp);
  STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
  
+static void
+xfs_attr_leaf_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_attr_leaf_hdr *hdr = bp->b_addr;
+       int                     block_ok = 0;
+
+       block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
+       if (!block_ok) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+static void
+xfs_attr_leaf_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_attr_leaf_verify(bp);
+}
+
+static void
+xfs_attr_leaf_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_attr_leaf_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_attr_leaf_buf_ops = {
+       .verify_read = xfs_attr_leaf_read_verify,
+       .verify_write = xfs_attr_leaf_write_verify,
+};
+
+int
+xfs_attr_leaf_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp)
+{
+       return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
+                               XFS_ATTR_FORK, &xfs_attr_leaf_buf_ops);
+}
+
  /*========================================================================
   * Namespace helper routines
   *========================================================================*/
@@ -78,6 +125,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
  /*
   * Query whether the requested number of additional bytes of extended
   * attribute space will be able to fit inline.
+ *
   * Returns zero if not, else the di_forkoff fork offset to be used in the
   * literal area for attribute data once the new bytes have been added.
   *
@@ -90,7 +138,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
         int offset;
         int minforkoff; /* lower limit on valid forkoff locations */
         int maxforkoff; /* upper limit on valid forkoff locations */
-       int dsize;      
+       int dsize;
         xfs_mount_t *mp = dp->i_mount;
  
         offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -104,47 +152,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
                 return (offset >= minforkoff) ? minforkoff : 0;
         }
  
-       if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
-               if (bytes <= XFS_IFORK_ASIZE(dp))
-                       return dp->i_d.di_forkoff;
+       /*
+        * If the requested number of bytes is smaller than or equal to the
+        * current attribute fork size we can always proceed.
+        *
+        * Note that if_bytes in the data fork might actually be larger than
+        * the current data fork size due to delalloc extents. In that
+        * case either the extent count will go down when they are converted
+        * to real extents, or the delalloc conversion will take care of the
+        * literal area rebalancing.
+        */
+       if (bytes <= XFS_IFORK_ASIZE(dp))
+               return dp->i_d.di_forkoff;
+
+       /*
+        * For attr2 we can try to move the forkoff if there is space in the
+        * literal area, but for the old format we are done if there is no
+        * space in the fixed attribute fork.
+        */
+       if (!(mp->m_flags & XFS_MOUNT_ATTR2))
                 return 0;
-       }
  
         dsize = dp->i_df.if_bytes;
-       
+
         switch (dp->i_d.di_format) {
         case XFS_DINODE_FMT_EXTENTS:
-               /* 
+               /*
                  * If there is no attr fork and the data fork is extents, 
-                * determine if creating the default attr fork will result 
-                * in the extents form migrating to btree. If so, the 
-                * minimum offset only needs to be the space required for 
+                * determine if creating the default attr fork will result
+                * in the extents form migrating to btree. If so, the
+                * minimum offset only needs to be the space required for
                  * the btree root.
-                */ 
+                */
                 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
                     xfs_default_attroffset(dp))
                         dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
                 break;
-               
         case XFS_DINODE_FMT_BTREE:
                 /*
-                * If have data btree then keep forkoff if we have one,
-                * otherwise we are adding a new attr, so then we set 
-                * minforkoff to where the btree root can finish so we have 
+                * If we have a data btree then keep forkoff if we have one,
+                * otherwise we are adding a new attr, so then we set
+                * minforkoff to where the btree root can finish so we have
                  * plenty of room for attrs
                  */
                 if (dp->i_d.di_forkoff) {
-                       if (offset < dp->i_d.di_forkoff) 
+                       if (offset < dp->i_d.di_forkoff)
                                 return 0;
-                       else 
-                               return dp->i_d.di_forkoff;
-               } else
-                       dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
+                       return dp->i_d.di_forkoff;
+               }
+               dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
                 break;
         }
-       
-       /* 
-        * A data fork btree root must have space for at least 
+
+       /*
+        * A data fork btree root must have space for at least
          * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
          */
         minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
@@ -154,10 +215,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
         maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
         maxforkoff = maxforkoff >> 3;   /* rounded down */
  
-       if (offset >= minforkoff && offset < maxforkoff)
-               return offset;
         if (offset >= maxforkoff)
                 return maxforkoff;
+       if (offset >= minforkoff)
+               return offset;
         return 0;
  }
  
@@ -189,6 +250,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
         xfs_inode_t *dp;
         xfs_ifork_t *ifp;
  
+       trace_xfs_attr_sf_create(args);
+
         dp = args->dp;
         ASSERT(dp != NULL);
         ifp = dp->i_afp;
@@ -222,13 +285,11 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
         xfs_inode_t *dp;
         xfs_ifork_t *ifp;
  
+       trace_xfs_attr_sf_add(args);
+
         dp = args->dp;
         mp = dp->i_mount;
         dp->i_d.di_forkoff = forkoff;
-       dp->i_df.if_ext_max =
-               XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
-       dp->i_afp->if_ext_max =
-               XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
  
         ifp = dp->i_afp;
         ASSERT(ifp->if_flags & XFS_IFINLINE);
@@ -280,7 +341,6 @@ xfs_attr_fork_reset(
         ASSERT(ip->i_d.di_anextents == 0);
         ASSERT(ip->i_afp == NULL);
  
-       ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  }
  
@@ -296,6 +356,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
         xfs_mount_t *mp;
         xfs_inode_t *dp;
  
+       trace_xfs_attr_sf_remove(args);
+
         dp = args->dp;
         mp = dp->i_mount;
         base = sizeof(xfs_attr_sf_hdr_t);
@@ -343,10 +405,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
                                 (args->op_flags & XFS_DA_OP_ADDNAME) ||
                                 !(mp->m_flags & XFS_MOUNT_ATTR2) ||
                                 dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
-               dp->i_afp->if_ext_max =
-                       XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
-               dp->i_df.if_ext_max =
-                       XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
                 xfs_trans_log_inode(args->trans, dp,
                                         XFS_ILOG_CORE | XFS_ILOG_ADATA);
         }
@@ -368,6 +426,8 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
         int i;
         xfs_ifork_t *ifp;
  
+       trace_xfs_attr_sf_lookup(args);
+
         ifp = args->dp->i_afp;
         ASSERT(ifp->if_flags & XFS_IFINLINE);
         sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
@@ -436,9 +496,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
         char *tmpbuffer;
         int error, i, size;
         xfs_dablk_t blkno;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         xfs_ifork_t *ifp;
  
+       trace_xfs_attr_sf_to_leaf(args);
+
         dp = args->dp;
         ifp = dp->i_afp;
         sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
@@ -504,8 +566,6 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
         error = 0;
  
  out:
-       if(bp)
-               xfs_da_buf_done(bp);
         kmem_free(tmpbuffer);
         return(error);
  }
@@ -515,15 +575,17 @@ out:
   * a shortform attribute list.
   */
  int
-xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
+xfs_attr_shortform_allfit(
+       struct xfs_buf  *bp,
+       struct xfs_inode *dp)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_entry_t *entry;
         xfs_attr_leaf_name_local_t *name_loc;
         int bytes, i;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
  
         entry = &leaf->entries[0];
         bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -552,7 +614,10 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
   * Convert a leaf attribute list to shortform attribute list
   */
  int
-xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
+xfs_attr_leaf_to_shortform(
+       struct xfs_buf  *bp,
+       xfs_da_args_t   *args,
+       int             forkoff)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_entry_t *entry;
@@ -562,15 +627,17 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
         char *tmpbuffer;
         int error, i;
  
+       trace_xfs_attr_leaf_to_sf(args);
+
         dp = args->dp;
         tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
         ASSERT(tmpbuffer != NULL);
  
         ASSERT(bp != NULL);
-       memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
+       memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
         leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-       memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
  
         /*
          * Clean out the prior contents of the attribute list.
@@ -631,30 +698,30 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
         xfs_attr_leafblock_t *leaf;
         xfs_da_intnode_t *node;
         xfs_inode_t *dp;
-       xfs_dabuf_t *bp1, *bp2;
+       struct xfs_buf *bp1, *bp2;
         xfs_dablk_t blkno;
         int error;
  
+       trace_xfs_attr_leaf_to_node(args);
+
         dp = args->dp;
         bp1 = bp2 = NULL;
         error = xfs_da_grow_inode(args, &blkno);
         if (error)
                 goto out;
-       error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
-                                            XFS_ATTR_FORK);
+       error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1);
         if (error)
                 goto out;
-       ASSERT(bp1 != NULL);
+
         bp2 = NULL;
         error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2,
                                             XFS_ATTR_FORK);
         if (error)
                 goto out;
-       ASSERT(bp2 != NULL);
-       memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
-       xfs_da_buf_done(bp1);
+       bp2->b_ops = bp1->b_ops;
+       memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount));
         bp1 = NULL;
-       xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
+       xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
  
         /*
          * Set up the new root node.
@@ -662,21 +729,17 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
         error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
         if (error)
                 goto out;
-       node = bp1->data;
-       leaf = bp2->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       node = bp1->b_addr;
+       leaf = bp2->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         /* both on-disk, don't endian-flip twice */
         node->btree[0].hashval =
                 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
         node->btree[0].before = cpu_to_be32(blkno);
         node->hdr.count = cpu_to_be16(1);
-       xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
+       xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
         error = 0;
  out:
-       if (bp1)
-               xfs_da_buf_done(bp1);
-       if (bp2)
-               xfs_da_buf_done(bp2);
         return(error);
  }
  
@@ -690,22 +753,27 @@ out:
   * or a leaf in a node attribute list.
   */
  STATIC int
-xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+xfs_attr_leaf_create(
+       xfs_da_args_t   *args,
+       xfs_dablk_t     blkno,
+       struct xfs_buf  **bpp)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_hdr_t *hdr;
         xfs_inode_t *dp;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
  
+       trace_xfs_attr_leaf_create(args);
+
         dp = args->dp;
         ASSERT(dp != NULL);
         error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
                                             XFS_ATTR_FORK);
         if (error)
                 return(error);
-       ASSERT(bp != NULL);
-       leaf = bp->data;
+       bp->b_ops = &xfs_attr_leaf_buf_ops;
+       leaf = bp->b_addr;
         memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
         hdr = &leaf->hdr;
         hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
@@ -719,7 +787,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
         hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) -
                                            sizeof(xfs_attr_leaf_hdr_t));
  
-       xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
+       xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
  
         *bpp = bp;
         return(0);
@@ -735,6 +803,8 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
         xfs_dablk_t blkno;
         int error;
  
+       trace_xfs_attr_leaf_split(state->args);
+
         /*
          * Allocate space for a new leaf node.
          */
@@ -764,10 +834,13 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
          *
          * Insert the "new" entry in the correct block.
          */
-       if (state->inleaf)
+       if (state->inleaf) {
+               trace_xfs_attr_leaf_add_old(state->args);
                 error = xfs_attr_leaf_add(oldblk->bp, state->args);
-       else
+       } else {
+               trace_xfs_attr_leaf_add_new(state->args);
                 error = xfs_attr_leaf_add(newblk->bp, state->args);
+       }
  
         /*
          * Update last hashval in each block since we added the name.
@@ -781,15 +854,19 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
   * Add a name to the leaf attribute list structure.
   */
  int
-xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_add(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_hdr_t *hdr;
         xfs_attr_leaf_map_t *map;
         int tablesize, entsize, sum, tmp, i;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       trace_xfs_attr_leaf_add(args);
+
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         ASSERT((args->index >= 0)
                 && (args->index <= be16_to_cpu(leaf->hdr.count)));
         hdr = &leaf->hdr;
@@ -833,7 +910,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
          * Compact the entries to coalesce free space.
          * This may change the hdr->count via dropping INCOMPLETE entries.
          */
-       xfs_attr_leaf_compact(args->trans, bp);
+       xfs_attr_leaf_compact(args, bp);
  
         /*
          * After compaction, the block is guaranteed to have only one
@@ -850,7 +927,10 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
   * Add a name to a leaf attribute list structure.
   */
  STATIC int
-xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
+xfs_attr_leaf_add_work(
+       struct xfs_buf  *bp,
+       xfs_da_args_t   *args,
+       int             mapindex)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_hdr_t *hdr;
@@ -861,8 +941,10 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
         xfs_mount_t *mp;
         int tmp, i;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       trace_xfs_attr_leaf_add_work(args);
+
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         hdr = &leaf->hdr;
         ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
         ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
@@ -875,7 +957,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
                 tmp  = be16_to_cpu(hdr->count) - args->index;
                 tmp *= sizeof(xfs_attr_leaf_entry_t);
                 memmove((char *)(entry+1), (char *)entry, tmp);
-               xfs_da_log_buf(args->trans, bp,
+               xfs_trans_log_buf(args->trans, bp,
                     XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
         }
         be16_add_cpu(&hdr->count, 1);
@@ -907,7 +989,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
                         args->index2++;
                 }
         }
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
                           XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
         ASSERT((args->index == 0) ||
                (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
@@ -915,8 +997,6 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
                (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
  
         /*
-        * Copy the attribute name and value into the new space.
-        *
          * For "remote" attribute values, simply note that we need to
          * allocate space for the "remote" value.  We can't actually
          * allocate the extents in this transaction, and we can't decide
@@ -941,7 +1021,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
                 args->rmtblkno = 1;
                 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
         }
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
              XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index),
                                    xfs_attr_leaf_entsize(leaf, args->index)));
  
@@ -965,7 +1045,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
                 }
         }
         be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index));
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
                 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
         return(0);
  }
@@ -974,24 +1054,28 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
   * Garbage collect a leaf attribute list block by copying it to a new buffer.
   */
  STATIC void
-xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
+xfs_attr_leaf_compact(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp)
  {
-       xfs_attr_leafblock_t *leaf_s, *leaf_d;
-       xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
-       xfs_mount_t *mp;
-       char *tmpbuffer;
+       xfs_attr_leafblock_t    *leaf_s, *leaf_d;
+       xfs_attr_leaf_hdr_t     *hdr_s, *hdr_d;
+       struct xfs_trans        *trans = args->trans;
+       struct xfs_mount        *mp = trans->t_mountp;
+       char                    *tmpbuffer;
+
+       trace_xfs_attr_leaf_compact(args);
  
-       mp = trans->t_mountp;
         tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
         ASSERT(tmpbuffer != NULL);
-       memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp));
-       memset(bp->data, 0, XFS_LBSIZE(mp));
+       memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
+       memset(bp->b_addr, 0, XFS_LBSIZE(mp));
  
         /*
          * Copy basic information
          */
         leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
-       leaf_d = bp->data;
+       leaf_d = bp->b_addr;
         hdr_s = &leaf_s->hdr;
         hdr_d = &leaf_d->hdr;
         hdr_d->info = hdr_s->info;      /* struct copy */
@@ -1014,7 +1098,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
          */
         xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0,
                                 be16_to_cpu(hdr_s->count), mp);
-       xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+       xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
  
         kmem_free(tmpbuffer);
  }
@@ -1046,12 +1130,15 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
          */
         ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
         ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
-       leaf1 = blk1->bp->data;
-       leaf2 = blk2->bp->data;
-       ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-       ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf1 = blk1->bp->b_addr;
+       leaf2 = blk2->bp->b_addr;
+       ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       ASSERT(leaf2->hdr.count == 0);
         args = state->args;
  
+       trace_xfs_attr_leaf_rebalance(args);
+
         /*
          * Check ordering of blocks, reverse if it makes things simpler.
          *
@@ -1063,8 +1150,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 tmp_blk = blk1;
                 blk1 = blk2;
                 blk2 = tmp_blk;
-               leaf1 = blk1->bp->data;
-               leaf2 = blk2->bp->data;
+               leaf1 = blk1->bp->b_addr;
+               leaf2 = blk2->bp->b_addr;
                 swap = 1;
         }
         hdr1 = &leaf1->hdr;
@@ -1101,9 +1188,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 max  = be16_to_cpu(hdr2->firstused)
                                                 - sizeof(xfs_attr_leaf_hdr_t);
                 max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t);
-               if (space > max) {
-                       xfs_attr_leaf_compact(args->trans, blk2->bp);
-               }
+               if (space > max)
+                       xfs_attr_leaf_compact(args, blk2->bp);
  
                 /*
                  * Move high entries from leaf1 to low end of leaf2.
@@ -1111,13 +1197,14 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count,
                                 leaf2, 0, count, state->mp);
  
-               xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
-               xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+               xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+               xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
         } else if (count > be16_to_cpu(hdr1->count)) {
                 /*
                  * I assert that since all callers pass in an empty
                  * second buffer, this code should never execute.
                  */
+               ASSERT(0);
  
                 /*
                  * Figure the total bytes to be added to the destination leaf.
@@ -1133,9 +1220,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 max  = be16_to_cpu(hdr1->firstused)
                                                 - sizeof(xfs_attr_leaf_hdr_t);
                 max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t);
-               if (space > max) {
-                       xfs_attr_leaf_compact(args->trans, blk1->bp);
-               }
+               if (space > max)
+                       xfs_attr_leaf_compact(args, blk1->bp);
  
                 /*
                  * Move low entries from leaf2 to high end of leaf1.
@@ -1143,8 +1229,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 xfs_attr_leaf_moveents(leaf2, 0, leaf1,
                                 be16_to_cpu(hdr1->count), count, state->mp);
  
-               xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
-               xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+               xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+               xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
         }
  
         /*
@@ -1179,10 +1265,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                         args->index2 = 0;
                         args->blkno2 = blk2->blkno;
                 } else {
+                       /*
+                        * On a double leaf split, the original attr location
+                        * is already stored in blkno2/index2, so don't
+                        * overwrite it, otherwise we corrupt the tree.
+                        */
                         blk2->index = blk1->index
                                     - be16_to_cpu(leaf1->hdr.count);
-                       args->index = args->index2 = blk2->index;
-                       args->blkno = args->blkno2 = blk2->blkno;
+                       args->index = blk2->index;
+                       args->blkno = blk2->blkno;
+                       if (!state->extravalid) {
+                               /*
+                                * set the new attr location to match the old
+                                * one and let the higher level split code
+                                * decide where in the leaf to place it.
+                                */
+                               args->index2 = blk2->index;
+                               args->blkno2 = blk2->blkno;
+                       }
                 }
         } else {
                 ASSERT(state->inleaf == 1);
@@ -1213,8 +1313,8 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
         /*
          * Set up environment.
          */
-       leaf1 = blk1->bp->data;
-       leaf2 = blk2->bp->data;
+       leaf1 = blk1->bp->b_addr;
+       leaf2 = blk2->bp->b_addr;
         hdr1 = &leaf1->hdr;
         hdr2 = &leaf2->hdr;
         foundit = 0;
@@ -1316,7 +1416,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
         xfs_da_blkinfo_t *info;
         int count, bytes, forward, error, retval, i;
         xfs_dablk_t blkno;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
+
+       trace_xfs_attr_leaf_toosmall(state->args);
  
         /*
          * Check for the degenerate case of the block being over 50% full.
@@ -1324,8 +1426,8 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
          * to coalesce with a sibling.
          */
         blk = &state->path.blk[ state->path.active-1 ];
-       info = blk->bp->data;
-       ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+       info = blk->bp->b_addr;
+       ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         leaf = (xfs_attr_leafblock_t *)info;
         count = be16_to_cpu(leaf->hdr.count);
         bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1377,23 +1479,21 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
                         blkno = be32_to_cpu(info->back);
                 if (blkno == 0)
                         continue;
-               error = xfs_da_read_buf(state->args->trans, state->args->dp,
-                                       blkno, -1, &bp, XFS_ATTR_FORK);
+               error = xfs_attr_leaf_read(state->args->trans, state->args->dp,
+                                       blkno, -1, &bp);
                 if (error)
                         return(error);
-               ASSERT(bp != NULL);
  
                 leaf = (xfs_attr_leafblock_t *)info;
                 count  = be16_to_cpu(leaf->hdr.count);
                 bytes  = state->blocksize - (state->blocksize>>2);
                 bytes -= be16_to_cpu(leaf->hdr.usedbytes);
-               leaf = bp->data;
-               ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+               leaf = bp->b_addr;
                 count += be16_to_cpu(leaf->hdr.count);
                 bytes -= be16_to_cpu(leaf->hdr.usedbytes);
                 bytes -= count * sizeof(xfs_attr_leaf_entry_t);
                 bytes -= sizeof(xfs_attr_leaf_hdr_t);
-               xfs_da_brelse(state->args->trans, bp);
+               xfs_trans_brelse(state->args->trans, bp);
                 if (bytes >= 0)
                         break;  /* fits with at least 25% to spare */
         }
@@ -1431,7 +1531,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
   * If two leaves are 37% full, when combined they will leave 25% free.
   */
  int
-xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_remove(
+       struct xfs_buf  *bp,
+       xfs_da_args_t   *args)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_hdr_t *hdr;
@@ -1441,8 +1543,10 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
         int tablesize, tmp, i;
         xfs_mount_t *mp;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       trace_xfs_attr_leaf_remove(args);
+
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         hdr = &leaf->hdr;
         mp = args->trans->t_mountp;
         ASSERT((be16_to_cpu(hdr->count) > 0)
@@ -1534,7 +1638,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
          */
         memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize);
         be16_add_cpu(&hdr->usedbytes, -entsize);
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
              XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index),
                                    entsize));
  
@@ -1542,7 +1646,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
                                         * sizeof(xfs_attr_leaf_entry_t);
         memmove((char *)entry, (char *)(entry+1), tmp);
         be16_add_cpu(&hdr->count, -1);
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
             XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
         entry = &leaf->entries[be16_to_cpu(hdr->count)];
         memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t));
@@ -1572,7 +1676,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
         } else {
                 hdr->holes = 1;         /* mark as needing compaction */
         }
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
                           XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
  
         /*
@@ -1597,16 +1701,18 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
         xfs_mount_t *mp;
         char *tmpbuffer;
  
+       trace_xfs_attr_leaf_unbalance(state->args);
+
         /*
          * Set up environment.
          */
         mp = state->mp;
         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
         ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
-       drop_leaf = drop_blk->bp->data;
-       save_leaf = save_blk->bp->data;
-       ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-       ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       drop_leaf = drop_blk->bp->b_addr;
+       save_leaf = save_blk->bp->b_addr;
+       ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         drop_hdr = &drop_leaf->hdr;
         save_hdr = &save_leaf->hdr;
  
@@ -1669,7 +1775,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
                 kmem_free(tmpbuffer);
         }
  
-       xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
+       xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
                                            state->blocksize - 1);
  
         /*
@@ -1697,7 +1803,9 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
   * Don't change the args->value unless we find the attribute.
   */
  int
-xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_lookup_int(
+       struct xfs_buf  *bp,
+       xfs_da_args_t   *args)
  {
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_entry_t *entry;
@@ -1706,8 +1814,10 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
         int probe, span;
         xfs_dahash_t hashval;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       trace_xfs_attr_leaf_lookup(args);
+
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         ASSERT(be16_to_cpu(leaf->hdr.count)
                                         < (XFS_LBSIZE(args->dp->i_mount)/8));
  
@@ -1802,7 +1912,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
   * list structure.
   */
  int
-xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_getvalue(
+       struct xfs_buf  *bp,
+       xfs_da_args_t   *args)
  {
         int valuelen;
         xfs_attr_leafblock_t *leaf;
@@ -1810,8 +1922,8 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
         xfs_attr_leaf_name_local_t *name_loc;
         xfs_attr_leaf_name_remote_t *name_rmt;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         ASSERT(be16_to_cpu(leaf->hdr.count)
                                         < (XFS_LBSIZE(args->dp->i_mount)/8));
         ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
@@ -1879,8 +1991,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
         /*
          * Set up environment.
          */
-       ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
-       ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         hdr_s = &leaf_s->hdr;
         hdr_d = &leaf_d->hdr;
         ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
@@ -2008,14 +2120,16 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
   * Return 0 unless leaf2 should go before leaf1.
   */
  int
-xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+xfs_attr_leaf_order(
+       struct xfs_buf  *leaf1_bp,
+       struct xfs_buf  *leaf2_bp)
  {
         xfs_attr_leafblock_t *leaf1, *leaf2;
  
-       leaf1 = leaf1_bp->data;
-       leaf2 = leaf2_bp->data;
-       ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) &&
-              (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
+       leaf1 = leaf1_bp->b_addr;
+       leaf2 = leaf2_bp->b_addr;
+       ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
+              (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
         if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
             (be16_to_cpu(leaf2->hdr.count) > 0) &&
             ((be32_to_cpu(leaf2->entries[0].hashval) <
@@ -2033,12 +2147,14 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
   * Pick up the last hashvalue from a leaf block.
   */
  xfs_dahash_t
-xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+xfs_attr_leaf_lasthash(
+       struct xfs_buf  *bp,
+       int             *count)
  {
         xfs_attr_leafblock_t *leaf;
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         if (count)
                 *count = be16_to_cpu(leaf->hdr.count);
         if (!leaf->hdr.count)
@@ -2057,7 +2173,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
         xfs_attr_leaf_name_remote_t *name_rmt;
         int size;
  
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
         if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
                 name_loc = xfs_attr_leaf_name_local(leaf, index);
                 size = xfs_attr_leaf_entsize_local(name_loc->namelen,
@@ -2107,7 +2223,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_entry_t *entry;
         xfs_attr_leaf_name_remote_t *name_rmt;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
  #ifdef DEBUG
         xfs_attr_leaf_name_local_t *name_loc;
@@ -2115,18 +2231,15 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
         char *name;
  #endif /* DEBUG */
  
+       trace_xfs_attr_leaf_clearflag(args);
         /*
          * Set up the operation.
          */
-       error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
-                                            XFS_ATTR_FORK);
-       if (error) {
+       error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
                 return(error);
-       }
-       ASSERT(bp != NULL);
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf = bp->b_addr;
         ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
         ASSERT(args->index >= 0);
         entry = &leaf->entries[ args->index ];
@@ -2148,7 +2261,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
  #endif /* DEBUG */
  
         entry->flags &= ~XFS_ATTR_INCOMPLETE;
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
                          XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
  
         if (args->rmtblkno) {
@@ -2156,10 +2269,9 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
                 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index);
                 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
                 name_rmt->valuelen = cpu_to_be32(args->valuelen);
-               xfs_da_log_buf(args->trans, bp,
+               xfs_trans_log_buf(args->trans, bp,
                          XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
         }
-       xfs_da_buf_done(bp);
  
         /*
          * Commit the flag value change and start the next trans in series.
@@ -2176,37 +2288,34 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
         xfs_attr_leafblock_t *leaf;
         xfs_attr_leaf_entry_t *entry;
         xfs_attr_leaf_name_remote_t *name_rmt;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
  
+       trace_xfs_attr_leaf_setflag(args);
+
         /*
          * Set up the operation.
          */
-       error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
-                                            XFS_ATTR_FORK);
-       if (error) {
+       error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
                 return(error);
-       }
-       ASSERT(bp != NULL);
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf = bp->b_addr;
         ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
         ASSERT(args->index >= 0);
         entry = &leaf->entries[ args->index ];
  
         ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
         entry->flags |= XFS_ATTR_INCOMPLETE;
-       xfs_da_log_buf(args->trans, bp,
+       xfs_trans_log_buf(args->trans, bp,
                         XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
         if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
                 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index);
                 name_rmt->valueblk = 0;
                 name_rmt->valuelen = 0;
-               xfs_da_log_buf(args->trans, bp,
+               xfs_trans_log_buf(args->trans, bp,
                          XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
         }
-       xfs_da_buf_done(bp);
  
         /*
          * Commit the flag value change and start the next trans in series.
@@ -2227,7 +2336,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
         xfs_attr_leafblock_t *leaf1, *leaf2;
         xfs_attr_leaf_entry_t *entry1, *entry2;
         xfs_attr_leaf_name_remote_t *name_rmt;
-       xfs_dabuf_t *bp1, *bp2;
+       struct xfs_buf *bp1, *bp2;
         int error;
  #ifdef DEBUG
         xfs_attr_leaf_name_local_t *name_loc;
@@ -2235,38 +2344,33 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
         char *name1, *name2;
  #endif /* DEBUG */
  
+       trace_xfs_attr_leaf_flipflags(args);
+
         /*
          * Read the block containing the "old" attr
          */
-       error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp1,
-                                            XFS_ATTR_FORK);
-       if (error) {
-               return(error);
-       }
-       ASSERT(bp1 != NULL);
+       error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
+       if (error)
+               return error;
  
         /*
          * Read the block containing the "new" attr, if it is different
          */
         if (args->blkno2 != args->blkno) {
-               error = xfs_da_read_buf(args->trans, args->dp, args->blkno2,
-                                       -1, &bp2, XFS_ATTR_FORK);
-               if (error) {
-                       return(error);
-               }
-               ASSERT(bp2 != NULL);
+               error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno2,
+                                          -1, &bp2);
+               if (error)
+                       return error;
         } else {
                 bp2 = bp1;
         }
  
-       leaf1 = bp1->data;
-       ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf1 = bp1->b_addr;
         ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
         ASSERT(args->index >= 0);
         entry1 = &leaf1->entries[ args->index ];
  
-       leaf2 = bp2->data;
-       ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+       leaf2 = bp2->b_addr;
         ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
         ASSERT(args->index2 >= 0);
         entry2 = &leaf2->entries[ args->index2 ];
@@ -2299,30 +2403,27 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
         ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0);
  
         entry1->flags &= ~XFS_ATTR_INCOMPLETE;
-       xfs_da_log_buf(args->trans, bp1,
+       xfs_trans_log_buf(args->trans, bp1,
                           XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
         if (args->rmtblkno) {
                 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
                 name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index);
                 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
                 name_rmt->valuelen = cpu_to_be32(args->valuelen);
-               xfs_da_log_buf(args->trans, bp1,
+               xfs_trans_log_buf(args->trans, bp1,
                          XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
         }
  
         entry2->flags |= XFS_ATTR_INCOMPLETE;
-       xfs_da_log_buf(args->trans, bp2,
+       xfs_trans_log_buf(args->trans, bp2,
                           XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
         if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
                 name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2);
                 name_rmt->valueblk = 0;
                 name_rmt->valuelen = 0;
-               xfs_da_log_buf(args->trans, bp2,
+               xfs_trans_log_buf(args->trans, bp2,
                          XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt)));
         }
-       xfs_da_buf_done(bp1);
-       if (bp1 != bp2)
-               xfs_da_buf_done(bp2);
  
         /*
          * Commit the flag value change and start the next trans in series.
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c

index 5a626b03b25571f6286cc4c561de063460848f34..b328a0b896f0cdfe216fe417ef70c7b427e56d05 100644 (file)
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -18,10 +18,6 @@
  
  #include <xfs.h>
  
-#ifdef DEBUG
-STATIC void
-xfs_bmap_check_leaf_extents(xfs_btree_cur_t *cur, xfs_inode_t *ip, int whichfork);
-#endif
  
  kmem_zone_t            *xfs_bmap_free_item_zone;
  
@@ -29,6 +25,16 @@ kmem_zone_t          *xfs_bmap_free_item_zone;
   * Prototypes for internal bmap routines.
   */
  
+#ifdef DEBUG
+STATIC void
+xfs_bmap_check_leaf_extents(
+       struct xfs_btree_cur    *cur,
+       struct xfs_inode        *ip,
+       int                     whichfork);
+#else /* !DEBUG: calls expand to an empty statement */
+#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)                do { } while (0)
+#endif /* DEBUG */
+
  
  /*
   * Called from xfs_bmap_add_attrfork to handle extents format files.
@@ -52,75 +58,6 @@ xfs_bmap_add_attrfork_local(
         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
         int                     *flags);        /* inode logging flags */
  
-/*
- * Called by xfs_bmapi to update file extent records and the btree
- * after allocating space (or doing a delayed allocation).
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       xfs_fsblock_t           *first, /* pointer to firstblock variable */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       int                     *logflagsp, /* inode logging flags */
-       int                     whichfork, /* data or attr fork */
-       int                     rsvd);  /* OK to allocate reserved blocks */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a delayed
- * allocation to a real allocation.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_delay_real(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       xfs_filblks_t           *dnew,  /* new delayed-alloc indirect blocks */
-       xfs_fsblock_t           *first, /* pointer to firstblock variable */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       int                     *logflagsp, /* inode logging flags */
-       int                     rsvd);  /* OK to allocate reserved blocks */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a delayed allocation.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_hole_delay(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       int                     *logflagsp,/* inode logging flags */
-       int                     rsvd);  /* OK to allocate reserved blocks */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a real allocation.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_hole_real(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         *cur,   /* if null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       int                     *logflagsp, /* inode logging flags */
-       int                     whichfork); /* data or attr fork */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting an unwritten
- * allocation to a real allocation or vice versa.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_unwritten_real(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       int                     *logflagsp); /* inode logging flags */
-
  /*
   * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
   * It figures out where to ask the underlying allocator to put the new extent.
@@ -143,22 +80,6 @@ xfs_bmap_btree_to_extents(
         int                     *logflagsp, /* inode logging flags */
         int                     whichfork); /* data or attr fork */
  
-/*
- * Called by xfs_bmapi to update file extent records and the btree
- * after removing space (or undoing a delayed allocation).
- */
-STATIC int                             /* error */
-xfs_bmap_del_extent(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_trans_t             *tp,    /* current trans pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       xfs_btree_cur_t         *cur,   /* if null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       int                     *logflagsp,/* inode logging flags */
-       int                     whichfork, /* data or attr fork */
-       int                     rsvd);   /* OK to allocate reserved blocks */
-
  /*
   * Convert an extents-format file into a btree-format file.
   * The new file will have a root block (in the inode) and a single child block.
@@ -188,19 +109,6 @@ xfs_bmap_local_to_extents(
         int             *logflagsp,     /* inode logging flags */
         int             whichfork);     /* data or attr fork */
  
-/*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
- */
-STATIC int                             /* error */
-xfs_bmap_isaeof(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_fileoff_t   off,            /* file offset in fsblocks */
-       int             whichfork,      /* data or attribute fork */
-       char            *aeof);         /* return value */
-
  /*
   * Compute the worst-case number of indirect blocks that will be used
   * for ip's delayed extent of length "len".
@@ -283,7 +191,27 @@ xfs_bmbt_lookup_ge(
  }
  
  /*
-* Update the record referred to by cur to the value given
+ * True if an extents-format fork holds more than XFS_IFORK_MAXEXT extents.
+ */
+static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
+{
+       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+               XFS_IFORK_NEXTENTS(ip, whichfork) >
+                       XFS_IFORK_MAXEXT(ip, whichfork);
+}
+
+/*
+ * True if a btree-format fork holds no more than XFS_IFORK_MAXEXT extents.
+ */
+static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
+{
+       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+               XFS_IFORK_NEXTENTS(ip, whichfork) <=
+                       XFS_IFORK_MAXEXT(ip, whichfork);
+}
+
+/*
+ * Update the record referred to by cur to the value given
   * by [off, bno, len, state].
   * This either works (return 0) or gets an EFSCORRUPTED error.
   */
@@ -388,7 +316,7 @@ xfs_bmap_add_attrfork_local(
  
         if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
                 return 0;
-       if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+       if (S_ISDIR(ip->i_d.di_mode)) {
                 mp = ip->i_mount;
                 memset(&dargs, 0, sizeof(dargs));
                 dargs.dp = ip;
@@ -405,188 +333,13 @@ xfs_bmap_add_attrfork_local(
  }
  
  /*
- * Called by xfs_bmapi to update file extent records and the btree
- * after allocating space (or doing a delayed allocation).
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       xfs_fsblock_t           *first, /* pointer to firstblock variable */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       int                     *logflagsp, /* inode logging flags */
-       int                     whichfork, /* data or attr fork */
-       int                     rsvd)   /* OK to use reserved data blocks */
-{
-       xfs_btree_cur_t         *cur;   /* btree cursor or null */
-       xfs_filblks_t           da_new; /* new count del alloc blocks used */
-       xfs_filblks_t           da_old; /* old count del alloc blocks used */
-       int                     error;  /* error return value */
-       xfs_ifork_t             *ifp;   /* inode fork ptr */
-       int                     logflags; /* returned value */
-       xfs_extnum_t            nextents; /* number of extents in file now */
-
-       XFS_STATS_INC(xs_add_exlist);
-       cur = *curp;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       ASSERT(idx <= nextents);
-       da_old = da_new = 0;
-       error = 0;
-       /*
-        * This is the first extent added to a new/empty file.
-        * Special case this one, so other routines get to assume there are
-        * already extents in the list.
-        */
-       if (nextents == 0) {
-               xfs_iext_insert(ip, 0, 1, new,
-                               whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-
-               ASSERT(cur == NULL);
-               ifp->if_lastex = 0;
-               if (!isnullstartblock(new->br_startblock)) {
-                       XFS_IFORK_NEXT_SET(ip, whichfork, 1);
-                       logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
-               } else
-                       logflags = 0;
-       }
-       /*
-        * Any kind of new delayed allocation goes here.
-        */
-       else if (isnullstartblock(new->br_startblock)) {
-               if (cur)
-                       ASSERT((cur->bc_private.b.flags &
-                               XFS_BTCUR_BPRV_WASDEL) == 0);
-               if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
-                               &logflags, rsvd)))
-                       goto done;
-       }
-       /*
-        * Real allocation off the end of the file.
-        */
-       else if (idx == nextents) {
-               if (cur)
-                       ASSERT((cur->bc_private.b.flags &
-                               XFS_BTCUR_BPRV_WASDEL) == 0);
-               if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
-                               &logflags, whichfork)))
-                       goto done;
-       } else {
-               xfs_bmbt_irec_t prev;   /* old extent at offset idx */
-
-               /*
-                * Get the record referred to by idx.
-                */
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev);
-               /*
-                * If it's a real allocation record, and the new allocation ends
-                * after the start of the referred to record, then we're filling
-                * in a delayed or unwritten allocation with a real one, or
-                * converting real back to unwritten.
-                */
-               if (!isnullstartblock(new->br_startblock) &&
-                   new->br_startoff + new->br_blockcount > prev.br_startoff) {
-                       if (prev.br_state != XFS_EXT_UNWRITTEN &&
-                           isnullstartblock(prev.br_startblock)) {
-                               da_old = startblockval(prev.br_startblock);
-                               if (cur)
-                                       ASSERT(cur->bc_private.b.flags &
-                                               XFS_BTCUR_BPRV_WASDEL);
-                               if ((error = xfs_bmap_add_extent_delay_real(ip,
-                                       idx, &cur, new, &da_new, first, flist,
-                                       &logflags, rsvd)))
-                                       goto done;
-                       } else if (new->br_state == XFS_EXT_NORM) {
-                               ASSERT(new->br_state == XFS_EXT_NORM);
-                               if ((error = xfs_bmap_add_extent_unwritten_real(
-                                       ip, idx, &cur, new, &logflags)))
-                                       goto done;
-                       } else {
-                               ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
-                               if ((error = xfs_bmap_add_extent_unwritten_real(
-                                       ip, idx, &cur, new, &logflags)))
-                                       goto done;
-                       }
-                       ASSERT(*curp == cur || *curp == NULL);
-               }
-               /*
-                * Otherwise we're filling in a hole with an allocation.
-                */
-               else {
-                       if (cur)
-                               ASSERT((cur->bc_private.b.flags &
-                                       XFS_BTCUR_BPRV_WASDEL) == 0);
-                       if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
-                                       new, &logflags, whichfork)))
-                               goto done;
-               }
-       }
-
-       ASSERT(*curp == cur || *curp == NULL);
-       /*
-        * Convert to a btree if necessary.
-        */
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-           XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
-               int     tmp_logflags;   /* partial log flag return val */
-
-               ASSERT(cur == NULL);
-               error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
-                       flist, &cur, da_old > 0, &tmp_logflags, whichfork);
-               logflags |= tmp_logflags;
-               if (error)
-                       goto done;
-       }
-       /*
-        * Adjust for changes in reserved delayed indirect blocks.
-        * Nothing to do for disk quotas here.
-        */
-       if (da_old || da_new) {
-               xfs_filblks_t   nblks;
-
-               nblks = da_new;
-               if (cur)
-                       nblks += cur->bc_private.b.allocated;
-               ASSERT(nblks <= da_old);
-               if (nblks < da_old)
-                       xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                               (int64_t)(da_old - nblks), rsvd);
-       }
-       /*
-        * Clear out the allocated field, done with it now in any case.
-        */
-       if (cur) {
-               cur->bc_private.b.allocated = 0;
-               *curp = cur;
-       }
-done:
-#ifdef DEBUG
-       if (!error)
-               xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
-#endif
-       *logflagsp = logflags;
-       return error;
-}
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a delayed
- * allocation to a real allocation.
+ * Convert a delayed allocation to a real allocation.
   */
  STATIC int                             /* error */
  xfs_bmap_add_extent_delay_real(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       xfs_filblks_t           *dnew,  /* new delayed-alloc indirect blocks */
-       xfs_fsblock_t           *first, /* pointer to firstblock variable */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       int                     *logflagsp, /* inode logging flags */
-       int                     rsvd)   /* OK to use reserved data block allocation */
+       struct xfs_bmalloca     *bma)
  {
-       xfs_btree_cur_t         *cur;   /* btree cursor */
+       struct xfs_bmbt_irec    *new = &bma->got;
         int                     diff;   /* temp value */
         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
         int                     error;  /* error return value */
@@ -597,10 +350,22 @@ xfs_bmap_add_extent_delay_real(
                                         /* left is 0, right is 1, prev is 2 */
         int                     rval=0; /* return value (logging flags) */
         int                     state = 0;/* state bits, accessed thru macros */
-       xfs_filblks_t           temp=0; /* value for dnew calculations */
-       xfs_filblks_t           temp2=0;/* value for dnew calculations */
+       xfs_filblks_t           da_new; /* new count del alloc blocks used */
+       xfs_filblks_t           da_old; /* old count del alloc blocks used */
+       xfs_filblks_t           temp=0; /* value for da_new calculations */
+       xfs_filblks_t           temp2=0;/* value for da_new calculations */
         int                     tmp_rval;       /* partial logging flags */
  
+       ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+
+       ASSERT(bma->idx >= 0);
+       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       ASSERT(!bma->cur ||
+              (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+
+       XFS_STATS_INC(xs_add_exlist);
+
  #define        LEFT            r[0]
  #define        RIGHT           r[1]
  #define        PREV            r[2]
@@ -608,14 +373,15 @@ xfs_bmap_add_extent_delay_real(
         /*
          * Set up a bunch of variables to make the tests simpler.
          */
-       cur = *curp;
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       ep = xfs_iext_get_ext(ifp, idx);
+       ep = xfs_iext_get_ext(ifp, bma->idx);
         xfs_bmbt_get_all(ep, &PREV);
         new_endoff = new->br_startoff + new->br_blockcount;
         ASSERT(PREV.br_startoff <= new->br_startoff);
         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
  
+       da_old = startblockval(PREV.br_startblock);
+       da_new = 0;
+
         /*
          * Set flags determining what part of the previous delayed allocation
          * extent is being replaced by a real allocation.
@@ -629,9 +395,9 @@ xfs_bmap_add_extent_delay_real(
          * Check and set flags if this segment has a left neighbor.
          * Don't set contiguous if the combined extent would be too large.
          */
-       if (idx > 0) {
+       if (bma->idx > 0) {
                 state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
  
                 if (isnullstartblock(LEFT.br_startblock))
                         state |= BMAP_LEFT_DELAY;
@@ -649,9 +415,9 @@ xfs_bmap_add_extent_delay_real(
          * Don't set contiguous if the combined extent would be too large.
          * Also check for all-three-contiguous being too large.
          */
-       if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+       if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
                 state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
  
                 if (isnullstartblock(RIGHT.br_startblock))
                         state |= BMAP_RIGHT_DELAY;
@@ -682,38 +448,41 @@ xfs_bmap_add_extent_delay_real(
                  * Filling in all of a previously delayed allocation extent.
                  * The left and right neighbors are both contiguous with new.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               bma->idx--;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
                         LEFT.br_blockcount + PREV.br_blockcount +
                         RIGHT.br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               xfs_iext_remove(ip, idx, 2, state);
-               ip->i_df.if_lastex = idx - 1;
-               ip->i_d.di_nextents--;
-               if (cur == NULL)
+               xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
+               bma->ip->i_d.di_nextents--;
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                 else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
                                         RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i)))
+                                       RIGHT.br_blockcount, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_delete(cur, &i)))
+                       error = xfs_btree_delete(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                       error = xfs_btree_decrement(bma->cur, 0, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock,
                                         LEFT.br_blockcount +
                                         PREV.br_blockcount +
-                                       RIGHT.br_blockcount, LEFT.br_state)))
+                                       RIGHT.br_blockcount, LEFT.br_state);
+                       if (error)
                                 goto done;
                 }
-               *dnew = 0;
                 break;
  
         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -721,29 +490,31 @@ xfs_bmap_add_extent_delay_real(
                  * Filling in all of a previously delayed allocation extent.
                  * The left neighbor is contiguous, the right is not.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               bma->idx--;
+
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
                         LEFT.br_blockcount + PREV.br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               ip->i_df.if_lastex = idx - 1;
-               xfs_iext_remove(ip, idx, 1, state);
-               if (cur == NULL)
+               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
                         rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock, LEFT.br_blockcount,
-                                       &i)))
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock,
                                         LEFT.br_blockcount +
-                                       PREV.br_blockcount, LEFT.br_state)))
+                                       PREV.br_blockcount, LEFT.br_state);
+                       if (error)
                                 goto done;
                 }
-               *dnew = 0;
                 break;
  
         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -751,30 +522,30 @@ xfs_bmap_add_extent_delay_real(
                  * Filling in all of a previously delayed allocation extent.
                  * The right neighbor is contiguous, the left is not.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                 xfs_bmbt_set_startblock(ep, new->br_startblock);
                 xfs_bmbt_set_blockcount(ep,
                         PREV.br_blockcount + RIGHT.br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               ip->i_df.if_lastex = idx;
-               xfs_iext_remove(ip, idx + 1, 1, state);
-               if (cur == NULL)
+               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
                         rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
                                         RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i)))
+                                       RIGHT.br_blockcount, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
+                       error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
                                         new->br_startblock,
                                         PREV.br_blockcount +
-                                       RIGHT.br_blockcount, PREV.br_state)))
+                                       RIGHT.br_blockcount, PREV.br_state);
+                       if (error)
                                 goto done;
                 }
-               *dnew = 0;
                 break;
  
         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -783,27 +554,27 @@ xfs_bmap_add_extent_delay_real(
                  * Neither the left nor right neighbors are contiguous with
                  * the new one.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                 xfs_bmbt_set_startblock(ep, new->br_startblock);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               ip->i_df.if_lastex = idx;
-               ip->i_d.di_nextents++;
-               if (cur == NULL)
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                 else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
                                         new->br_startblock, new->br_blockcount,
-                                       &i)))
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       if ((error = xfs_btree_insert(cur, &i)))
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                 }
-               *dnew = 0;
                 break;
  
         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -811,38 +582,40 @@ xfs_bmap_add_extent_delay_real(
                  * Filling in the first part of a previous delayed allocation.
                  * The left neighbor is contiguous.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
                         LEFT.br_blockcount + new->br_blockcount);
                 xfs_bmbt_set_startoff(ep,
                         PREV.br_startoff + new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
  
                 temp = PREV.br_blockcount - new->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep, temp);
-               ip->i_df.if_lastex = idx - 1;
-               if (cur == NULL)
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
                         rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock, LEFT.br_blockcount,
-                                       &i)))
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock,
                                         LEFT.br_blockcount +
                                         new->br_blockcount,
-                                       LEFT.br_state)))
+                                       LEFT.br_state);
+                       if (error)
                                 goto done;
                 }
-               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
                         startblockval(PREV.br_startblock));
-               xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               *dnew = temp;
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->idx--;
                 break;
  
         case BMAP_LEFT_FILLING:
@@ -850,43 +623,43 @@ xfs_bmap_add_extent_delay_real(
                  * Filling in the first part of a previous delayed allocation.
                  * The left neighbor is not contiguous.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                 xfs_bmbt_set_startoff(ep, new_endoff);
                 temp = PREV.br_blockcount - new->br_blockcount;
                 xfs_bmbt_set_blockcount(ep, temp);
-               xfs_iext_insert(ip, idx, 1, new, state);
-               ip->i_df.if_lastex = idx;
-               ip->i_d.di_nextents++;
-               if (cur == NULL)
+               xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                 else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
                                         new->br_startblock, new->br_blockcount,
-                                       &i)))
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       if ((error = xfs_btree_insert(cur, &i)))
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                 }
-               if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
-                   ip->i_d.di_nextents > ip->i_df.if_ext_max) {
-                       error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
-                                       first, flist, &cur, 1, &tmp_rval,
-                                       XFS_DATA_FORK);
+
+               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                                       bma->firstblock, bma->flist,
+                                       &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
                         rval |= tmp_rval;
                         if (error)
                                 goto done;
                 }
-               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
                         startblockval(PREV.br_startblock) -
-                       (cur ? cur->bc_private.b.allocated : 0));
-               ep = xfs_iext_get_ext(ifp, idx + 1);
-               xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-               trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
-               *dnew = temp;
+                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+               ep = xfs_iext_get_ext(ifp, bma->idx + 1);
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
                 break;
  
         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -895,36 +668,39 @@ xfs_bmap_add_extent_delay_real(
                  * The right neighbor is contiguous with the new allocation.
                  */
                 temp = PREV.br_blockcount - new->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
-               trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep, temp);
-               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
                         new->br_startoff, new->br_startblock,
                         new->br_blockcount + RIGHT.br_blockcount,
                         RIGHT.br_state);
-               trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
-               ip->i_df.if_lastex = idx + 1;
-               if (cur == NULL)
+               trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
                         rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
                                         RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i)))
+                                       RIGHT.br_blockcount, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
+                       error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                         new->br_startblock,
                                         new->br_blockcount +
                                         RIGHT.br_blockcount,
-                                       RIGHT.br_state)))
+                                       RIGHT.br_state);
+                       if (error)
                                 goto done;
                 }
-               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
                         startblockval(PREV.br_startblock));
-               xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               *dnew = temp;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->idx++;
                 break;
  
         case BMAP_RIGHT_FILLING:
@@ -933,41 +709,43 @@ xfs_bmap_add_extent_delay_real(
                  * The right neighbor is not contiguous.
                  */
                 temp = PREV.br_blockcount - new->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep, temp);
-               xfs_iext_insert(ip, idx + 1, 1, new, state);
-               ip->i_df.if_lastex = idx + 1;
-               ip->i_d.di_nextents++;
-               if (cur == NULL)
+               xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                 else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
                                         new->br_startblock, new->br_blockcount,
-                                       &i)))
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       if ((error = xfs_btree_insert(cur, &i)))
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                 }
-               if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
-                   ip->i_d.di_nextents > ip->i_df.if_ext_max) {
-                       error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
-                               first, flist, &cur, 1, &tmp_rval,
-                               XFS_DATA_FORK);
+
+               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                               bma->firstblock, bma->flist, &bma->cur, 1,
+                               &tmp_rval, XFS_DATA_FORK);
                         rval |= tmp_rval;
                         if (error)
                                 goto done;
                 }
-               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
                         startblockval(PREV.br_startblock) -
-                       (cur ? cur->bc_private.b.allocated : 0));
-               ep = xfs_iext_get_ext(ifp, idx);
-               xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               *dnew = temp;
+                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+               ep = xfs_iext_get_ext(ifp, bma->idx);
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->idx++;
                 break;
  
         case 0:
@@ -975,82 +753,83 @@ xfs_bmap_add_extent_delay_real(
                  * Filling in the middle part of a previous delayed allocation.
                  * Contiguity is impossible here.
                  * This case is avoided almost all the time.
+                *
+                * We start with a delayed allocation:
+                *
+                * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
+                *  PREV @ idx
+                *
+                * and we are allocating:
+                *                     +rrrrrrrrrrrrrrrrr+
+                *                            new
+                *
+                * and we set it up for insertion as:
+                * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
+                *                            new
+                *  PREV @ idx          LEFT              RIGHT
+                *                      inserted at idx + 1
                  */
                 temp = new->br_startoff - PREV.br_startoff;
-               trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               r[0] = *new;
-               r[1].br_state = PREV.br_state;
-               r[1].br_startblock = 0;
-               r[1].br_startoff = new_endoff;
                 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
-               r[1].br_blockcount = temp2;
-               xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
-               ip->i_df.if_lastex = idx + 1;
-               ip->i_d.di_nextents++;
-               if (cur == NULL)
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
+               LEFT = *new;
+               RIGHT.br_state = PREV.br_state;
+               RIGHT.br_startblock = nullstartblock(
+                               (int)xfs_bmap_worst_indlen(bma->ip, temp2));
+               RIGHT.br_startoff = new_endoff;
+               RIGHT.br_blockcount = temp2;
+               /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
+               xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                 else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
                                         new->br_startblock, new->br_blockcount,
-                                       &i)))
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       if ((error = xfs_btree_insert(cur, &i)))
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                 }
-               if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
-                   ip->i_d.di_nextents > ip->i_df.if_ext_max) {
-                       error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
-                                       first, flist, &cur, 1, &tmp_rval,
-                                       XFS_DATA_FORK);
+
+               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                                       bma->firstblock, bma->flist, &bma->cur,
+                                       1, &tmp_rval, XFS_DATA_FORK);
                         rval |= tmp_rval;
                         if (error)
                                 goto done;
                 }
-               temp = xfs_bmap_worst_indlen(ip, temp);
-               temp2 = xfs_bmap_worst_indlen(ip, temp2);
+               temp = xfs_bmap_worst_indlen(bma->ip, temp);
+               temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
-                       (cur ? cur->bc_private.b.allocated : 0));
-               if (diff > 0 &&
-                   xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                                            -((int64_t)diff), rsvd)) {
-                       /*
-                        * Ick gross gag me with a spoon.
-                        */
-                       ASSERT(0);      /* want to see if this ever happens! */
-                       while (diff > 0) {
-                               if (temp) {
-                                       temp--;
-                                       diff--;
-                                       if (!diff ||
-                                           !xfs_icsb_modify_counters(ip->i_mount,
-                                                   XFS_SBS_FDBLOCKS,
-                                                   -((int64_t)diff), rsvd))
-                                               break;
-                               }
-                               if (temp2) {
-                                       temp2--;
-                                       diff--;
-                                       if (!diff ||
-                                           !xfs_icsb_modify_counters(ip->i_mount,
-                                                   XFS_SBS_FDBLOCKS,
-                                                   -((int64_t)diff), rsvd))
-                                               break;
-                               }
-                       }
+                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+               if (diff > 0) {
+                       error = xfs_icsb_modify_counters(bma->ip->i_mount,
+                                       XFS_SBS_FDBLOCKS,
+                                       -((int64_t)diff), 0);
+                       ASSERT(!error);
+                       if (error)
+                               goto done;
                 }
-               ep = xfs_iext_get_ext(ifp, idx);
+
+               ep = xfs_iext_get_ext(ifp, bma->idx);
                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_);
-               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
                         nullstartblock((int)temp2));
-               trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
-               *dnew = temp + temp2;
+               trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
+
+               bma->idx++;
+               da_new = temp + temp2;
                 break;
  
         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1065,9 +844,39 @@ xfs_bmap_add_extent_delay_real(
                  */
                 ASSERT(0);
         }
-       *curp = cur;
+
+       /* convert to a btree if necessary */
+       if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(bma->cur == NULL);
+               error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                               bma->firstblock, bma->flist, &bma->cur,
+                               da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+               bma->logflags |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+
+       /* adjust for changes in reserved delayed indirect blocks */
+       if (da_old || da_new) {
+               temp = da_new;
+               if (bma->cur)
+                       temp += bma->cur->bc_private.b.allocated;
+               ASSERT(temp <= da_old);
+               if (temp < da_old)
+                       xfs_icsb_modify_counters(bma->ip->i_mount,
+                                       XFS_SBS_FDBLOCKS,
+                                       (int64_t)(da_old - temp), 0);
+       }
+
+       /* clear out the allocated field, done with it now in any case. */
+       if (bma->cur)
+               bma->cur->bc_private.b.allocated = 0;
+
+       xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
  done:
-       *logflagsp = rval;
+       bma->logflags |= rval;
         return error;
  #undef LEFT
  #undef RIGHT
@@ -1075,15 +884,17 @@ done:
  }
  
  /*
- * Called by xfs_bmap_add_extent to handle cases converting an unwritten
- * allocation to a real allocation or vice versa.
+ * Convert an unwritten allocation to a real allocation or vice versa.
   */
  STATIC int                             /* error */
  xfs_bmap_add_extent_unwritten_real(
+       struct xfs_trans        *tp,
         xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
+       xfs_extnum_t            *idx,   /* extent number to update/insert */
         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
+       xfs_fsblock_t           *first, /* pointer to firstblock variable */
+       xfs_bmap_free_t         *flist, /* list of extents to be freed */
         int                     *logflagsp) /* inode logging flags */
  {
         xfs_btree_cur_t         *cur;   /* btree cursor */
@@ -1099,16 +910,26 @@ xfs_bmap_add_extent_unwritten_real(
         int                     rval=0; /* return value (logging flags) */
         int                     state = 0;/* state bits, accessed thru macros */
  
+       *logflagsp = 0;
+
+       cur = *curp;
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(!isnullstartblock(new->br_startblock));
+
+       XFS_STATS_INC(xs_add_exlist);
+
  #define        LEFT            r[0]
  #define        RIGHT           r[1]
  #define        PREV            r[2]
+
         /*
          * Set up a bunch of variables to make the tests simpler.
          */
         error = 0;
-       cur = *curp;
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       ep = xfs_iext_get_ext(ifp, idx);
+       ep = xfs_iext_get_ext(ifp, *idx);
         xfs_bmbt_get_all(ep, &PREV);
         newext = new->br_state;
         oldext = (newext == XFS_EXT_UNWRITTEN) ?
@@ -1131,9 +952,9 @@ xfs_bmap_add_extent_unwritten_real(
          * Check and set flags if this segment has a left neighbor.
          * Don't set contiguous if the combined extent would be too large.
          */
-       if (idx > 0) {
+       if (*idx > 0) {
                 state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
  
                 if (isnullstartblock(LEFT.br_startblock))
                         state |= BMAP_LEFT_DELAY;
@@ -1151,9 +972,9 @@ xfs_bmap_add_extent_unwritten_real(
          * Don't set contiguous if the combined extent would be too large.
          * Also check for all-three-contiguous being too large.
          */
-       if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
                 state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
                 if (isnullstartblock(RIGHT.br_startblock))
                         state |= BMAP_RIGHT_DELAY;
         }
@@ -1182,14 +1003,15 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting all of a previous oldext extent to newext.
                  * The left and right neighbors are both contiguous with new.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               --*idx;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
                         LEFT.br_blockcount + PREV.br_blockcount +
                         RIGHT.br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
-               xfs_iext_remove(ip, idx, 2, state);
-               ip->i_df.if_lastex = idx - 1;
+               xfs_iext_remove(ip, *idx + 1, 2, state);
                 ip->i_d.di_nextents -= 2;
                 if (cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1225,13 +1047,14 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting all of a previous oldext extent to newext.
                  * The left neighbor is contiguous, the right is not.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               --*idx;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
                         LEFT.br_blockcount + PREV.br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
-               ip->i_df.if_lastex = idx - 1;
-               xfs_iext_remove(ip, idx, 1, state);
+               xfs_iext_remove(ip, *idx + 1, 1, state);
                 ip->i_d.di_nextents--;
                 if (cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1261,13 +1084,12 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting all of a previous oldext extent to newext.
                  * The right neighbor is contiguous, the left is not.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep,
                         PREV.br_blockcount + RIGHT.br_blockcount);
                 xfs_bmbt_set_state(ep, newext);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               ip->i_df.if_lastex = idx;
-               xfs_iext_remove(ip, idx + 1, 1, state);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               xfs_iext_remove(ip, *idx + 1, 1, state);
                 ip->i_d.di_nextents--;
                 if (cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1298,11 +1120,10 @@ xfs_bmap_add_extent_unwritten_real(
                  * Neither the left nor right neighbors are contiguous with
                  * the new one.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_state(ep, newext);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
-               ip->i_df.if_lastex = idx;
                 if (cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
@@ -1324,21 +1145,22 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting the first part of a previous oldext extent to newext.
                  * The left neighbor is contiguous.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
                         LEFT.br_blockcount + new->br_blockcount);
                 xfs_bmbt_set_startoff(ep,
                         PREV.br_startoff + new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
  
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_startblock(ep,
                         new->br_startblock + new->br_blockcount);
                 xfs_bmbt_set_blockcount(ep,
                         PREV.br_blockcount - new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               --*idx;
  
-               ip->i_df.if_lastex = idx - 1;
                 if (cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
@@ -1356,10 +1178,11 @@ xfs_bmap_add_extent_unwritten_real(
                                 goto done;
                         if ((error = xfs_btree_decrement(cur, 0, &i)))
                                 goto done;
-                       if (xfs_bmbt_update(cur, LEFT.br_startoff,
+                       error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                 LEFT.br_startblock,
                                 LEFT.br_blockcount + new->br_blockcount,
-                               LEFT.br_state))
+                               LEFT.br_state);
+                       if (error)
                                 goto done;
                 }
                 break;
@@ -1369,17 +1192,16 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting the first part of a previous oldext extent to newext.
                  * The left neighbor is not contiguous.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
                 xfs_bmbt_set_startoff(ep, new_endoff);
                 xfs_bmbt_set_blockcount(ep,
                         PREV.br_blockcount - new->br_blockcount);
                 xfs_bmbt_set_startblock(ep,
                         new->br_startblock + new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
-               xfs_iext_insert(ip, idx, 1, new, state);
-               ip->i_df.if_lastex = idx;
+               xfs_iext_insert(ip, *idx, 1, new, state);
                 ip->i_d.di_nextents++;
                 if (cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1408,17 +1230,19 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting the last part of a previous oldext extent to newext.
                  * The right neighbor is contiguous with the new allocation.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
-               trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep,
                         PREV.br_blockcount - new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               ++*idx;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
                         new->br_startoff, new->br_startblock,
                         new->br_blockcount + RIGHT.br_blockcount, newext);
-               trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
-               ip->i_df.if_lastex = idx + 1;
                 if (cur == NULL)
                         rval = XFS_ILOG_DEXT;
                 else {
@@ -1448,13 +1272,14 @@ xfs_bmap_add_extent_unwritten_real(
                  * Setting the last part of a previous oldext extent to newext.
                  * The right neighbor is not contiguous.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep,
                         PREV.br_blockcount - new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 1, new, state);
  
-               xfs_iext_insert(ip, idx + 1, 1, new, state);
-               ip->i_df.if_lastex = idx + 1;
                 ip->i_d.di_nextents++;
                 if (cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1488,10 +1313,10 @@ xfs_bmap_add_extent_unwritten_real(
                  * newext.  Contiguity is impossible here.
                  * One extent becomes three extents.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep,
                         new->br_startoff - PREV.br_startoff);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
                 r[0] = *new;
                 r[1].br_startoff = new_endoff;
@@ -1499,8 +1324,10 @@ xfs_bmap_add_extent_unwritten_real(
                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
                 r[1].br_state = oldext;
-               xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
-               ip->i_df.if_lastex = idx + 1;
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 2, &r[0], state);
+
                 ip->i_d.di_nextents += 2;
                 if (cur == NULL)
                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1553,9 +1380,28 @@ xfs_bmap_add_extent_unwritten_real(
                  */
                 ASSERT(0);
         }
-       *curp = cur;
+
+       /* convert to a btree if necessary */
+       if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
+                               0, &tmp_logflags, XFS_DATA_FORK);
+               *logflagsp |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+
+       /* clear out the allocated field, done with it now in any case. */
+       if (cur) {
+               cur->bc_private.b.allocated = 0;
+               *curp = cur;
+       }
+
+       xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
  done:
-       *logflagsp = rval;
+       *logflagsp |= rval;
         return error;
  #undef LEFT
  #undef RIGHT
@@ -1563,19 +1409,14 @@ done:
  }
  
  /*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a delayed allocation.
+ * Convert a hole to a delayed allocation.
   */
-/*ARGSUSED*/
-STATIC int                             /* error */
+STATIC void
  xfs_bmap_add_extent_hole_delay(
         xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       int                     *logflagsp, /* inode logging flags */
-       int                     rsvd)           /* OK to allocate reserved blocks */
+       xfs_extnum_t            *idx,   /* extent number to update/insert */
+       xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
  {
-       xfs_bmbt_rec_host_t     *ep;    /* extent record for idx */
         xfs_ifork_t             *ifp;   /* inode fork pointer */
         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
         xfs_filblks_t           newlen=0;       /* new indirect size */
@@ -1585,16 +1426,15 @@ xfs_bmap_add_extent_hole_delay(
         xfs_filblks_t           temp=0; /* temp for indirect calculations */
  
         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       ep = xfs_iext_get_ext(ifp, idx);
         state = 0;
         ASSERT(isnullstartblock(new->br_startblock));
  
         /*
          * Check and set flags if this segment has a left neighbor
          */
-       if (idx > 0) {
+       if (*idx > 0) {
                 state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
  
                 if (isnullstartblock(left.br_startblock))
                         state |= BMAP_LEFT_DELAY;
@@ -1604,9 +1444,9 @@ xfs_bmap_add_extent_hole_delay(
          * Check and set flags if the current (right) segment exists.
          * If it doesn't exist, we're converting the hole at end-of-file.
          */
-       if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
                 state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(ep, &right);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
  
                 if (isnullstartblock(right.br_startblock))
                         state |= BMAP_RIGHT_DELAY;
@@ -1639,21 +1479,21 @@ xfs_bmap_add_extent_hole_delay(
                  * on the left and on the right.
                  * Merge all three into a single extent record.
                  */
+               --*idx;
                 temp = left.br_blockcount + new->br_blockcount +
                         right.br_blockcount;
  
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
                 oldlen = startblockval(left.br_startblock) +
                         startblockval(new->br_startblock) +
                         startblockval(right.br_startblock);
                 newlen = xfs_bmap_worst_indlen(ip, temp);
-               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
+               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
                         nullstartblock((int)newlen));
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
  
-               xfs_iext_remove(ip, idx, 1, state);
-               ip->i_df.if_lastex = idx - 1;
+               xfs_iext_remove(ip, *idx + 1, 1, state);
                 break;
  
         case BMAP_LEFT_CONTIG:
@@ -1662,17 +1502,17 @@ xfs_bmap_add_extent_hole_delay(
                  * on the left.
                  * Merge the new allocation with the left neighbor.
                  */
+               --*idx;
                 temp = left.br_blockcount + new->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
                 oldlen = startblockval(left.br_startblock) +
                         startblockval(new->br_startblock);
                 newlen = xfs_bmap_worst_indlen(ip, temp);
-               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
+               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
                         nullstartblock((int)newlen));
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
-
-               ip->i_df.if_lastex = idx - 1;
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                 break;
  
         case BMAP_RIGHT_CONTIG:
@@ -1681,16 +1521,15 @@ xfs_bmap_add_extent_hole_delay(
                  * on the right.
                  * Merge the new allocation with the right neighbor.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 temp = new->br_blockcount + right.br_blockcount;
                 oldlen = startblockval(new->br_startblock) +
                         startblockval(right.br_startblock);
                 newlen = xfs_bmap_worst_indlen(ip, temp);
-               xfs_bmbt_set_allf(ep, new->br_startoff,
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+                       new->br_startoff,
                         nullstartblock((int)newlen), temp, right.br_state);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-
-               ip->i_df.if_lastex = idx;
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                 break;
  
         case 0:
@@ -1700,36 +1539,28 @@ xfs_bmap_add_extent_hole_delay(
                  * Insert a new entry.
                  */
                 oldlen = newlen = 0;
-               xfs_iext_insert(ip, idx, 1, new, state);
-               ip->i_df.if_lastex = idx;
+               xfs_iext_insert(ip, *idx, 1, new, state);
                 break;
         }
         if (oldlen != newlen) {
                 ASSERT(oldlen > newlen);
                 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                       (int64_t)(oldlen - newlen), rsvd);
+                       (int64_t)(oldlen - newlen), 0);
                 /*
                  * Nothing to do for disk quota accounting here.
                  */
         }
-       *logflagsp = 0;
-       return 0;
  }
  
  /*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a real allocation.
+ * Convert a hole to a real allocation.
   */
  STATIC int                             /* error */
  xfs_bmap_add_extent_hole_real(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            idx,    /* extent number to update/insert */
-       xfs_btree_cur_t         *cur,   /* if null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       int                     *logflagsp, /* inode logging flags */
-       int                     whichfork) /* data or attr fork */
+       struct xfs_bmalloca     *bma,
+       int                     whichfork)
  {
-       xfs_bmbt_rec_host_t     *ep;    /* pointer to extent entry ins. point */
+       struct xfs_bmbt_irec    *new = &bma->got;
         int                     error;  /* error return value */
         int                     i;      /* temp state */
         xfs_ifork_t             *ifp;   /* inode fork pointer */
@@ -1738,20 +1569,26 @@ xfs_bmap_add_extent_hole_real(
         int                     rval=0; /* return value (logging flags) */
         int                     state;  /* state bits, accessed thru macros */
  
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
-       ep = xfs_iext_get_ext(ifp, idx);
-       state = 0;
+       ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+
+       ASSERT(bma->idx >= 0);
+       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       ASSERT(!bma->cur ||
+              !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+
+       XFS_STATS_INC(xs_add_exlist);
  
+       state = 0;
         if (whichfork == XFS_ATTR_FORK)
                 state |= BMAP_ATTRFORK;
  
         /*
          * Check and set flags if this segment has a left neighbor.
          */
-       if (idx > 0) {
+       if (bma->idx > 0) {
                 state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
                 if (isnullstartblock(left.br_startblock))
                         state |= BMAP_LEFT_DELAY;
         }
@@ -1760,9 +1597,9 @@ xfs_bmap_add_extent_hole_real(
          * Check and set flags if this segment has a current value.
          * Not true if we're inserting into the "hole" at eof.
          */
-       if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+       if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
                 state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(ep, &right);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
                 if (isnullstartblock(right.br_startblock))
                         state |= BMAP_RIGHT_DELAY;
         }
@@ -1799,38 +1636,42 @@ xfs_bmap_add_extent_hole_real(
                  * left and on the right.
                  * Merge all three into a single extent record.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               --bma->idx;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
                         left.br_blockcount + new->br_blockcount +
                         right.br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               xfs_iext_remove(ip, idx, 1, state);
-               ifp->if_lastex = idx - 1;
-               XFS_IFORK_NEXT_SET(ip, whichfork,
-                       XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
-               if (cur == NULL) {
+               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+
+               XFS_IFORK_NEXT_SET(bma->ip, whichfork,
+                       XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
+               if (bma->cur == NULL) {
                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
                 } else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur,
-                                       right.br_startoff,
-                                       right.br_startblock,
-                                       right.br_blockcount, &i)))
+                       error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
+                                       right.br_startblock, right.br_blockcount,
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_delete(cur, &i)))
+                       error = xfs_btree_delete(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                       error = xfs_btree_decrement(bma->cur, 0, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, left.br_startoff,
+                       error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                         left.br_startblock,
                                         left.br_blockcount +
                                                 new->br_blockcount +
                                                 right.br_blockcount,
-                                       left.br_state)))
+                                       left.br_state);
+                       if (error)
                                 goto done;
                 }
                 break;
@@ -1841,27 +1682,28 @@ xfs_bmap_add_extent_hole_real(
                  * on the left.
                  * Merge the new allocation with the left neighbor.
                  */
-               trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+               --bma->idx;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
                         left.br_blockcount + new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               ifp->if_lastex = idx - 1;
-               if (cur == NULL) {
+               if (bma->cur == NULL) {
                         rval = xfs_ilog_fext(whichfork);
                 } else {
                         rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur,
-                                       left.br_startoff,
-                                       left.br_startblock,
-                                       left.br_blockcount, &i)))
+                       error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
+                                       left.br_startblock, left.br_blockcount,
+                                       &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, left.br_startoff,
+                       error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                         left.br_startblock,
                                         left.br_blockcount +
                                                 new->br_blockcount,
-                                       left.br_state)))
+                                       left.br_state);
+                       if (error)
                                 goto done;
                 }
                 break;
@@ -1872,28 +1714,30 @@ xfs_bmap_add_extent_hole_real(
                  * on the right.
                  * Merge the new allocation with the right neighbor.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
-               xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
+                       new->br_startoff, new->br_startblock,
                         new->br_blockcount + right.br_blockcount,
                         right.br_state);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
  
-               ifp->if_lastex = idx;
-               if (cur == NULL) {
+               if (bma->cur == NULL) {
                         rval = xfs_ilog_fext(whichfork);
                 } else {
                         rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur,
+                       error = xfs_bmbt_lookup_eq(bma->cur,
                                         right.br_startoff,
                                         right.br_startblock,
-                                       right.br_blockcount, &i)))
+                                       right.br_blockcount, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
+                       error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                         new->br_startblock,
                                         new->br_blockcount +
                                                 right.br_blockcount,
-                                       right.br_state)))
+                                       right.br_state);
+                       if (error)
                                 goto done;
                 }
                 break;
@@ -1904,29 +1748,49 @@ xfs_bmap_add_extent_hole_real(
                  * real allocation.
                  * Insert a new entry.
                  */
-               xfs_iext_insert(ip, idx, 1, new, state);
-               ifp->if_lastex = idx;
-               XFS_IFORK_NEXT_SET(ip, whichfork,
-                       XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
-               if (cur == NULL) {
+               xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+               XFS_IFORK_NEXT_SET(bma->ip, whichfork,
+                       XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
+               if (bma->cur == NULL) {
                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
                 } else {
                         rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur,
+                       error = xfs_bmbt_lookup_eq(bma->cur,
                                         new->br_startoff,
                                         new->br_startblock,
-                                       new->br_blockcount, &i)))
+                                       new->br_blockcount, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       cur->bc_rec.b.br_state = new->br_state;
-                       if ((error = xfs_btree_insert(cur, &i)))
+                       bma->cur->bc_rec.b.br_state = new->br_state;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
                                 goto done;
                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                 }
                 break;
         }
+
+       /* convert to a btree if necessary */
+       if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(bma->cur == NULL);
+               error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                               bma->firstblock, bma->flist, &bma->cur,
+                               0, &tmp_logflags, whichfork);
+               bma->logflags |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+
+       /* clear out the allocated field, done with it now in any case. */
+       if (bma->cur)
+               bma->cur->bc_private.b.allocated = 0;
+
+       xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
  done:
-       *logflagsp = rval;
+       bma->logflags |= rval;
         return error;
  }
  
@@ -2113,26 +1977,26 @@ xfs_bmap_adjacent(
                 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
  
         mp = ap->ip->i_mount;
-       nullfb = ap->firstblock == NULLFSBLOCK;
+       nullfb = *ap->firstblock == NULLFSBLOCK;
         rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
-       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
         /*
          * If allocating at eof, and there's a previous real block,
          * try to use its last block as our starting point.
          */
-       if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
-           !isnullstartblock(ap->prevp->br_startblock) &&
-           ISVALID(ap->prevp->br_startblock + ap->prevp->br_blockcount,
-                   ap->prevp->br_startblock)) {
-               ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount;
+       if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
+           !isnullstartblock(ap->prev.br_startblock) &&
+           ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
+                   ap->prev.br_startblock)) {
+               ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
                 /*
                  * Adjust for the gap between prevp and us.
                  */
-               adjust = ap->off -
-                       (ap->prevp->br_startoff + ap->prevp->br_blockcount);
+               adjust = ap->offset -
+                       (ap->prev.br_startoff + ap->prev.br_blockcount);
                 if (adjust &&
-                   ISVALID(ap->rval + adjust, ap->prevp->br_startblock))
-                       ap->rval += adjust;
+                   ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
+                       ap->blkno += adjust;
         }
         /*
          * If not at eof, then compare the two neighbor blocks.
@@ -2149,17 +2013,17 @@ xfs_bmap_adjacent(
                  * If there's a previous (left) block, select a requested
                  * start block based on it.
                  */
-               if (ap->prevp->br_startoff != NULLFILEOFF &&
-                   !isnullstartblock(ap->prevp->br_startblock) &&
-                   (prevbno = ap->prevp->br_startblock +
-                              ap->prevp->br_blockcount) &&
-                   ISVALID(prevbno, ap->prevp->br_startblock)) {
+               if (ap->prev.br_startoff != NULLFILEOFF &&
+                   !isnullstartblock(ap->prev.br_startblock) &&
+                   (prevbno = ap->prev.br_startblock +
+                              ap->prev.br_blockcount) &&
+                   ISVALID(prevbno, ap->prev.br_startblock)) {
                         /*
                          * Calculate gap to end of previous block.
                          */
-                       adjust = prevdiff = ap->off -
-                               (ap->prevp->br_startoff +
-                                ap->prevp->br_blockcount);
+                       adjust = prevdiff = ap->offset -
+                               (ap->prev.br_startoff +
+                                ap->prev.br_blockcount);
                         /*
                          * Figure the startblock based on the previous block's
                          * end and the gap size.
@@ -2168,9 +2032,9 @@ xfs_bmap_adjacent(
                          * allocating, or using it gives us an invalid block
                          * number, then just use the end of the previous block.
                          */
-                       if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+                       if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
                             ISVALID(prevbno + prevdiff,
-                                   ap->prevp->br_startblock))
+                                   ap->prev.br_startblock))
                                 prevbno += adjust;
                         else
                                 prevdiff += adjust;
@@ -2191,16 +2055,16 @@ xfs_bmap_adjacent(
                  * If there's a following (right) block, select a requested
                  * start block based on it.
                  */
-               if (!isnullstartblock(ap->gotp->br_startblock)) {
+               if (!isnullstartblock(ap->got.br_startblock)) {
                         /*
                          * Calculate gap to start of next block.
                          */
-                       adjust = gotdiff = ap->gotp->br_startoff - ap->off;
+                       adjust = gotdiff = ap->got.br_startoff - ap->offset;
                         /*
                          * Figure the startblock based on the next block's
                          * start and the gap size.
                          */
-                       gotbno = ap->gotp->br_startblock;
+                       gotbno = ap->got.br_startblock;
                         /*
                          * Heuristic!
                          * If the gap is large relative to the piece we're
@@ -2208,12 +2072,12 @@ xfs_bmap_adjacent(
                          * number, then just use the start of the next block
                          * offset by our length.
                          */
-                       if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+                       if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
                             ISVALID(gotbno - gotdiff, gotbno))
                                 gotbno -= adjust;
-                       else if (ISVALID(gotbno - ap->alen, gotbno)) {
-                               gotbno -= ap->alen;
-                               gotdiff += adjust - ap->alen;
+                       else if (ISVALID(gotbno - ap->length, gotbno)) {
+                               gotbno -= ap->length;
+                               gotdiff += adjust - ap->length;
                         } else
                                 gotdiff += adjust;
                         /*
@@ -2234,11 +2098,11 @@ xfs_bmap_adjacent(
                  * one, else ap->rval is already set (to 0 or the inode block).
                  */
                 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
-                       ap->rval = prevdiff <= gotdiff ? prevbno : gotbno;
+                       ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
                 else if (prevbno != NULLFSBLOCK)
-                       ap->rval = prevbno;
+                       ap->blkno = prevbno;
                 else if (gotbno != NULLFSBLOCK)
-                       ap->rval = gotbno;
+                       ap->blkno = gotbno;
         }
  #undef ISVALID
  }
@@ -2271,7 +2135,7 @@ xfs_bmap_btalloc_nullfb(
                 startag = ag = 0;
  
         pag = xfs_perag_get(mp, ag);
-       while (*blen < ap->alen) {
+       while (*blen < args->maxlen) {
                 if (!pag->pagf_init) {
                         error = xfs_alloc_pagf_init(mp, args->tp, ag,
                                                     XFS_ALLOC_FLAG_TRYLOCK);
@@ -2293,7 +2157,7 @@ xfs_bmap_btalloc_nullfb(
                         notinit = 1;
  
                 if (xfs_inode_is_filestream(ap->ip)) {
-                       if (*blen >= ap->alen)
+                       if (*blen >= args->maxlen)
                                 break;
  
                         if (ap->userdata) {
@@ -2339,21 +2203,21 @@ xfs_bmap_btalloc_nullfb(
          * If the best seen length is less than the request
          * length, use the best as the minimum.
          */
-       else if (*blen < ap->alen)
+       else if (*blen < args->maxlen)
                 args->minlen = *blen;
         /*
-        * Otherwise we've seen an extent as big as alen,
+        * Otherwise we've seen an extent as big as maxlen,
          * use that as the minimum.
          */
         else
-               args->minlen = ap->alen;
+               args->minlen = args->maxlen;
  
         /*
          * set the failure fallback case to look in the selected
          * AG as the stream may have moved.
          */
         if (xfs_inode_is_filestream(ap->ip))
-               ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
+               ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
  
         return 0;
  }
@@ -2375,53 +2239,58 @@ xfs_bmap_btalloc(
         int             tryagain;
         int             error;
  
+       ASSERT(ap->length);
+
         mp = ap->ip->i_mount;
         align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
         if (unlikely(align)) {
-               error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+               error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
                                                 align, 0, ap->eof, 0, ap->conv,
-                                               &ap->off, &ap->alen);
+                                               &ap->offset, &ap->length);
                 ASSERT(!error);
-               ASSERT(ap->alen);
+               ASSERT(ap->length);
         }
-       nullfb = ap->firstblock == NULLFSBLOCK;
-       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+       nullfb = *ap->firstblock == NULLFSBLOCK;
+       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
         if (nullfb) {
                 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
                         ag = xfs_filestream_lookup_ag(ap->ip);
                         ag = (ag != NULLAGNUMBER) ? ag : 0;
-                       ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
+                       ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
                 } else {
-                       ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+                       ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
                 }
         } else
-               ap->rval = ap->firstblock;
+               ap->blkno = *ap->firstblock;
  
         xfs_bmap_adjacent(ap);
  
         /*
-        * If allowed, use ap->rval; otherwise must use firstblock since
+        * If allowed, use ap->blkno; otherwise must use firstblock since
          * it's in the right allocation group.
          */
-       if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
+       if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
                 ;
         else
-               ap->rval = ap->firstblock;
+               ap->blkno = *ap->firstblock;
         /*
          * Normal allocation, done through xfs_alloc_vextent.
          */
         tryagain = isaligned = 0;
+       memset(&args, 0, sizeof(args));
         args.tp = ap->tp;
         args.mp = mp;
-       args.fsbno = ap->rval;
-       args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
-       args.firstblock = ap->firstblock;
+       args.fsbno = ap->blkno;
+
+       /* Trim the allocation back to the maximum an AG can fit. */
+       args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+       args.firstblock = *ap->firstblock;
         blen = 0;
         if (nullfb) {
                 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
                 if (error)
                         return error;
-       } else if (ap->low) {
+       } else if (ap->flist->xbf_low) {
                 if (xfs_inode_is_filestream(ap->ip))
                         args.type = XFS_ALLOCTYPE_FIRST_AG;
                 else
@@ -2435,14 +2304,14 @@ xfs_bmap_btalloc(
         /* apply extent size hints if obtained earlier */
         if (unlikely(align)) {
                 args.prod = align;
-               if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
+               if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
         } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
                 args.prod = 1;
                 args.mod = 0;
         } else {
                 args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
-               if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
+               if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
         }
         /*
@@ -2454,15 +2323,15 @@ xfs_bmap_btalloc(
          * is >= the stripe unit and the allocation offset is
          * at the end of file.
          */
-       if (!ap->low && ap->aeof) {
-               if (!ap->off) {
+       if (!ap->flist->xbf_low && ap->aeof) {
+               if (!ap->offset) {
                         args.alignment = mp->m_dalign;
                         atype = args.type;
                         isaligned = 1;
                         /*
                          * Adjust for alignment
                          */
-                       if (blen > args.alignment && blen <= ap->alen)
+                       if (blen > args.alignment && blen <= args.maxlen)
                                 args.minlen = blen - args.alignment;
                         args.minalignslop = 0;
                 } else {
@@ -2481,7 +2350,7 @@ xfs_bmap_btalloc(
                          * of minlen+alignment+slop doesn't go up
                          * between the calls.
                          */
-                       if (blen > mp->m_dalign && blen <= ap->alen)
+                       if (blen > mp->m_dalign && blen <= args.maxlen)
                                 nextminlen = blen - mp->m_dalign;
                         else
                                 nextminlen = args.minlen;
@@ -2508,7 +2377,7 @@ xfs_bmap_btalloc(
                  * turned on.
                  */
                 args.type = atype;
-               args.fsbno = ap->rval;
+               args.fsbno = ap->blkno;
                 args.alignment = mp->m_dalign;
                 args.minlen = nextminlen;
                 args.minalignslop = 0;
@@ -2522,7 +2391,7 @@ xfs_bmap_btalloc(
                  * try again.
                  */
                 args.type = atype;
-               args.fsbno = ap->rval;
+               args.fsbno = ap->blkno;
                 args.alignment = 0;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
@@ -2531,7 +2400,7 @@ xfs_bmap_btalloc(
             args.minlen > ap->minlen) {
                 args.minlen = ap->minlen;
                 args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = ap->rval;
+               args.fsbno = ap->blkno;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
         }
@@ -2542,13 +2411,26 @@ xfs_bmap_btalloc(
                 args.minleft = 0;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
-               ap->low = 1;
+               ap->flist->xbf_low = 1;
         }
         if (args.fsbno != NULLFSBLOCK) {
-               ap->firstblock = ap->rval = args.fsbno;
+               /*
+                * check the allocation happened at the same or higher AG than
+                * the first block that was allocated.
+                */
+               ASSERT(*ap->firstblock == NULLFSBLOCK ||
+                      XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
+                      XFS_FSB_TO_AGNO(mp, args.fsbno) ||
+                      (ap->flist->xbf_low &&
+                       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
+                       XFS_FSB_TO_AGNO(mp, args.fsbno)));
+
+               ap->blkno = args.fsbno;
+               if (*ap->firstblock == NULLFSBLOCK)
+                       *ap->firstblock = args.fsbno;
                 ASSERT(nullfb || fb_agno == args.agno ||
-                      (ap->low && fb_agno < args.agno));
-               ap->alen = args.len;
+                      (ap->flist->xbf_low && fb_agno < args.agno));
+               ap->length = args.len;
                 ap->ip->i_d.di_nblocks += args.len;
                 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
                 if (ap->wasdel)
@@ -2562,8 +2444,8 @@ xfs_bmap_btalloc(
                                         XFS_TRANS_DQ_BCOUNT,
                         (long) args.len);
         } else {
-               ap->rval = NULLFSBLOCK;
-               ap->alen = 0;
+               ap->blkno = NULLFSBLOCK;
+               ap->length = 0;
         }
         return 0;
  }
@@ -2620,8 +2502,9 @@ xfs_bmap_btree_to_extents(
         if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
                 return error;
  #endif
-       if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
-                       XFS_BMAP_BTREE_REF)))
+       error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+                               &xfs_bmbt_buf_ops);
+       if (error)
                 return error;
         cblock = XFS_BUF_TO_BLOCK(cbp);
         if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
@@ -2648,13 +2531,12 @@ STATIC int                              /* error */
  xfs_bmap_del_extent(
         xfs_inode_t             *ip,    /* incore inode pointer */
         xfs_trans_t             *tp,    /* current transaction pointer */
-       xfs_extnum_t            idx,    /* extent number to update/delete */
+       xfs_extnum_t            *idx,   /* extent number to update/delete */
         xfs_bmap_free_t         *flist, /* list of extents to be freed */
         xfs_btree_cur_t         *cur,   /* if null, not a btree */
         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
         int                     *logflagsp, /* inode logging flags */
-       int                     whichfork, /* data or attr fork */
-       int                     rsvd)   /* OK to allocate reserved blocks */
+       int                     whichfork) /* data or attr fork */
  {
         xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
         xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
@@ -2685,10 +2567,10 @@ xfs_bmap_del_extent(
  
         mp = ip->i_mount;
         ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT((idx >= 0) && (idx < ifp->if_bytes /
+       ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
                 (uint)sizeof(xfs_bmbt_rec_t)));
         ASSERT(del->br_blockcount > 0);
-       ep = xfs_iext_get_ext(ifp, idx);
+       ep = xfs_iext_get_ext(ifp, *idx);
         xfs_bmbt_get_all(ep, &got);
         ASSERT(got.br_startoff <= del->br_startoff);
         del_endoff = del->br_startoff + del->br_blockcount;
@@ -2719,8 +2601,8 @@ xfs_bmap_del_extent(
                         len = del->br_blockcount;
                         do_div(bno, mp->m_sb.sb_rextsize);
                         do_div(len, mp->m_sb.sb_rextsize);
-                       if ((error = xfs_rtfree_extent(ip->i_transp, bno,
-                                       (xfs_extlen_t)len)))
+                       error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+                       if (error)
                                 goto done;
                         do_fx = 0;
                         nblks = len * mp->m_sb.sb_rextsize;
@@ -2762,11 +2644,12 @@ xfs_bmap_del_extent(
                 /*
                  * Matches the whole extent.  Delete the entry.
                  */
-               xfs_iext_remove(ip, idx, 1,
+               xfs_iext_remove(ip, *idx, 1,
                                 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-               ifp->if_lastex = idx;
+               --*idx;
                 if (delay)
                         break;
+
                 XFS_IFORK_NEXT_SET(ip, whichfork,
                         XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
                 flags |= XFS_ILOG_CORE;
@@ -2783,21 +2666,20 @@ xfs_bmap_del_extent(
                 /*
                  * Deleting the first part of the extent.
                  */
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_startoff(ep, del_endoff);
                 temp = got.br_blockcount - del->br_blockcount;
                 xfs_bmbt_set_blockcount(ep, temp);
-               ifp->if_lastex = idx;
                 if (delay) {
                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
                                 da_old);
                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+                       trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                         da_new = temp;
                         break;
                 }
                 xfs_bmbt_set_startblock(ep, del_endblock);
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                 if (!cur) {
                         flags |= xfs_ilog_fext(whichfork);
                         break;
@@ -2813,18 +2695,17 @@ xfs_bmap_del_extent(
                  * Deleting the last part of the extent.
                  */
                 temp = got.br_blockcount - del->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep, temp);
-               ifp->if_lastex = idx;
                 if (delay) {
                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
                                 da_old);
                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+                       trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                         da_new = temp;
                         break;
                 }
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                 if (!cur) {
                         flags |= xfs_ilog_fext(whichfork);
                         break;
@@ -2841,7 +2722,7 @@ xfs_bmap_del_extent(
                  * Deleting the middle of the extent.
                  */
                 temp = del->br_startoff - got.br_startoff;
-               trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                 xfs_bmbt_set_blockcount(ep, temp);
                 new.br_startoff = del_endoff;
                 temp2 = got_endoff - del_endoff;
@@ -2928,9 +2809,9 @@ xfs_bmap_del_extent(
                                 }
                         }
                 }
-               trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-               xfs_iext_insert(ip, idx + 1, 1, &new, state);
-               ifp->if_lastex = idx + 1;
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               xfs_iext_insert(ip, *idx + 1, 1, &new, state);
+               ++*idx;
                 break;
         }
         /*
@@ -2957,7 +2838,7 @@ xfs_bmap_del_extent(
         ASSERT(da_old >= da_new);
         if (da_old > da_new) {
                 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                       (int64_t)(da_old - da_new), rsvd);
+                       (int64_t)(da_old - da_new), 0);
         }
  done:
         *logflagsp = flags;
@@ -3014,8 +2895,7 @@ xfs_bmap_extents_to_btree(
  
         ifp = XFS_IFORK_PTR(ip, whichfork);
         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
-       ASSERT(ifp->if_ext_max ==
-              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
         /*
          * Make space in the inode incore.
          */
@@ -3044,6 +2924,7 @@ xfs_bmap_extents_to_btree(
          * Convert to a btree with two levels, one record in root.
          */
         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+       memset(&args, 0, sizeof(args));
         args.tp = tp;
         args.mp = mp;
         args.firstblock = *firstblock;
@@ -3058,8 +2939,6 @@ xfs_bmap_extents_to_btree(
                 args.fsbno = *firstblock;
         }
         args.minlen = args.maxlen = args.prod = 1;
-       args.total = args.minleft = args.alignment = args.mod = args.isfl =
-               args.minalignslop = 0;
         args.wasdel = wasdel;
         *logflagsp = 0;
         if ((error = xfs_alloc_vextent(&args))) {
@@ -3083,6 +2962,7 @@ xfs_bmap_extents_to_btree(
         /*
          * Fill in the child block.
          */
+       abp->b_ops = &xfs_bmbt_buf_ops;
         ablock = XFS_BUF_TO_BLOCK(abp);
         ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
         ablock->bb_level = 0;
@@ -3161,13 +3041,8 @@ xfs_bmap_forkoff_reset(
             ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
  
-               if (dfl_forkoff > ip->i_d.di_forkoff) {
+               if (dfl_forkoff > ip->i_d.di_forkoff)
                         ip->i_d.di_forkoff = dfl_forkoff;
-                       ip->i_df.if_ext_max =
-                               XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
-                       ip->i_afp->if_ext_max =
-                               XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
-               }
         }
  }
  
@@ -3194,8 +3069,7 @@ xfs_bmap_local_to_extents(
          * We don't want to deal with the case of keeping inode data inline yet.
          * So sending the data fork of a regular inode is invalid.
          */
-       ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
-                whichfork == XFS_DATA_FORK));
+       ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
         ifp = XFS_IFORK_PTR(ip, whichfork);
         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
         flags = 0;
@@ -3205,6 +3079,7 @@ xfs_bmap_local_to_extents(
                 xfs_buf_t       *bp;    /* buffer for extent block */
                 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
  
+               memset(&args, 0, sizeof(args));
                 args.tp = tp;
                 args.mp = ip->i_mount;
                 args.firstblock = *firstblock;
@@ -3222,8 +3097,6 @@ xfs_bmap_local_to_extents(
                         args.type = XFS_ALLOCTYPE_NEAR_BNO;
                 }
                 args.total = total;
-               args.mod = args.minleft = args.alignment = args.wasdel =
-                       args.isfl = args.minalignslop = 0;
                 args.minlen = args.maxlen = args.prod = 1;
                 if ((error = xfs_alloc_vextent(&args)))
                         goto done;
@@ -3234,8 +3107,8 @@ xfs_bmap_local_to_extents(
                 ASSERT(args.len == 1);
                 *firstblock = args.fsbno;
                 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
-               memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
-                       ifp->if_bytes);
+               bp->b_ops = &xfs_bmbt_buf_ops;
+               memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
                 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
                 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
                 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -3341,7 +3214,7 @@ xfs_bmap_search_extents(
  
         if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
                      !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
-               xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
+               xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
                                 "Access to block zero in inode %llu "
                                 "start_block: %llx start_off: %llx "
                                 "blkcnt: %llx extent-state: %x lastx: %x\n",
@@ -3409,8 +3282,6 @@ xfs_bmap_add_attrfork(
         int                     error;          /* error return value */
  
         ASSERT(XFS_IFORK_Q(ip) == 0);
-       ASSERT(ip->i_df.if_ext_max ==
-              XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
  
         mp = ip->i_mount;
         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
@@ -3441,7 +3312,7 @@ xfs_bmap_add_attrfork(
         }
         ASSERT(ip->i_d.di_anextents == 0);
  
-       xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  
         switch (ip->i_d.di_format) {
@@ -3465,12 +3336,9 @@ xfs_bmap_add_attrfork(
                 error = XFS_ERROR(EINVAL);
                 goto error1;
         }
-       ip->i_df.if_ext_max =
-               XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+
         ASSERT(ip->i_afp == NULL);
         ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
-       ip->i_afp->if_ext_max =
-               XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
         ip->i_afp->if_flags = XFS_IFEXTENTS;
         logflags = 0;
         xfs_bmap_init(&flist, &firstblock);
@@ -3514,20 +3382,17 @@ xfs_bmap_add_attrfork(
                 } else
                         spin_unlock(&mp->m_sb_lock);
         }
-       if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
+
+       error = xfs_bmap_finish(&tp, &flist, &committed);
+       if (error)
                 goto error2;
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-       ASSERT(ip->i_df.if_ext_max ==
-              XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
-       return error;
+       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
  error2:
         xfs_bmap_cancel(&flist);
  error1:
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  error0:
         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       ASSERT(ip->i_df.if_ext_max ==
-              XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
         return error;
  }
  
@@ -3753,42 +3618,122 @@ xfs_bmap_last_before(
         return 0;
  }
  
+STATIC int
+xfs_bmap_last_extent(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *rec,
+       int                     *is_empty)
+{
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       int                     error;
+       int                     nextents;
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *is_empty = 1;
+               return 0;
+       }
+
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
+       *is_empty = 0;
+       return 0;
+}
+
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ *
+ * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
+ * at, or past the EOF.
+ */
+STATIC int
+xfs_bmap_isaeof(
+       struct xfs_bmalloca     *bma,
+       int                     whichfork)
+{
+       struct xfs_bmbt_irec    rec;
+       int                     is_empty;
+       int                     error;
+
+       bma->aeof = 0;
+       error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
+                                    &is_empty);
+       if (error || is_empty)
+               return error;
+
+       /*
+        * Check if we are allocating at or past the last extent, or at least
+        * into the last delayed allocated extent.
+        */
+       bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
+               (bma->offset >= rec.br_startoff &&
+                isnullstartblock(rec.br_startblock));
+       return 0;
+}
+
+/*
+ * Check if the endoff is outside the last extent. If so the caller will grow
+ * the allocation to a stripe unit boundary.  All offsets are considered outside
+ * the end of file for an empty fork, so 1 is returned in *eof in that case.
+ */
+int
+xfs_bmap_eof(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           endoff,
+       int                     whichfork,
+       int                     *eof)
+{
+       struct xfs_bmbt_irec    rec;
+       int                     error;
+
+       error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
+       if (error || *eof)
+               return error;
+
+       *eof = endoff >= rec.br_startoff + rec.br_blockcount;
+       return 0;
+}
+
  /*
   * Returns the file-relative block number of the first block past eof in
   * the file.  This is not based on i_size, it is based on the extent records.
   * Returns 0 for local files, as they do not have extent records.
   */
-int                                            /* error */
+int
  xfs_bmap_last_offset(
-       xfs_trans_t     *tp,                    /* transaction pointer */
-       xfs_inode_t     *ip,                    /* incore inode */
-       xfs_fileoff_t   *last_block,            /* last block */
-       int             whichfork)              /* data or attr fork */
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           *last_block,
+       int                     whichfork)
  {
-       xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
-       int             error;                  /* error return value */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_extnum_t    nextents;               /* number of extent entries */
+       struct xfs_bmbt_irec    rec;
+       int                     is_empty;
+       int                     error;
+
+       *last_block = 0;
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
+               return 0;
  
         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
                return XFS_ERROR(EIO);
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-               *last_block = 0;
-               return 0;
-       }
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
+
+       error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
+       if (error || is_empty)
                 return error;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       if (!nextents) {
-               *last_block = 0;
-               return 0;
-       }
-       ep = xfs_iext_get_ext(ifp, nextents - 1);
-       *last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep);
+
+       *last_block = rec.br_startoff + rec.br_blockcount;
         return 0;
  }
  
@@ -3808,11 +3753,8 @@ xfs_bmap_one_block(
         xfs_bmbt_irec_t s;              /* internal version of extent */
  
  #ifndef DEBUG
-       if (whichfork == XFS_DATA_FORK) {
-               return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
-                       (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
-                       (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
-       }
+       if (whichfork == XFS_DATA_FORK)
+               return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
  #endif /* !DEBUG */
         if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
                 return 0;
@@ -3824,7 +3766,7 @@ xfs_bmap_one_block(
         xfs_bmbt_get_all(ep, &s);
         rval = s.br_startoff == 0 && s.br_blockcount == 1;
         if (rval && whichfork == XFS_DATA_FORK)
-               ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize);
+               ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
         return rval;
  }
  
@@ -3836,7 +3778,7 @@ xfs_bmap_sanity_check(
  {
         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
  
-       if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
+       if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
             be16_to_cpu(block->bb_level) != level ||
             be16_to_cpu(block->bb_numrecs) == 0 ||
             be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
@@ -3890,8 +3832,9 @@ xfs_bmap_read_extents(
          * pointer (leftmost) at each level.
          */
         while (level-- > 0) {
-               if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
+               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
+               if (error)
                         return error;
                 block = XFS_BUF_TO_BLOCK(bp);
                 XFS_WANT_CORRUPTED_GOTO(
@@ -3918,16 +3861,14 @@ xfs_bmap_read_extents(
                 xfs_extnum_t    num_recs;
                 xfs_extnum_t    start;
  
-
                 num_recs = xfs_btree_get_numrecs(block);
                 if (unlikely(i + num_recs > room)) {
                         ASSERT(i + num_recs <= room);
-                       xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+                       xfs_warn(ip->i_mount,
                                 "corrupt dinode %Lu, (btree extents).",
                                 (unsigned long long) ip->i_ino);
-                       XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
-                                        XFS_ERRLEVEL_LOW,
-                                       ip->i_mount);
+                       XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
+                               XFS_ERRLEVEL_LOW, ip->i_mount, block);
                         goto error0;
                 }
                 XFS_WANT_CORRUPTED_GOTO(
@@ -3938,7 +3879,8 @@ xfs_bmap_read_extents(
                  */
                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
                 if (nextbno != NULLFSBLOCK)
-                       xfs_btree_reada_bufl(mp, nextbno, 1);
+                       xfs_btree_reada_bufl(mp, nextbno, 1,
+                                            &xfs_bmbt_buf_ops);
                 /*
                  * Copy records into the extent records.
                  */
@@ -3970,8 +3912,9 @@ xfs_bmap_read_extents(
                  */
                 if (bno == NULLFSBLOCK)
                         break;
-               if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
+               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
+               if (error)
                         return error;
                 block = XFS_BUF_TO_BLOCK(bp);
         }
@@ -4042,9 +3985,8 @@ xfs_bmap_validate_ret(
                 ASSERT(i == 0 ||
                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
                        mval[i].br_startoff);
-               if ((flags & XFS_BMAPI_WRITE) && !(flags & XFS_BMAPI_DELAY))
-                       ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
-                              mval[i].br_startblock != HOLESTARTBLOCK);
+               ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
+                      mval[i].br_startblock != HOLESTARTBLOCK);
                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
                        mval[i].br_state == XFS_EXT_UNWRITTEN);
         }
@@ -4053,395 +3995,166 @@ xfs_bmap_validate_ret(
  
  
  /*
- * Map file blocks to filesystem blocks.
- * File range is given by the bno/len pair.
- * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
- * into a hole or past eof.
- * Only allocates blocks from a single allocation group,
- * to avoid locking problems.
- * The returned value in "firstblock" from the first call in a transaction
- * must be remembered and presented to subsequent calls in "firstblock".
- * An upper bound for the number of blocks to be allocated is supplied to
- * the first call in "total"; if no allocation group has that many free
- * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
+ * Trim the returned map to the required bounds
   */
-int                                    /* error */
-xfs_bmapi(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_inode_t     *ip,            /* incore inode */
-       xfs_fileoff_t   bno,            /* starting file offs. mapped */
-       xfs_filblks_t   len,            /* length to map in file */
-       int             flags,          /* XFS_BMAPI_... */
-       xfs_fsblock_t   *firstblock,    /* first allocated block
-                                          controls a.g. for allocs */
-       xfs_extlen_t    total,          /* total blocks needed */
-       xfs_bmbt_irec_t *mval,          /* output: map values */
-       int             *nmap,          /* i/o: mval size/count */
-       xfs_bmap_free_t *flist)         /* i/o: list extents to free */
+STATIC void
+xfs_bmapi_trim_map(
+       struct xfs_bmbt_irec    *mval,
+       struct xfs_bmbt_irec    *got,
+       xfs_fileoff_t           *bno,
+       xfs_filblks_t           len,
+       xfs_fileoff_t           obno,
+       xfs_fileoff_t           end,
+       int                     n,
+       int                     flags)
  {
-       xfs_fsblock_t   abno;           /* allocated block number */
-       xfs_extlen_t    alen;           /* allocated extent length */
-       xfs_fileoff_t   aoff;           /* allocated file offset */
-       xfs_bmalloca_t  bma = { 0 };    /* args for xfs_bmap_alloc */
-       xfs_btree_cur_t *cur;           /* bmap btree cursor */
-       xfs_fileoff_t   end;            /* end of mapped file region */
-       int             eof;            /* we've hit the end of extents */
-       xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
-       int             error;          /* error return */
-       xfs_bmbt_irec_t got;            /* current file extent record */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_extlen_t    indlen;         /* indirect blocks length */
-       xfs_extnum_t    lastx;          /* last useful extent number */
-       int             logflags;       /* flags for transaction logging */
-       xfs_extlen_t    minleft;        /* min blocks left after allocation */
-       xfs_extlen_t    minlen;         /* min allocation size */
-       xfs_mount_t     *mp;            /* xfs mount structure */
-       int             n;              /* current extent index */
-       int             nallocs;        /* number of extents alloc'd */
-       xfs_extnum_t    nextents;       /* number of extents in file */
-       xfs_fileoff_t   obno;           /* old block number (offset) */
-       xfs_bmbt_irec_t prev;           /* previous file extent record */
-       int             tmp_logflags;   /* temp flags holder */
-       int             whichfork;      /* data or attr fork */
-       char            inhole;         /* current location is hole in file */
-       char            wasdelay;       /* old extent was delayed */
-       char            wr;             /* this is a write request */
-       char            rt;             /* this is a realtime file */
-#ifdef DEBUG
-       xfs_fileoff_t   orig_bno;       /* original block number value */
-       int             orig_flags;     /* original flags arg value */
-       xfs_filblks_t   orig_len;       /* original value of len arg */
-       xfs_bmbt_irec_t *orig_mval;     /* original value of mval */
-       int             orig_nmap;      /* original value of *nmap */
-
-       orig_bno = bno;
-       orig_len = len;
-       orig_flags = flags;
-       orig_mval = mval;
-       orig_nmap = *nmap;
-#endif
-       ASSERT(*nmap >= 1);
-       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE));
-       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-               XFS_ATTR_FORK : XFS_DATA_FORK;
-       mp = ip->i_mount;
-       if (unlikely(XFS_TEST_ERROR(
-           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
-            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL),
-            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-               XFS_ERROR_REPORT("xfs_bmapi", XFS_ERRLEVEL_LOW, mp);
-               return XFS_ERROR(EFSCORRUPTED);
+       if ((flags & XFS_BMAPI_ENTIRE) ||
+           got->br_startoff + got->br_blockcount <= obno) {
+               *mval = *got;
+               if (isnullstartblock(got->br_startblock))
+                       mval->br_startblock = DELAYSTARTBLOCK;
+               return;
         }
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-       rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(ifp->if_ext_max ==
-              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
-       if ((wr = (flags & XFS_BMAPI_WRITE)) != 0)
-               XFS_STATS_INC(xs_blk_mapw);
+
+       if (obno > *bno)
+               *bno = obno;
+       ASSERT((*bno >= obno) || (n == 0));
+       ASSERT(*bno < end);
+       mval->br_startoff = *bno;
+       if (isnullstartblock(got->br_startblock))
+               mval->br_startblock = DELAYSTARTBLOCK;
         else
-               XFS_STATS_INC(xs_blk_mapr);
+               mval->br_startblock = got->br_startblock +
+                                       (*bno - got->br_startoff);
         /*
-        * IGSTATE flag is used to combine extents which
-        * differ only due to the state of the extents.
-        * This technique is used from xfs_getbmap()
-        * when the caller does not wish to see the
-        * separation (which is the default).
-        *
-        * This technique is also used when writing a
-        * buffer which has been partially written,
-        * (usually by being flushed during a chunkread),
-        * to ensure one write takes place. This also
-        * prevents a change in the xfs inode extents at
-        * this time, intentionally. This change occurs
-        * on completion of the write operation, in
-        * xfs_strat_comp(), where the xfs_bmapi() call
-        * is transactioned, and the extents combined.
+        * Return the minimum of what we got and what we asked for for
+        * the length.  We can use the len variable here because it is
+        * modified below and we could have been there before coming
+        * here if the first part of the allocation didn't overlap what
+        * was asked for.
          */
-       if ((flags & XFS_BMAPI_IGSTATE) && wr)  /* if writing unwritten space */
-               wr = 0;                         /* no allocations are allowed */
-       ASSERT(wr || !(flags & XFS_BMAPI_DELAY));
-       logflags = 0;
-       nallocs = 0;
-       cur = NULL;
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-               ASSERT(wr && tp);
-               if ((error = xfs_bmap_local_to_extents(tp, ip,
-                               firstblock, total, &logflags, whichfork)))
-                       goto error0;
-       }
-       if (wr && *firstblock == NULLFSBLOCK) {
-               if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
-                       minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
-               else
-                       minleft = 1;
-       } else
-               minleft = 0;
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
-               goto error0;
-       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
-               &prev);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       n = 0;
-       end = bno + len;
-       obno = bno;
-       bma.ip = NULL;
+       mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
+                       got->br_blockcount - (*bno - got->br_startoff));
+       mval->br_state = got->br_state;
+       ASSERT(mval->br_blockcount <= len);
+       return;
+}
  
-       while (bno < end && n < *nmap) {
-               /*
-                * Reading past eof, act as though there's a hole
-                * up to end.
-                */
-               if (eof && !wr)
-                       got.br_startoff = end;
-               inhole = eof || got.br_startoff > bno;
-               wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) &&
-                       isnullstartblock(got.br_startblock);
-               /*
-                * First, deal with the hole before the allocated space
-                * that we found, if any.
-                */
-               if (wr && (inhole || wasdelay)) {
-                       /*
-                        * For the wasdelay case, we could also just
-                        * allocate the stuff asked for in this bmap call
-                        * but that wouldn't be as good.
-                        */
-                       if (wasdelay) {
-                               alen = (xfs_extlen_t)got.br_blockcount;
-                               aoff = got.br_startoff;
-                               if (lastx != NULLEXTNUM && lastx) {
-                                       ep = xfs_iext_get_ext(ifp, lastx - 1);
-                                       xfs_bmbt_get_all(ep, &prev);
-                               }
-                       } else {
-                               alen = (xfs_extlen_t)
-                                       XFS_FILBLKS_MIN(len, MAXEXTLEN);
-                               if (!eof)
-                                       alen = (xfs_extlen_t)
-                                               XFS_FILBLKS_MIN(alen,
-                                                       got.br_startoff - bno);
-                               aoff = bno;
-                       }
-                       minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1;
-                       if (flags & XFS_BMAPI_DELAY) {
-                               xfs_extlen_t    extsz;
-
-                               /* Figure out the extent size, adjust alen */
-                               extsz = xfs_get_extsz_hint(ip);
-                               if (extsz) {
-                                       error = xfs_bmap_extsize_align(mp,
-                                                       &got, &prev, extsz,
-                                                       rt, eof,
-                                                       flags&XFS_BMAPI_DELAY,
-                                                       flags&XFS_BMAPI_CONVERT,
-                                                       &aoff, &alen);
-                                       ASSERT(!error);
-                               }
+/*
+ * Update and validate the extent map to return
+ */
+STATIC void
+xfs_bmapi_update_map(
+       struct xfs_bmbt_irec    **map,
+       xfs_fileoff_t           *bno,
+       xfs_filblks_t           *len,
+       xfs_fileoff_t           obno,
+       xfs_fileoff_t           end,
+       int                     *n,
+       int                     flags)
+{
+       xfs_bmbt_irec_t *mval = *map;
+
+       ASSERT((flags & XFS_BMAPI_ENTIRE) ||
+              ((mval->br_startoff + mval->br_blockcount) <= end));
+       ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
+              (mval->br_startoff < obno));
+
+       *bno = mval->br_startoff + mval->br_blockcount;
+       *len = end - *bno;
+       if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
+               /* update previous map with new information */
+               ASSERT(mval->br_startblock == mval[-1].br_startblock);
+               ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
+               ASSERT(mval->br_state == mval[-1].br_state);
+               mval[-1].br_blockcount = mval->br_blockcount;
+               mval[-1].br_state = mval->br_state;
+       } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
+                  mval[-1].br_startblock != DELAYSTARTBLOCK &&
+                  mval[-1].br_startblock != HOLESTARTBLOCK &&
+                  mval->br_startblock == mval[-1].br_startblock +
+                                         mval[-1].br_blockcount &&
+                  ((flags & XFS_BMAPI_IGSTATE) ||
+                       mval[-1].br_state == mval->br_state)) {
+               ASSERT(mval->br_startoff ==
+                      mval[-1].br_startoff + mval[-1].br_blockcount);
+               mval[-1].br_blockcount += mval->br_blockcount;
+       } else if (*n > 0 &&
+                  mval->br_startblock == DELAYSTARTBLOCK &&
+                  mval[-1].br_startblock == DELAYSTARTBLOCK &&
+                  mval->br_startoff ==
+                  mval[-1].br_startoff + mval[-1].br_blockcount) {
+               mval[-1].br_blockcount += mval->br_blockcount;
+               mval[-1].br_state = mval->br_state;
+       } else if (!((*n == 0) &&
+                    ((mval->br_startoff + mval->br_blockcount) <=
+                     obno))) {
+               mval++;
+               (*n)++;
+       }
+       *map = mval;
+}
  
-                               if (rt)
-                                       extsz = alen / mp->m_sb.sb_rextsize;
+/*
+ * Map file blocks to filesystem blocks without allocation.
+ */
+int
+xfs_bmapi_read(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len,
+       struct xfs_bmbt_irec    *mval,
+       int                     *nmap,
+       int                     flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp;
+       struct xfs_bmbt_irec    got;
+       struct xfs_bmbt_irec    prev;
+       xfs_fileoff_t           obno;
+       xfs_fileoff_t           end;
+       xfs_extnum_t            lastx;
+       int                     error;
+       int                     eof;
+       int                     n = 0;
+       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+                                               XFS_ATTR_FORK : XFS_DATA_FORK;
  
-                               /*
-                                * Make a transaction-less quota reservation for
-                                * delayed allocation blocks. This number gets
-                                * adjusted later.  We return if we haven't
-                                * allocated blocks already inside this loop.
-                                */
-                               error = xfs_trans_reserve_quota_nblks(
-                                               NULL, ip, (long)alen, 0,
-                                               rt ? XFS_QMOPT_RES_RTBLKS :
-                                                    XFS_QMOPT_RES_REGBLKS);
-                               if (error) {
-                                       if (n == 0) {
-                                               *nmap = 0;
-                                               ASSERT(cur == NULL);
-                                               return error;
-                                       }
-                                       break;
-                               }
+       ASSERT(*nmap >= 1);
+       ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
+                          XFS_BMAPI_IGSTATE)));
  
-                               /*
-                                * Split changing sb for alen and indlen since
-                                * they could be coming from different places.
-                                */
-                               indlen = (xfs_extlen_t)
-                                       xfs_bmap_worst_indlen(ip, alen);
-                               ASSERT(indlen > 0);
-
-                               if (rt) {
-                                       error = xfs_mod_incore_sb(mp,
-                                                       XFS_SBS_FREXTENTS,
-                                                       -((int64_t)extsz), (flags &
-                                                       XFS_BMAPI_RSVBLOCKS));
-                               } else {
-                                       error = xfs_icsb_modify_counters(mp,
-                                                       XFS_SBS_FDBLOCKS,
-                                                       -((int64_t)alen), (flags &
-                                                       XFS_BMAPI_RSVBLOCKS));
-                               }
-                               if (!error) {
-                                       error = xfs_icsb_modify_counters(mp,
-                                                       XFS_SBS_FDBLOCKS,
-                                                       -((int64_t)indlen), (flags &
-                                                       XFS_BMAPI_RSVBLOCKS));
-                                       if (error && rt)
-                                               xfs_mod_incore_sb(mp,
-                                                       XFS_SBS_FREXTENTS,
-                                                       (int64_t)extsz, (flags &
-                                                       XFS_BMAPI_RSVBLOCKS));
-                                       else if (error)
-                                               xfs_icsb_modify_counters(mp,
-                                                       XFS_SBS_FDBLOCKS,
-                                                       (int64_t)alen, (flags &
-                                                       XFS_BMAPI_RSVBLOCKS));
-                               }
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
  
-                               if (error) {
-                                       if (XFS_IS_QUOTA_ON(mp))
-                                               /* unreserve the blocks now */
-                                               (void)
-                                               xfs_trans_unreserve_quota_nblks(
-                                                       NULL, ip,
-                                                       (long)alen, 0, rt ?
-                                                       XFS_QMOPT_RES_RTBLKS :
-                                                       XFS_QMOPT_RES_REGBLKS);
-                                       break;
-                               }
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
  
-                               ip->i_delayed_blks += alen;
-                               abno = nullstartblock(indlen);
-                       } else {
-                               /*
-                                * If first time, allocate and fill in
-                                * once-only bma fields.
-                                */
-                               if (bma.ip == NULL) {
-                                       bma.tp = tp;
-                                       bma.ip = ip;
-                                       bma.prevp = &prev;
-                                       bma.gotp = &got;
-                                       bma.total = total;
-                                       bma.userdata = 0;
-                               }
-                               /* Indicate if this is the first user data
-                                * in the file, or just any user data.
-                                */
-                               if (!(flags & XFS_BMAPI_METADATA)) {
-                                       bma.userdata = (aoff == 0) ?
-                                               XFS_ALLOC_INITIAL_USER_DATA :
-                                               XFS_ALLOC_USERDATA;
-                               }
-                               /*
-                                * Fill in changeable bma fields.
-                                */
-                               bma.eof = eof;
-                               bma.firstblock = *firstblock;
-                               bma.alen = alen;
-                               bma.off = aoff;
-                               bma.conv = !!(flags & XFS_BMAPI_CONVERT);
-                               bma.wasdel = wasdelay;
-                               bma.minlen = minlen;
-                               bma.low = flist->xbf_low;
-                               bma.minleft = minleft;
-                               /*
-                                * Only want to do the alignment at the
-                                * eof if it is userdata and allocation length
-                                * is larger than a stripe unit.
-                                */
-                               if (mp->m_dalign && alen >= mp->m_dalign &&
-                                   (!(flags & XFS_BMAPI_METADATA)) &&
-                                   (whichfork == XFS_DATA_FORK)) {
-                                       if ((error = xfs_bmap_isaeof(ip, aoff,
-                                                       whichfork, &bma.aeof)))
-                                               goto error0;
-                               } else
-                                       bma.aeof = 0;
-                               /*
-                                * Call allocator.
-                                */
-                               if ((error = xfs_bmap_alloc(&bma)))
-                                       goto error0;
-                               /*
-                                * Copy out result fields.
-                                */
-                               abno = bma.rval;
-                               if ((flist->xbf_low = bma.low))
-                                       minleft = 0;
-                               alen = bma.alen;
-                               aoff = bma.off;
-                               ASSERT(*firstblock == NULLFSBLOCK ||
-                                      XFS_FSB_TO_AGNO(mp, *firstblock) ==
-                                      XFS_FSB_TO_AGNO(mp, bma.firstblock) ||
-                                      (flist->xbf_low &&
-                                       XFS_FSB_TO_AGNO(mp, *firstblock) <
-                                       XFS_FSB_TO_AGNO(mp, bma.firstblock)));
-                               *firstblock = bma.firstblock;
-                               if (cur)
-                                       cur->bc_private.b.firstblock =
-                                               *firstblock;
-                               if (abno == NULLFSBLOCK)
-                                       break;
-                               if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
-                                       cur = xfs_bmbt_init_cursor(mp, tp,
-                                               ip, whichfork);
-                                       cur->bc_private.b.firstblock =
-                                               *firstblock;
-                                       cur->bc_private.b.flist = flist;
-                               }
-                               /*
-                                * Bump the number of extents we've allocated
-                                * in this call.
-                                */
-                               nallocs++;
-                       }
-                       if (cur)
-                               cur->bc_private.b.flags =
-                                       wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0;
-                       got.br_startoff = aoff;
-                       got.br_startblock = abno;
-                       got.br_blockcount = alen;
-                       got.br_state = XFS_EXT_NORM;    /* assume normal */
-                       /*
-                        * Determine state of extent, and the filesystem.
-                        * A wasdelay extent has been initialized, so
-                        * shouldn't be flagged as unwritten.
-                        */
-                       if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-                               if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
-                                       got.br_state = XFS_EXT_UNWRITTEN;
-                       }
-                       error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
-                               firstblock, flist, &tmp_logflags,
-                               whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
-                       logflags |= tmp_logflags;
-                       if (error)
-                               goto error0;
-                       lastx = ifp->if_lastex;
-                       ep = xfs_iext_get_ext(ifp, lastx);
-                       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-                       xfs_bmbt_get_all(ep, &got);
-                       ASSERT(got.br_startoff <= aoff);
-                       ASSERT(got.br_startoff + got.br_blockcount >=
-                               aoff + alen);
-#ifdef DEBUG
-                       if (flags & XFS_BMAPI_DELAY) {
-                               ASSERT(isnullstartblock(got.br_startblock));
-                               ASSERT(startblockval(got.br_startblock) > 0);
-                       }
-                       ASSERT(got.br_state == XFS_EXT_NORM ||
-                              got.br_state == XFS_EXT_UNWRITTEN);
-#endif
-                       /*
-                        * Fall down into the found allocated space case.
-                        */
-               } else if (inhole) {
-                       /*
-                        * Reading in a hole.
-                        */
+       XFS_STATS_INC(xs_blk_mapr);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(NULL, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+       end = bno + len;
+       obno = bno;
+
+       while (bno < end && n < *nmap) {
+               /* Reading past eof, act as though there's a hole up to end. */
+               if (eof)
+                       got.br_startoff = end;
+               if (got.br_startoff > bno) {
+                       /* Reading in a hole.  */
                         mval->br_startoff = bno;
                         mval->br_startblock = HOLESTARTBLOCK;
                         mval->br_blockcount =
@@ -4453,193 +4166,624 @@ xfs_bmapi(
                         n++;
                         continue;
                 }
+
+               /* set up the extent map to return. */
+               xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
+               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+               /* If we're done, stop now. */
+               if (bno >= end || n >= *nmap)
+                       break;
+
+               /* Else go on to the next record. */
+               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+               else
+                       eof = 1;
+       }
+       *nmap = n;
+       return 0;
+}
+
+STATIC int
+xfs_bmapi_reserve_delalloc(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           aoff,
+       xfs_filblks_t           len,
+       struct xfs_bmbt_irec    *got,
+       struct xfs_bmbt_irec    *prev,
+       xfs_extnum_t            *lastx,
+       int                     eof)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       xfs_extlen_t            alen;
+       xfs_extlen_t            indlen;
+       char                    rt = XFS_IS_REALTIME_INODE(ip);
+       xfs_extlen_t            extsz;
+       int                     error;
+
+       alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
+       if (!eof)
+               alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
+
+       /* Figure out the extent size, adjust alen */
+       extsz = xfs_get_extsz_hint(ip);
+       if (extsz) {
                 /*
-                * Then deal with the allocated space we found.
+                * Make sure we don't exceed a single extent length when we
+                * align the extent by reducing length we are going to
+                * allocate by the maximum amount extent size alignment may
+                * require.
                  */
-               ASSERT(ep != NULL);
-               if (!(flags & XFS_BMAPI_ENTIRE) &&
-                   (got.br_startoff + got.br_blockcount > obno)) {
-                       if (obno > bno)
-                               bno = obno;
-                       ASSERT((bno >= obno) || (n == 0));
-                       ASSERT(bno < end);
-                       mval->br_startoff = bno;
-                       if (isnullstartblock(got.br_startblock)) {
-                               ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
-                               mval->br_startblock = DELAYSTARTBLOCK;
-                       } else
-                               mval->br_startblock =
-                                       got.br_startblock +
-                                       (bno - got.br_startoff);
-                       /*
-                        * Return the minimum of what we got and what we
-                        * asked for for the length.  We can use the len
-                        * variable here because it is modified below
-                        * and we could have been there before coming
-                        * here if the first part of the allocation
-                        * didn't overlap what was asked for.
-                        */
-                       mval->br_blockcount =
-                               XFS_FILBLKS_MIN(end - bno, got.br_blockcount -
-                                       (bno - got.br_startoff));
-                       mval->br_state = got.br_state;
-                       ASSERT(mval->br_blockcount <= len);
-               } else {
-                       *mval = got;
-                       if (isnullstartblock(mval->br_startblock)) {
-                               ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
-                               mval->br_startblock = DELAYSTARTBLOCK;
+               alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
+               error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
+                                              1, 0, &aoff, &alen);
+               ASSERT(!error);
+       }
+
+       if (rt)
+               extsz = alen / mp->m_sb.sb_rextsize;
+
+       /*
+        * Make a transaction-less quota reservation for delayed allocation
+        * blocks.  This number gets adjusted later.  On failure we return the
+        * error; xfs_bmapi_delay() fails only if nothing has been mapped yet.
+        */
+       error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
+                       rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       if (error)
+               return error;
+
+       /*
+        * Split changing sb for alen and indlen since they could be coming
+        * from different places.
+        */
+       indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
+       ASSERT(indlen > 0);
+
+       if (rt) {
+               error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
+                                         -((int64_t)extsz), 0);
+       } else {
+               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                                -((int64_t)alen), 0);
+       }
+
+       if (error)
+               goto out_unreserve_quota;
+
+       error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                        -((int64_t)indlen), 0);
+       if (error)
+               goto out_unreserve_blocks;
+
+
+       ip->i_delayed_blks += alen;
+
+       got->br_startoff = aoff;
+       got->br_startblock = nullstartblock(indlen);
+       got->br_blockcount = alen;
+       got->br_state = XFS_EXT_NORM;
+       xfs_bmap_add_extent_hole_delay(ip, lastx, got);
+
+       /*
+        * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
+        * might have merged it into one of the neighbouring ones.
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+
+       ASSERT(got->br_startoff <= aoff);
+       ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
+       ASSERT(isnullstartblock(got->br_startblock));
+       ASSERT(got->br_state == XFS_EXT_NORM);
+       return 0;
+
+out_unreserve_blocks:
+       if (rt)
+               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+       else
+               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+out_unreserve_quota:
+       if (XFS_IS_QUOTA_ON(mp))
+               xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
+                               XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       return error;
+}
+
+/*
+ * Map file blocks to filesystem blocks, adding delayed allocations as needed.
+ */
+int
+xfs_bmapi_delay(
+       struct xfs_inode        *ip,    /* incore inode */
+       xfs_fileoff_t           bno,    /* starting file offs. mapped */
+       xfs_filblks_t           len,    /* length to map in file */
+       struct xfs_bmbt_irec    *mval,  /* output: map values */
+       int                     *nmap,  /* i/o: mval size/count */
+       int                     flags)  /* XFS_BMAPI_... */
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       struct xfs_bmbt_irec    got;    /* current file extent record */
+       struct xfs_bmbt_irec    prev;   /* previous file extent record */
+       xfs_fileoff_t           obno;   /* old block number (offset) */
+       xfs_fileoff_t           end;    /* end of mapped file region */
+       xfs_extnum_t            lastx;  /* last useful extent number */
+       int                     eof;    /* we've hit the end of extents */
+       int                     n = 0;  /* current extent index */
+       int                     error = 0;
+
+       ASSERT(*nmap >= 1);
+       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+       ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       XFS_STATS_INC(xs_blk_mapw);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+               if (error)
+                       return error;
+       }
+
+       xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
+       end = bno + len;
+       obno = bno;
+
+       while (bno < end && n < *nmap) {
+               if (eof || got.br_startoff > bno) {
+                       error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
+                                                          &prev, &lastx, eof);
+                       if (error) {
+                               if (n == 0) {
+                                       *nmap = 0;
+                                       return error;
+                               }
+                               break;
                         }
                 }
  
+               /* set up the extent map to return. */
+               xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
+               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+               /* If we're done, stop now. */
+               if (bno >= end || n >= *nmap)
+                       break;
+
+               /* Else go on to the next record. */
+               prev = got;
+               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+               else
+                       eof = 1;
+       }
+
+       *nmap = n;
+       return 0;
+}
+
+
+STATIC int
+xfs_bmapi_allocate(
+       struct xfs_bmalloca     *bma)
+{
+       struct xfs_mount        *mp = bma->ip->i_mount;
+       int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
+                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+       int                     tmp_logflags = 0;
+       int                     error;
+
+       ASSERT(bma->length > 0);
+
+       /*
+        * For the wasdelay case, we could also just allocate the stuff asked
+        * for in this bmap call but that wouldn't be as good.
+        */
+       if (bma->wasdel) {
+               bma->length = (xfs_extlen_t)bma->got.br_blockcount;
+               bma->offset = bma->got.br_startoff;
+               if (bma->idx != NULLEXTNUM && bma->idx) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
+                                        &bma->prev);
+               }
+       } else {
+               bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
+               if (!bma->eof)
+                       bma->length = XFS_FILBLKS_MIN(bma->length,
+                                       bma->got.br_startoff - bma->offset);
+       }
+
+       /*
+        * Indicate if this is the first user data in the file, or just any
+        * user data.
+        */
+       if (!(bma->flags & XFS_BMAPI_METADATA)) {
+               bma->userdata = (bma->offset == 0) ?
+                       XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
+       }
+
+       bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+
+       /*
+        * Only want to do the alignment at the eof if it is userdata and
+        * allocation length is larger than a stripe unit.
+        */
+       if (mp->m_dalign && bma->length >= mp->m_dalign &&
+           !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+               error = xfs_bmap_isaeof(bma, whichfork);
+               if (error)
+                       return error;
+       }
+
+       if (bma->flags & XFS_BMAPI_STACK_SWITCH)
+               bma->stack_switch = 1;
+
+       error = xfs_bmap_alloc(bma);
+       if (error)
+               return error;
+
+       if (bma->flist->xbf_low)
+               bma->minleft = 0;
+       if (bma->cur)
+               bma->cur->bc_private.b.firstblock = *bma->firstblock;
+       if (bma->blkno == NULLFSBLOCK)
+               return 0;
+       if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+               bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
+               bma->cur->bc_private.b.firstblock = *bma->firstblock;
+               bma->cur->bc_private.b.flist = bma->flist;
+       }
+       /*
+        * Bump the number of extents we've allocated
+        * in this call.
+        */
+       bma->nallocs++;
+
+       if (bma->cur)
+               bma->cur->bc_private.b.flags =
+                       bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+
+       bma->got.br_startoff = bma->offset;
+       bma->got.br_startblock = bma->blkno;
+       bma->got.br_blockcount = bma->length;
+       bma->got.br_state = XFS_EXT_NORM;
+
+       /*
+        * A wasdelay extent has been initialized, so shouldn't be flagged
+        * as unwritten.
+        */
+       if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+           xfs_sb_version_hasextflgbit(&mp->m_sb))
+               bma->got.br_state = XFS_EXT_UNWRITTEN;
+
+       if (bma->wasdel)
+               error = xfs_bmap_add_extent_delay_real(bma);
+       else
+               error = xfs_bmap_add_extent_hole_real(bma, whichfork);
+
+       bma->logflags |= tmp_logflags;
+       if (error)
+               return error;
+
+       /*
+        * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
+        * or xfs_bmap_add_extent_hole_real might have merged it into one of
+        * the neighbouring ones.
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+
+       ASSERT(bma->got.br_startoff <= bma->offset);
+       ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
+              bma->offset + bma->length);
+       ASSERT(bma->got.br_state == XFS_EXT_NORM ||
+              bma->got.br_state == XFS_EXT_UNWRITTEN);
+       return 0;
+}
+
+
+STATIC int
+xfs_bmapi_convert_unwritten(
+       struct xfs_bmalloca     *bma,
+       struct xfs_bmbt_irec    *mval,
+       xfs_filblks_t           len,
+       int                     flags)
+{
+       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+       int                     tmp_logflags = 0;
+       int                     error;
+
+       /* check if we need to do unwritten->real conversion */
+       if (mval->br_state == XFS_EXT_UNWRITTEN &&
+           (flags & XFS_BMAPI_PREALLOC))
+               return 0;
+
+       /* check if we need to do real->unwritten conversion */
+       if (mval->br_state == XFS_EXT_NORM &&
+           (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
+                       (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
+               return 0;
+
+       /*
+        * Modify (by adding) the state flag, if writing.
+        */
+       ASSERT(mval->br_blockcount <= len);
+       if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+               bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
+                                       bma->ip, whichfork);
+               bma->cur->bc_private.b.firstblock = *bma->firstblock;
+               bma->cur->bc_private.b.flist = bma->flist;
+       }
+       mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
+                               ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+
+       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
+                       &bma->cur, mval, bma->firstblock, bma->flist,
+                       &tmp_logflags);
+       bma->logflags |= tmp_logflags;
+       if (error)
+               return error;
+
+       /*
+        * Update our extent pointer, given that
+        * xfs_bmap_add_extent_unwritten_real might have merged it into one
+        * of the neighbouring ones.
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+
+       /*
+        * We may have combined previously unwritten space with written space,
+        * so generate another request.
+        */
+       if (mval->br_blockcount < len)
+               return EAGAIN;
+       return 0;
+}
+
+/*
+ * Map file blocks to filesystem blocks, and allocate blocks or convert the
+ * extent state if necessary.  Detailed behaviour is controlled by the flags
+ * parameter.  Only allocates blocks from a single allocation group, to avoid
+ * locking problems.
+ *
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
+ */
+int
+xfs_bmapi_write(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           bno,            /* starting file offs. mapped */
+       xfs_filblks_t           len,            /* length to map in file */
+       int                     flags,          /* XFS_BMAPI_... */
+       xfs_fsblock_t           *firstblock,    /* first allocated block
+                                                  controls a.g. for allocs */
+       xfs_extlen_t            total,          /* total blocks needed */
+       struct xfs_bmbt_irec    *mval,          /* output: map values */
+       int                     *nmap,          /* i/o: mval size/count */
+       struct xfs_bmap_free    *flist)         /* i/o: list extents to free */
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp;
+       struct xfs_bmalloca     bma = { 0 };    /* args for xfs_bmap_alloc */
+       xfs_fileoff_t           end;            /* end of mapped file region */
+       int                     eof;            /* after the end of extents */
+       int                     error;          /* error return */
+       int                     n;              /* current extent index */
+       xfs_fileoff_t           obno;           /* old block number (offset) */
+       int                     whichfork;      /* data or attr fork */
+       char                    inhole;         /* current location is hole in file */
+       char                    wasdelay;       /* old extent was delayed */
+
+#ifdef DEBUG
+       xfs_fileoff_t           orig_bno;       /* original block number value */
+       int                     orig_flags;     /* original flags arg value */
+       xfs_filblks_t           orig_len;       /* original value of len arg */
+       struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
+       int                     orig_nmap;      /* original value of *nmap */
+
+       orig_bno = bno;
+       orig_len = len;
+       orig_flags = flags;
+       orig_mval = mval;
+       orig_nmap = *nmap;
+#endif
+
+       ASSERT(*nmap >= 1);
+       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+       ASSERT(!(flags & XFS_BMAPI_IGSTATE));
+       ASSERT(tp != NULL);
+       ASSERT(len > 0);
+
+       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+               XFS_ATTR_FORK : XFS_DATA_FORK;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       XFS_STATS_INC(xs_blk_mapw);
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
+                                                 &bma.logflags, whichfork);
+               if (error)
+                       goto error0;
+       }
+
+       if (*firstblock == NULLFSBLOCK) {
+               if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
+                       bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+               else
+                       bma.minleft = 1;
+       } else {
+               bma.minleft = 0;
+       }
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       goto error0;
+       }
+
+       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
+                               &bma.prev);
+       n = 0;
+       end = bno + len;
+       obno = bno;
+
+       bma.tp = tp;
+       bma.ip = ip;
+       bma.total = total;
+       bma.userdata = 0;
+       bma.flist = flist;
+       bma.firstblock = firstblock;
+
+       while (bno < end && n < *nmap) {
+               inhole = eof || bma.got.br_startoff > bno;
+               wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
+
                 /*
-                * Check if writing previously allocated but
-                * unwritten extents.
+                * First, deal with the hole before the allocated space
+                * that we found, if any.
                  */
-               if (wr &&
-                   ((mval->br_state == XFS_EXT_UNWRITTEN &&
-                     ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
-                    (mval->br_state == XFS_EXT_NORM &&
-                     ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
-                               (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
+               if (inhole || wasdelay) {
+                       bma.eof = eof;
+                       bma.conv = !!(flags & XFS_BMAPI_CONVERT);
+                       bma.wasdel = wasdelay;
+                       bma.offset = bno;
+                       bma.flags = flags;
+
                         /*
-                        * Modify (by adding) the state flag, if writing.
+                        * There's a 32/64 bit type mismatch between the
+                        * allocation length request (which can be 64 bits in
+                        * length) and the bma length request, which is
+                        * xfs_extlen_t and therefore 32 bits. Hence we have to
+                        * check for 32-bit overflows and handle them here.
                          */
-                       ASSERT(mval->br_blockcount <= len);
-                       if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
-                               cur = xfs_bmbt_init_cursor(mp,
-                                       tp, ip, whichfork);
-                               cur->bc_private.b.firstblock =
-                                       *firstblock;
-                               cur->bc_private.b.flist = flist;
-                       }
-                       mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
-                                               ? XFS_EXT_NORM
-                                               : XFS_EXT_UNWRITTEN;
-                       error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
-                               firstblock, flist, &tmp_logflags,
-                               whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
-                       logflags |= tmp_logflags;
+                       if (len > (xfs_filblks_t)MAXEXTLEN)
+                               bma.length = MAXEXTLEN;
+                       else
+                               bma.length = len;
+
+                       ASSERT(len > 0);
+                       ASSERT(bma.length > 0);
+                       error = xfs_bmapi_allocate(&bma);
                         if (error)
                                 goto error0;
-                       lastx = ifp->if_lastex;
-                       ep = xfs_iext_get_ext(ifp, lastx);
-                       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-                       xfs_bmbt_get_all(ep, &got);
-                       /*
-                        * We may have combined previously unwritten
-                        * space with written space, so generate
-                        * another request.
-                        */
-                       if (mval->br_blockcount < len)
-                               continue;
+                       if (bma.blkno == NULLFSBLOCK)
+                               break;
                 }
  
-               ASSERT((flags & XFS_BMAPI_ENTIRE) ||
-                      ((mval->br_startoff + mval->br_blockcount) <= end));
-               ASSERT((flags & XFS_BMAPI_ENTIRE) ||
-                      (mval->br_blockcount <= len) ||
-                      (mval->br_startoff < obno));
-               bno = mval->br_startoff + mval->br_blockcount;
-               len = end - bno;
-               if (n > 0 && mval->br_startoff == mval[-1].br_startoff) {
-                       ASSERT(mval->br_startblock == mval[-1].br_startblock);
-                       ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
-                       ASSERT(mval->br_state == mval[-1].br_state);
-                       mval[-1].br_blockcount = mval->br_blockcount;
-                       mval[-1].br_state = mval->br_state;
-               } else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
-                          mval[-1].br_startblock != DELAYSTARTBLOCK &&
-                          mval[-1].br_startblock != HOLESTARTBLOCK &&
-                          mval->br_startblock ==
-                          mval[-1].br_startblock + mval[-1].br_blockcount &&
-                          ((flags & XFS_BMAPI_IGSTATE) ||
-                               mval[-1].br_state == mval->br_state)) {
-                       ASSERT(mval->br_startoff ==
-                              mval[-1].br_startoff + mval[-1].br_blockcount);
-                       mval[-1].br_blockcount += mval->br_blockcount;
-               } else if (n > 0 &&
-                          mval->br_startblock == DELAYSTARTBLOCK &&
-                          mval[-1].br_startblock == DELAYSTARTBLOCK &&
-                          mval->br_startoff ==
-                          mval[-1].br_startoff + mval[-1].br_blockcount) {
-                       mval[-1].br_blockcount += mval->br_blockcount;
-                       mval[-1].br_state = mval->br_state;
-               } else if (!((n == 0) &&
-                            ((mval->br_startoff + mval->br_blockcount) <=
-                             obno))) {
-                       mval++;
-                       n++;
-               }
+               /* Deal with the allocated space we found.  */
+               xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
+                                                       end, n, flags);
+
+               /* Execute unwritten extent conversion if necessary */
+               error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
+               if (error == EAGAIN)
+                       continue;
+               if (error)
+                       goto error0;
+
+               /* update the extent map to return */
+               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
                 /*
                  * If we're done, stop now.  Stop when we've allocated
                  * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
                  * the transaction may get too big.
                  */
-               if (bno >= end || n >= *nmap || nallocs >= *nmap)
+               if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
                         break;
-               /*
-                * Else go on to the next record.
-                */
-               ep = xfs_iext_get_ext(ifp, ++lastx);
-               prev = got;
-               if (lastx >= nextents)
+
+               /* Else go on to the next record. */
+               bma.prev = bma.got;
+               if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
+                                        &bma.got);
+               } else
                         eof = 1;
-               else
-                       xfs_bmbt_get_all(ep, &got);
         }
-       ifp->if_lastex = lastx;
         *nmap = n;
+
         /*
          * Transform from btree to extents, give it cur.
          */
-       if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
-           XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
-               ASSERT(wr && cur);
-               error = xfs_bmap_btree_to_extents(tp, ip, cur,
+       if (xfs_bmap_wants_extents(ip, whichfork)) {
+               int             tmp_logflags = 0;
+
+               ASSERT(bma.cur);
+               error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
                         &tmp_logflags, whichfork);
-               logflags |= tmp_logflags;
+               bma.logflags |= tmp_logflags;
                 if (error)
                         goto error0;
         }
-       ASSERT(ifp->if_ext_max ==
-              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
-              XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
+              XFS_IFORK_NEXTENTS(ip, whichfork) >
+               XFS_IFORK_MAXEXT(ip, whichfork));
         error = 0;
  error0:
         /*
          * Log everything.  Do this after conversion, there's no point in
          * logging the extent records if we've converted to btree format.
          */
-       if ((logflags & xfs_ilog_fext(whichfork)) &&
+       if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-               logflags &= ~xfs_ilog_fext(whichfork);
-       else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
+               bma.logflags &= ~xfs_ilog_fext(whichfork);
+       else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
                  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
-               logflags &= ~xfs_ilog_fbroot(whichfork);
+               bma.logflags &= ~xfs_ilog_fbroot(whichfork);
         /*
          * Log whatever the flags say, even if error.  Otherwise we might miss
          * detecting a case where the data is changed, there's an error,
          * and it's not logged so we don't shutdown when we should.
          */
-       if (logflags) {
-               ASSERT(tp && wr);
-               xfs_trans_log_inode(tp, ip, logflags);
-       }
-       if (cur) {
+       if (bma.logflags)
+               xfs_trans_log_inode(tp, ip, bma.logflags);
+
+       if (bma.cur) {
                 if (!error) {
                         ASSERT(*firstblock == NULLFSBLOCK ||
                                XFS_FSB_TO_AGNO(mp, *firstblock) ==
                                XFS_FSB_TO_AGNO(mp,
-                                      cur->bc_private.b.firstblock) ||
+                                      bma.cur->bc_private.b.firstblock) ||
                                (flist->xbf_low &&
                                 XFS_FSB_TO_AGNO(mp, *firstblock) <
                                 XFS_FSB_TO_AGNO(mp,
-                                       cur->bc_private.b.firstblock)));
-                       *firstblock = cur->bc_private.b.firstblock;
+                                       bma.cur->bc_private.b.firstblock)));
+                       *firstblock = bma.cur->bc_private.b.firstblock;
                 }
-               xfs_btree_del_cursor(cur,
+               xfs_btree_del_cursor(bma.cur,
                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
         }
         if (!error)
@@ -4648,59 +4792,6 @@ error0:
         return error;
  }
  
-/*
- * Map file blocks to filesystem blocks, simple version.
- * One block (extent) only, read-only.
- * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
- * For the other flag values, the effect is as if XFS_BMAPI_METADATA
- * was set and all the others were clear.
- */
-int                                            /* error */
-xfs_bmapi_single(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_inode_t     *ip,            /* incore inode */
-       int             whichfork,      /* data or attr fork */
-       xfs_fsblock_t   *fsb,           /* output: mapped block */
-       xfs_fileoff_t   bno)            /* starting file offs. mapped */
-{
-       int             eof;            /* we've hit the end of extents */
-       int             error;          /* error return */
-       xfs_bmbt_irec_t got;            /* current file extent record */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_extnum_t    lastx;          /* last useful extent number */
-       xfs_bmbt_irec_t prev;           /* previous file extent record */
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (unlikely(
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)) {
-              XFS_ERROR_REPORT("xfs_bmapi_single", XFS_ERRLEVEL_LOW,
-                               ip->i_mount);
-              return XFS_ERROR(EFSCORRUPTED);
-       }
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return XFS_ERROR(EIO);
-       XFS_STATS_INC(xs_blk_mapr);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
-               return error;
-       (void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
-               &prev);
-       /*
-        * Reading past eof, act as though there's a hole
-        * up to end.
-        */
-       if (eof || got.br_startoff > bno) {
-               *fsb = NULLFSBLOCK;
-               return 0;
-       }
-       ASSERT(!isnullstartblock(got.br_startblock));
-       ASSERT(bno < got.br_startoff + got.br_blockcount);
-       *fsb = got.br_startblock + (bno - got.br_startoff);
-       ifp->if_lastex = lastx;
-       return 0;
-}
-
  /*
   * Unmap (remove) blocks from a file.
   * If nexts is nonzero then the number of extents to remove is limited to
@@ -4739,7 +4830,6 @@ xfs_bunmapi(
         int                     tmp_logflags;   /* partial logging flags */
         int                     wasdel;         /* was a delayed alloc extent */
         int                     whichfork;      /* data or attribute fork */
-       int                     rsvd;           /* OK to allocate reserved blocks */
         xfs_fsblock_t           sum;
  
         trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
@@ -4757,11 +4847,10 @@ xfs_bunmapi(
         mp = ip->i_mount;
         if (XFS_FORCED_SHUTDOWN(mp))
                 return XFS_ERROR(EIO);
-       rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
+
         ASSERT(len > 0);
         ASSERT(nexts >= 0);
-       ASSERT(ifp->if_ext_max ==
-              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
             (error = xfs_iread_extents(tp, ip, whichfork)))
                 return error;
@@ -4795,6 +4884,15 @@ xfs_bunmapi(
                 cur->bc_private.b.flags = 0;
         } else
                 cur = NULL;
+
+       if (isrt) {
+               /*
+                * Synchronize by locking the bitmap inode.
+                */
+               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+               xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+       }
+
         extno = 0;
         while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
                (nexts == 0 || extno < nexts)) {
@@ -4873,9 +4971,9 @@ xfs_bunmapi(
                                 del.br_blockcount = mod;
                         }
                         del.br_state = XFS_EXT_UNWRITTEN;
-                       error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
-                               firstblock, flist, &logflags,
-                               XFS_DATA_FORK, 0);
+                       error = xfs_bmap_add_extent_unwritten_real(tp, ip,
+                                       &lastx, &cur, &del, firstblock, flist,
+                                       &logflags);
                         if (error)
                                 goto error0;
                         goto nodelete;
@@ -4901,9 +4999,12 @@ xfs_bunmapi(
                                  */
                                 ASSERT(bno >= del.br_blockcount);
                                 bno -= del.br_blockcount;
-                               if (bno < got.br_startoff) {
-                                       if (--lastx >= 0)
-                                               xfs_bmbt_get_all(--ep, &got);
+                               if (got.br_startoff > bno) {
+                                       if (--lastx >= 0) {
+                                               ep = xfs_iext_get_ext(ifp,
+                                                                     lastx);
+                                               xfs_bmbt_get_all(ep, &got);
+                                       }
                                 }
                                 continue;
                         } else if (del.br_state == XFS_EXT_UNWRITTEN) {
@@ -4927,18 +5028,19 @@ xfs_bunmapi(
                                         prev.br_startoff = start;
                                 }
                                 prev.br_state = XFS_EXT_UNWRITTEN;
-                               error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
-                                       &prev, firstblock, flist, &logflags,
-                                       XFS_DATA_FORK, 0);
+                               lastx--;
+                               error = xfs_bmap_add_extent_unwritten_real(tp,
+                                               ip, &lastx, &cur, &prev,
+                                               firstblock, flist, &logflags);
                                 if (error)
                                         goto error0;
                                 goto nodelete;
                         } else {
                                 ASSERT(del.br_state == XFS_EXT_NORM);
                                 del.br_state = XFS_EXT_UNWRITTEN;
-                               error = xfs_bmap_add_extent(ip, lastx, &cur,
-                                       &del, firstblock, flist, &logflags,
-                                       XFS_DATA_FORK, 0);
+                               error = xfs_bmap_add_extent_unwritten_real(tp,
+                                               ip, &lastx, &cur, &del,
+                                               firstblock, flist, &logflags);
                                 if (error)
                                         goto error0;
                                 goto nodelete;
@@ -4953,13 +5055,13 @@ xfs_bunmapi(
                                 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
                                 do_div(rtexts, mp->m_sb.sb_rextsize);
                                 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                               (int64_t)rtexts, rsvd);
+                                               (int64_t)rtexts, 0);
                                 (void)xfs_trans_reserve_quota_nblks(NULL,
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_RTBLKS);
                         } else {
                                 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                               (int64_t)del.br_blockcount, rsvd);
+                                               (int64_t)del.br_blockcount, 0);
                                 (void)xfs_trans_reserve_quota_nblks(NULL,
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_REGBLKS);
@@ -4983,46 +5085,43 @@ xfs_bunmapi(
                  */
                 if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
                     XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-                   XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
+                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
+                       XFS_IFORK_MAXEXT(ip, whichfork) &&
                     del.br_startoff > got.br_startoff &&
                     del.br_startoff + del.br_blockcount <
                     got.br_startoff + got.br_blockcount) {
                         error = XFS_ERROR(ENOSPC);
                         goto error0;
                 }
-               error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
-                               &tmp_logflags, whichfork, rsvd);
+               error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
+                               &tmp_logflags, whichfork);
                 logflags |= tmp_logflags;
                 if (error)
                         goto error0;
                 bno = del.br_startoff - 1;
  nodelete:
-               lastx = ifp->if_lastex;
                 /*
                  * If not done go on to the next (previous) record.
-                * Reset ep in case the extents array was re-alloced.
                  */
-               ep = xfs_iext_get_ext(ifp, lastx);
                 if (bno != (xfs_fileoff_t)-1 && bno >= start) {
-                       if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
-                           xfs_bmbt_get_startoff(ep) > bno) {
-                               if (--lastx >= 0)
-                                       ep = xfs_iext_get_ext(ifp, lastx);
-                       }
-                       if (lastx >= 0)
+                       if (lastx >= 0) {
+                               ep = xfs_iext_get_ext(ifp, lastx);
+                               if (xfs_bmbt_get_startoff(ep) > bno) {
+                                       if (--lastx >= 0)
+                                               ep = xfs_iext_get_ext(ifp,
+                                                                     lastx);
+                               }
                                 xfs_bmbt_get_all(ep, &got);
+                       }
                         extno++;
                 }
         }
-       ifp->if_lastex = lastx;
         *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
-       ASSERT(ifp->if_ext_max ==
-              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
         /*
          * Convert to a btree if necessary.
          */
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-           XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+       if (xfs_bmap_needs_btree(ip, whichfork)) {
                 ASSERT(cur == NULL);
                 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
                         &cur, 0, &tmp_logflags, whichfork);
@@ -5033,8 +5132,7 @@ nodelete:
         /*
          * transform from btree to extents, give it cur
          */
-       else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
-                XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+       else if (xfs_bmap_wants_extents(ip, whichfork)) {
                 ASSERT(cur != NULL);
                 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
                         whichfork);
@@ -5045,8 +5143,6 @@ nodelete:
         /*
          * transform from extents to local?
          */
-       ASSERT(ifp->if_ext_max ==
-              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
         error = 0;
  error0:
         /*
@@ -5076,88 +5172,252 @@ error0:
         return error;
  }
  
+#ifdef DEBUG
+STATIC struct xfs_buf *                /* buffer whose XFS_BUF_ADDR() equals bno, or NULL */
+xfs_bmap_get_bp(                       /* DEBUG-only lookup: search the cursor, then its transaction */
+       struct xfs_btree_cur    *cur,   /* btree cursor to search; NULL yields NULL */
+       xfs_fsblock_t           bno)    /* address to match; visible callers pass XFS_FSB_TO_DADDR() */
+{
+       struct xfs_log_item_desc *lidp; /* iterator over the transaction log items */
+       int                     i;      /* index into cur->bc_bufs[] */
+
+       if (!cur)                       /* no cursor, nothing to search */
+               return NULL;
+
+       for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
+               if (!cur->bc_bufs[i])   /* first empty slot ends the scan */
+                       break;
+               if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
+                       return cur->bc_bufs[i];
+       }
+
+       /* Not held by the cursor: chase down all the log items of cur->bc_tp to see if the bp is there */
+       list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { /* NOTE(review): bc_tp is not NULL-checked - confirm */
+               struct xfs_buf_log_item *bip;
+               bip = (struct xfs_buf_log_item *)lidp->lid_item; /* cast before the item type is known */
+               if (bip->bli_item.li_type == XFS_LI_BUF &&      /* bli_buf is only read once this holds */
+                   XFS_BUF_ADDR(bip->bli_buf) == bno)
+                       return bip->bli_buf;
+       }
+
+       return NULL;                    /* neither the cursor nor the transaction has it */
+}
+
+STATIC void                            /* DEBUG-only; ASSERTs or panics on a bad block */
+xfs_check_block(                       /* check key order and pointer uniqueness in one btree block */
+       struct xfs_btree_block  *block, /* block to check; bb_level must be > 0 */
+       xfs_mount_t             *mp,    /* mount; supplies m_bmap_dmxr and feeds the address macros */
+       int                     root,   /* nonzero: use XFS_BMAP_BROOT_PTR_ADDR() with sz */
+       short                   sz)     /* size argument for XFS_BMAP_BROOT_PTR_ADDR(); unused when !root */
+{
+       int                     i, j, dmxr;     /* i, j: 1-based record indices; dmxr: XFS_BMBT_PTR_ADDR() arg */
+       __be64                  *pp, *thispa;   /* pointer to block address (pp: record i, thispa: record j) */
+       xfs_bmbt_key_t          *prevp, *keyp;  /* previous and current key */
+
+       ASSERT(be16_to_cpu(block->bb_level) > 0);
+
+       prevp = NULL;
+       for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
+               dmxr = mp->m_bmap_dmxr[0];      /* NOTE(review): xfs_bmap_check_leaf_extents uses m_bmap_dmxr[1] with this macro - confirm index */
+               keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
+
+               if (prevp) {                    /* keys must be strictly increasing */
+                       ASSERT(be64_to_cpu(prevp->br_startoff) <
+                              be64_to_cpu(keyp->br_startoff));
+               }
+               prevp = keyp;
+
+               /*
+                * Compare the block numbers to see if there are dups: pointer i against every later pointer j.
+                */
+               if (root)
+                       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
+               else
+                       pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
+
+               for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
+                       if (root)
+                               thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
+                       else
+                               thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
+                       if (*thispa == *pp) {   /* raw __be64 compare; equality needs no byte swap */
+                               xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
+                                       __func__, j, i,
+                                       (unsigned long long)be64_to_cpu(*thispa));
+                               panic("%s: ptrs are equal in node\n",
+                                       __func__);      /* a duplicate pointer is treated as fatal */
+                       }
+               }
+       }
+}
+
  /*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
+ * Check that the extents for the inode ip are in the right order in all
+ * btree leaves.
   */
-STATIC int                             /* error */
-xfs_bmap_isaeof(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_fileoff_t   off,            /* file offset in fsblocks */
-       int             whichfork,      /* data or attribute fork */
-       char            *aeof)          /* return value */
+
+STATIC void                            /* DEBUG-only; warns and panics on any failure */
+xfs_bmap_check_leaf_extents(
+       xfs_btree_cur_t         *cur,   /* btree cursor or null */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       int                     whichfork)      /* data or attr fork */
  {
-       int             error;          /* error return value */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_bmbt_rec_host_t *lastrec;   /* extent record pointer */
-       xfs_extnum_t    nextents;       /* number of file extents */
-       xfs_bmbt_irec_t s;              /* expanded extent record */
+       struct xfs_btree_block  *block; /* current btree block */
+       xfs_fsblock_t           bno;    /* block # of "block" */
+       xfs_buf_t               *bp;    /* buffer for "block" */
+       int                     error;  /* error return value */
+       xfs_extnum_t            i=0, j; /* i: records checked so far, j: record index within a leaf */
+       xfs_ifork_t             *ifp;   /* fork structure */
+       int                     level;  /* btree level, for checking */
+       xfs_mount_t             *mp;    /* file system mount structure */
+       __be64                  *pp;    /* pointer to block address */
+       xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
+       xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
+       xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
+       int                     bp_release = 0; /* nonzero when bp was read here and must be released */
  
-       ASSERT(whichfork == XFS_DATA_FORK);
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(NULL, ip, whichfork)))
-               return error;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       if (nextents == 0) {
-               *aeof = 1;
-               return 0;
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
+               return;                 /* only btree-format forks are checked */
         }
+
+       bno = NULLFSBLOCK;
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       block = ifp->if_broot;          /* start at the root block held in the fork */
         /*
-        * Go to the last extent
+        * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
          */
-       lastrec = xfs_iext_get_ext(ifp, nextents - 1);
-       xfs_bmbt_get_all(lastrec, &s);
+       level = be16_to_cpu(block->bb_level);
+       ASSERT(level > 0);
+       xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
+       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+       bno = be64_to_cpu(*pp);         /* first (leftmost) child of the root */
+
+       ASSERT(bno != NULLDFSBNO);
+       ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
         /*
-        * Check we are allocating in the last extent (for delayed allocations)
-        * or past the last extent for non-delayed allocations.
+        * Go down the tree until leaf level is reached, following the first
+        * pointer (leftmost) at each level.
          */
-       *aeof = (off >= s.br_startoff &&
-                off < s.br_startoff + s.br_blockcount &&
-                isnullstartblock(s.br_startblock)) ||
-               off >= s.br_startoff + s.br_blockcount;
-       return 0;
-}
+       while (level-- > 0) {
+               /* See if buf is in cur (or its transaction) first; read it only if not */
+               bp_release = 0;
+               bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
+               if (!bp) {
+                       bp_release = 1;
+                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+                       if (error)
+                               goto error_norelse;
+               }
+               block = XFS_BUF_TO_BLOCK(bp);
+               XFS_WANT_CORRUPTED_GOTO(
+                       xfs_bmap_sanity_check(mp, bp, level),
+                       error0);
+               if (level == 0)         /* leaf reached: keep bp and block for the leaf walk below */
+                       break;
  
-/*
- * Check if the endoff is outside the last extent. If so the caller will grow
- * the allocation to a stripe unit boundary.
- */
-int                                    /* error */
-xfs_bmap_eof(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_fileoff_t   endoff,         /* file offset in fsblocks */
-       int             whichfork,      /* data or attribute fork */
-       int             *eof)           /* result value */
-{
-       xfs_fsblock_t   blockcount;     /* extent block count */
-       int             error;          /* error return value */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_bmbt_rec_host_t *lastrec;   /* extent record pointer */
-       xfs_extnum_t    nextents;       /* number of file extents */
-       xfs_fileoff_t   startoff;       /* extent starting file offset */
+               /*
+                * Check this block for basic sanity (increasing keys and
+                * no duplicate blocks).
+                */
  
-       ASSERT(whichfork == XFS_DATA_FORK);
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(NULL, ip, whichfork)))
-               return error;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       if (nextents == 0) {
-               *eof = 1;
-               return 0;
+               xfs_check_block(block, mp, 0, 0);
+               pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
+               bno = be64_to_cpu(*pp);
+               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               if (bp_release) {
+                       bp_release = 0;
+                       xfs_trans_brelse(NULL, bp);
+               }
         }
+
+       /*
+        * Here with bp and block set to the leftmost leaf node in the tree.
+        */
+       i = 0;
+
         /*
-        * Go to the last extent
+        * Loop over all leaf nodes checking that all extents are in the right order.
          */
-       lastrec = xfs_iext_get_ext(ifp, nextents - 1);
-       startoff = xfs_bmbt_get_startoff(lastrec);
-       blockcount = xfs_bmbt_get_blockcount(lastrec);
-       *eof = endoff >= startoff + blockcount;
-       return 0;
+       for (;;) {
+               xfs_fsblock_t   nextbno;        /* right sibling of the current leaf */
+               xfs_extnum_t    num_recs;       /* record count of the current leaf */
+
+
+               num_recs = xfs_btree_get_numrecs(block);
+
+               /*
+                * Remember the next (right sibling) leaf block, if any; no read-ahead is issued here.
+                */
+
+               nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+               /*
+                * Check all the extents to make sure they are OK.
+                * If we had a previous block, the last entry should
+                * conform with the first entry in this one.
+                */
+
+               ep = XFS_BMBT_REC_ADDR(mp, block, 1);
+               if (i) {                /* not the first leaf: compare against the saved last record */
+                       ASSERT(xfs_bmbt_disk_get_startoff(&last) +
+                              xfs_bmbt_disk_get_blockcount(&last) <=
+                              xfs_bmbt_disk_get_startoff(ep));
+               }
+               for (j = 1; j < num_recs; j++) {
+                       nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
+                       ASSERT(xfs_bmbt_disk_get_startoff(ep) +
+                              xfs_bmbt_disk_get_blockcount(ep) <=
+                              xfs_bmbt_disk_get_startoff(nextp));
+                       ep = nextp;
+               }
+
+               last = *ep;             /* copy the record before the buffer may be released */
+               i += num_recs;
+               if (bp_release) {
+                       bp_release = 0;
+                       xfs_trans_brelse(NULL, bp);
+               }
+               bno = nextbno;
+               /*
+                * If we've reached the end, stop.
+                */
+               if (bno == NULLFSBLOCK)
+                       break;
+
+               bp_release = 0;
+               bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
+               if (!bp) {
+                       bp_release = 1;
+                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+                       if (error)
+                               goto error_norelse;
+               }
+               block = XFS_BUF_TO_BLOCK(bp);
+       }
+       if (bp_release) {               /* bp_release was cleared just before the only break above */
+               bp_release = 0;
+               xfs_trans_brelse(NULL, bp);
+       }
+       return;
+
+error0:                                /* corruption check failed while a buffer may be held */
+       xfs_warn(mp, "%s: at error0", __func__);
+       if (bp_release)
+               xfs_trans_brelse(NULL, bp);
+error_norelse:                         /* buffer read failed: nothing is released on this path */
+       xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
+               __func__, i);
+       panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
+       return;
  }
+#endif
  
  /*
   * Count fsblocks of the given fork.
@@ -5229,7 +5489,9 @@ xfs_bmap_count_tree(
         struct xfs_btree_block  *block, *nextblock;
         int                     numrecs;
  
-       if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
+       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+       if (error)
                 return error;
         *count += 1;
         block = XFS_BUF_TO_BLOCK(bp);
@@ -5238,8 +5500,10 @@ xfs_bmap_count_tree(
                 /* Not at node above leaves, count this level of nodes */
                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
                 while (nextbno != NULLFSBLOCK) {
-                       if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
-                               0, &nbp, XFS_BMAP_BTREE_REF)))
+                       error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+                                               XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+                       if (error)
                                 return error;
                         *count += 1;
                         nextblock = XFS_BUF_TO_BLOCK(nbp);
@@ -5268,8 +5532,10 @@ xfs_bmap_count_tree(
                         if (nextbno == NULLFSBLOCK)
                                 break;
                         bno = nextbno;
-                       if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
+                       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+                       if (error)
                                 return error;
                         *count += 1;
                         block = XFS_BUF_TO_BLOCK(bp);
@@ -5315,3 +5581,16 @@ xfs_bmap_disk_count_leaves(
                 *count += xfs_bmbt_disk_get_blockcount(frp);
         }
  }
+
+/*
+ * Convert the given file system block to a disk block.  We have to treat it
+ * differently based on whether the file is a real time file or not, because the
+ * bmap code does.
+ */
+xfs_daddr_t
+xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
+{
+       return (XFS_IS_REALTIME_INODE(ip) ? \
+                (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
+                XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
+}
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c

index ff51fdda5e949f344df75af648965bb6b9be9cf8..836f52f6490b00ec7382d65c0bdfc14b5a989222 100644 (file)
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -403,10 +403,10 @@ xfs_bmbt_to_bmdr(
         xfs_bmbt_key_t          *tkp;
         __be64                  *tpp;
  
-       ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
-       ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
-       ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
-       ASSERT(be16_to_cpu(rblock->bb_level) > 0);
+       ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
+       ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
+       ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
+       ASSERT(rblock->bb_level != 0);
         dblock->bb_level = rblock->bb_level;
         dblock->bb_numrecs = rblock->bb_numrecs;
         dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
@@ -687,6 +687,67 @@ xfs_bmbt_key_diff(
                                       cur->bc_rec.b.br_startoff;
  }
  
+static void
+xfs_bmbt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       unsigned int            level;
+       int                     lblock_ok; /* block passes checks */
+
+       /* magic number and level verification.
+        *
+        * We don't know what fork we belong to, so just verify that the level
+        * is less than the maximum of the two. Later checks will be more
+        * precise.
+        */
+       level = be16_to_cpu(block->bb_level);
+       lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) &&
+                   level < MAX(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]);
+
+       /* numrecs verification */
+       lblock_ok = lblock_ok &&
+               be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
+
+       /* sibling pointer verification */
+       lblock_ok = lblock_ok &&
+               block->bb_u.l.bb_leftsib &&
+               (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
+                XFS_FSB_SANITY_CHECK(mp,
+                       be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+               block->bb_u.l.bb_rightsib &&
+               (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
+                XFS_FSB_SANITY_CHECK(mp,
+                       be64_to_cpu(block->bb_u.l.bb_rightsib)));
+
+       if (!lblock_ok) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+static void
+xfs_bmbt_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_bmbt_verify(bp);
+}
+
+static void
+xfs_bmbt_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_bmbt_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_bmbt_buf_ops = {
+       .verify_read = xfs_bmbt_read_verify,
+       .verify_write = xfs_bmbt_write_verify,
+};
+
+
  #ifdef DEBUG
  STATIC int
  xfs_bmbt_keys_inorder(
@@ -815,7 +876,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
         .init_rec_from_cur      = xfs_bmbt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_bmbt_init_ptr_from_cur,
         .key_diff               = xfs_bmbt_key_diff,
-
+       .buf_ops                = &xfs_bmbt_buf_ops,
  #ifdef DEBUG
         .keys_inorder           = xfs_bmbt_keys_inorder,
         .recs_inorder           = xfs_bmbt_recs_inorder,
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c

index 02854dbb70b116d822e21befb1d82f60ab39505e..c35269b67e485514511c47c13ebb45c22ac8897a 100644 (file)
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -48,11 +48,11 @@ xfs_btree_check_lblock(
                 be16_to_cpu(block->bb_numrecs) <=
                         cur->bc_ops->get_maxrecs(cur, level) &&
                 block->bb_u.l.bb_leftsib &&
-               (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
+               (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
                  XFS_FSB_SANITY_CHECK(mp,
                         be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
                 block->bb_u.l.bb_rightsib &&
-               (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
+               (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
                  XFS_FSB_SANITY_CHECK(mp,
                         be64_to_cpu(block->bb_u.l.bb_rightsib)));
         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
@@ -87,10 +87,10 @@ xfs_btree_check_sblock(
                 be16_to_cpu(block->bb_level) == level &&
                 be16_to_cpu(block->bb_numrecs) <=
                         cur->bc_ops->get_maxrecs(cur, level) &&
-               (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
+               (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
                  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
                 block->bb_u.s.bb_leftsib &&
-               (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
+               (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
                  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
                 block->bb_u.s.bb_rightsib;
         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
@@ -250,16 +250,19 @@ xfs_btree_dup_cursor(
         for (i = 0; i < new->bc_nlevels; i++) {
                 new->bc_ptrs[i] = cur->bc_ptrs[i];
                 new->bc_ra[i] = cur->bc_ra[i];
-               if ((bp = cur->bc_bufs[i])) {
-                       if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                               XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
+               bp = cur->bc_bufs[i];
+               if (bp) {
+                       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                                  XFS_BUF_ADDR(bp), mp->m_bsize,
+                                                  0, &bp,
+                                                  cur->bc_ops->buf_ops);
+                       if (error) {
                                 xfs_btree_del_cursor(new, error);
                                 *ncur = NULL;
                                 return error;
                         }
                         new->bc_bufs[i] = bp;
-                       ASSERT(bp);
-                       ASSERT(!XFS_BUF_GETERROR(bp));
+                       ASSERT(!xfs_buf_geterror(bp));
                 } else
                         new->bc_bufs[i] = NULL;
         }
@@ -450,8 +453,7 @@ xfs_btree_get_bufl(
         ASSERT(fsbno != NULLFSBLOCK);
         d = XFS_FSB_TO_DADDR(mp, fsbno);
         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-       ASSERT(bp);
-       ASSERT(!XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         return bp;
  }
  
@@ -474,8 +476,7 @@ xfs_btree_get_bufs(
         ASSERT(agbno != NULLAGBLOCK);
         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-       ASSERT(bp);
-       ASSERT(!XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         return bp;
  }
  
@@ -493,9 +494,9 @@ xfs_btree_islastblock(
         block = xfs_btree_get_block(cur, level, &bp);
         xfs_btree_check_block(cur, block, level, bp);
         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
+               return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
         else
-               return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
+               return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
  }
  
  /*
@@ -596,69 +597,29 @@ xfs_btree_offsets(
   * Get a buffer for the block, return it read in.
   * Long-form addressing.
   */
-int                                    /* error */
+int
  xfs_btree_read_bufl(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_fsblock_t   fsbno,          /* file system block number */
-       uint            lock,           /* lock flags for read_buf */
-       xfs_buf_t       **bpp,          /* buffer for fsbno */
-       int             refval)         /* ref count value for buffer */
-{
-       xfs_buf_t       *bp;            /* return value */
+       struct xfs_mount        *mp,            /* file system mount point */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       xfs_fsblock_t           fsbno,          /* file system block number */
+       uint                    lock,           /* lock flags for read_buf */
+       struct xfs_buf          **bpp,          /* buffer for fsbno */
+       int                     refval,         /* ref count value for buffer */
+       const struct xfs_buf_ops *ops)
+{
+       struct xfs_buf          *bp;            /* return value */
         xfs_daddr_t             d;              /* real disk block address */
-       int             error;
+       int                     error;
  
         ASSERT(fsbno != NULLFSBLOCK);
         d = XFS_FSB_TO_DADDR(mp, fsbno);
-       if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                       mp->m_bsize, lock, &bp))) {
-               return error;
-       }
-       ASSERT(!bp || !XFS_BUF_GETERROR(bp));
-       if (bp != NULL) {
-               XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
-       }
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Get a buffer for the block, return it read in.
- * Short-form addressing.
- */
-int                                    /* error */
-xfs_btree_read_bufs(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_agblock_t   agbno,          /* allocation group block number */
-       uint            lock,           /* lock flags for read_buf */
-       xfs_buf_t       **bpp,          /* buffer for agno/agbno */
-       int             refval)         /* ref count value for buffer */
-{
-       xfs_buf_t       *bp;            /* return value */
-       xfs_daddr_t     d;              /* real disk block address */
-       int             error;
-
-       ASSERT(agno != NULLAGNUMBER);
-       ASSERT(agbno != NULLAGBLOCK);
-       d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                                       mp->m_bsize, lock, &bp))) {
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+                                  mp->m_bsize, lock, &bp, ops);
+       if (error)
                 return error;
-       }
-       ASSERT(!bp || !XFS_BUF_GETERROR(bp));
-       if (bp != NULL) {
-               switch (refval) {
-               case XFS_ALLOC_BTREE_REF:
-                       XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
-                       break;
-               case XFS_INO_BTREE_REF:
-                       XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
-                       break;
-               }
-       }
+       ASSERT(!xfs_buf_geterror(bp));
+       if (bp)
+               xfs_buf_set_ref(bp, refval);
         *bpp = bp;
         return 0;
  }
@@ -674,12 +635,14 @@ xfs_btree_readahead_lblock(
         xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
  
         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
-               xfs_btree_reada_bufl(cur->bc_mp, left, 1);
+               xfs_btree_reada_bufl(cur->bc_mp, left, 1,
+                                    cur->bc_ops->buf_ops);
                 rval++;
         }
  
         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
-               xfs_btree_reada_bufl(cur->bc_mp, right, 1);
+               xfs_btree_reada_bufl(cur->bc_mp, right, 1,
+                                    cur->bc_ops->buf_ops);
                 rval++;
         }
  
@@ -699,13 +662,13 @@ xfs_btree_readahead_sblock(
  
         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-                                    left, 1);
+                                    left, 1, cur->bc_ops->buf_ops);
                 rval++;
         }
  
         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-                                    right, 1);
+                                    right, 1, cur->bc_ops->buf_ops);
                 rval++;
         }
  
@@ -762,14 +725,14 @@ xfs_btree_setbuf(
  
         b = XFS_BUF_TO_BLOCK(bp);
         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
+               if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-               if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
+               if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
         } else {
-               if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
+               if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-               if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
+               if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
         }
  }
@@ -780,9 +743,9 @@ xfs_btree_ptr_is_null(
         union xfs_btree_ptr     *ptr)
  {
         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               return be64_to_cpu(ptr->l) == NULLDFSBNO;
+               return ptr->l == cpu_to_be64(NULLDFSBNO);
         else
-               return be32_to_cpu(ptr->s) == NULLAGBLOCK;
+               return ptr->s == cpu_to_be32(NULLAGBLOCK);
  }
  
  STATIC void
@@ -843,18 +806,22 @@ xfs_btree_set_sibling(
         }
  }
  
-STATIC void
+void
  xfs_btree_init_block(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     numrecs,
-       struct xfs_btree_block  *new)   /* new block */
+       struct xfs_mount *mp,
+       struct xfs_buf  *bp,
+       __u32           magic,
+       __u16           level,
+       __u16           numrecs,
+       unsigned int    flags)
  {
-       new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+       struct xfs_btree_block  *new = XFS_BUF_TO_BLOCK(bp);
+
+       new->bb_magic = cpu_to_be32(magic);
         new->bb_level = cpu_to_be16(level);
         new->bb_numrecs = cpu_to_be16(numrecs);
  
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+       if (flags & XFS_BTREE_LONG_PTRS) {
                 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
                 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
         } else {
@@ -863,6 +830,17 @@ xfs_btree_init_block(
         }
  }
  
+STATIC void
+xfs_btree_init_block_cur(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     numrecs,
+       struct xfs_buf          *bp)
+{
+       xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
+                              level, numrecs, cur->bc_flags);
+}
+
  /*
   * Return true if ptr is the last record in the btree and
   * we need to track updateѕ to this record.  The decision
@@ -908,12 +886,12 @@ xfs_btree_ptr_to_daddr(
         union xfs_btree_ptr     *ptr)
  {
         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
+               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
  
                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
         } else {
                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-               ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
+               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
  
                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
                                         be32_to_cpu(ptr->s));
@@ -928,13 +906,13 @@ xfs_btree_set_refs(
         switch (cur->bc_btnum) {
         case XFS_BTNUM_BNO:
         case XFS_BTNUM_CNT:
-               XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+               xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
                 break;
         case XFS_BTNUM_INO:
-               XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+               xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
                 break;
         case XFS_BTNUM_BMAP:
-               XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+               xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
                 break;
         default:
                 ASSERT(0);
@@ -959,9 +937,10 @@ xfs_btree_get_buf_block(
         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
                                  mp->m_bsize, flags);
  
-       ASSERT(*bpp);
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
+       if (!*bpp)
+               return ENOMEM;
  
+       (*bpp)->b_ops = cur->bc_ops->buf_ops;
         *block = XFS_BUF_TO_BLOCK(*bpp);
         return 0;
  }
@@ -988,20 +967,15 @@ xfs_btree_read_buf_block(
  
         d = xfs_btree_ptr_to_daddr(cur, ptr);
         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
-                                  mp->m_bsize, flags, bpp);
+                                  mp->m_bsize, flags, bpp,
+                                  cur->bc_ops->buf_ops);
         if (error)
                 return error;
  
-       ASSERT(*bpp != NULL);
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
-
+       ASSERT(!xfs_buf_geterror(*bpp));
         xfs_btree_set_refs(cur, *bpp);
         *block = XFS_BUF_TO_BLOCK(*bpp);
-
-       error = xfs_btree_check_block(cur, *block, level, *bpp);
-       if (error)
-               xfs_trans_brelse(cur->bc_tp, *bpp);
-       return error;
+       return 0;
  }
  
  /*
@@ -2174,7 +2148,7 @@ xfs_btree_split(
                 goto error0;
  
         /* Fill in the btree header for the new right block. */
-       xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
+       xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
  
         /*
          * Split the entries between the old and the new block evenly.
@@ -2483,7 +2457,7 @@ xfs_btree_new_root(
                 nptr = 2;
         }
         /* Fill in the new block's btree header and log it. */
-       xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
+       xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
                         !xfs_btree_ptr_is_null(cur, &rptr));
diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c

index 3eb34d65ce05059a751b656b2ba1cacefaa9a334..a31d35380140a6778d7b82dbf2637a8d3e203a0e 100644 (file)
--- a/libxfs/xfs_da_btree.c
+++ b/libxfs/xfs_da_btree.c
@@ -62,14 +62,92 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
  /*
   * Utility routines.
   */
-STATIC uint    xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
-STATIC int     xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
+STATIC uint    xfs_da_node_lasthash(struct xfs_buf *bp, int *count);
+STATIC int     xfs_da_node_order(struct xfs_buf *node1_bp,
+                                 struct xfs_buf *node2_bp);
  STATIC int     xfs_da_blk_unlink(xfs_da_state_t *state,
                                   xfs_da_state_blk_t *drop_blk,
                                   xfs_da_state_blk_t *save_blk);
  STATIC void    xfs_da_state_kill_altpath(xfs_da_state_t *state);
  
+static void
+xfs_da_node_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_da_node_hdr *hdr = bp->b_addr;
+       int                     block_ok = 0;
+
+       block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC);
+       block_ok = block_ok &&
+                       be16_to_cpu(hdr->level) > 0 &&
+                       be16_to_cpu(hdr->count) > 0 ;
+       if (!block_ok) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+
+}
+
+static void
+xfs_da_node_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_da_node_verify(bp);
+}
+
+/*
+ * leaf/node format detection on trees is sketchy, so a node read can be done on
+ * leaf level blocks when detection identifies the tree as a node format tree
+ * incorrectly. In this case, we need to swap the verifier to match the correct
+ * format of the block being read.
+ */
+static void
+xfs_da_node_read_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_da_blkinfo   *info = bp->b_addr;
+
+       switch (be16_to_cpu(info->magic)) {
+               case XFS_DA_NODE_MAGIC:
+                       xfs_da_node_verify(bp);
+                       break;
+               case XFS_ATTR_LEAF_MAGIC:
+                       bp->b_ops = &xfs_attr_leaf_buf_ops;
+                       bp->b_ops->verify_read(bp);
+                       return;
+               case XFS_DIR2_LEAFN_MAGIC:
+                       bp->b_ops = &xfs_dir2_leafn_buf_ops;
+                       bp->b_ops->verify_read(bp);
+                       return;
+               default:
+                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+                                            mp, info);
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       break;
+       }
+}
+
+const struct xfs_buf_ops xfs_da_node_buf_ops = {
+       .verify_read = xfs_da_node_read_verify,
+       .verify_write = xfs_da_node_write_verify,
+};
+
+
+int
+xfs_da_node_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp,
+       int                     which_fork)
+{
+       return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
+                                       which_fork, &xfs_da_node_buf_ops);
+}
+
  /*========================================================================
   * Routines used for growing the Btree.
   *========================================================================*/
@@ -79,19 +157,21 @@ STATIC void        xfs_da_state_kill_altpath(xfs_da_state_t *state);
   */
  int
  xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
-                                xfs_dabuf_t **bpp, int whichfork)
+                                struct xfs_buf **bpp, int whichfork)
  {
         xfs_da_intnode_t *node;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
         xfs_trans_t *tp;
  
+       trace_xfs_da_node_create(args);
+
         tp = args->trans;
         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
         if (error)
                 return(error);
         ASSERT(bp != NULL);
-       node = bp->data;
+       node = bp->b_addr;
         node->hdr.info.forw = 0;
         node->hdr.info.back = 0;
         node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
@@ -99,9 +179,10 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
         node->hdr.count = 0;
         node->hdr.level = cpu_to_be16(level);
  
-       xfs_da_log_buf(tp, bp,
+       xfs_trans_log_buf(tp, bp,
                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
  
+       bp->b_ops = &xfs_da_node_buf_ops;
         *bpp = bp;
         return(0);
  }
@@ -115,9 +196,11 @@ xfs_da_split(xfs_da_state_t *state)
  {
         xfs_da_state_blk_t *oldblk, *newblk, *addblk;
         xfs_da_intnode_t *node;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int max, action, error, i;
  
+       trace_xfs_da_split(state->args);
+
         /*
          * Walk back up the tree splitting/inserting/adjusting as necessary.
          * If we need to insert and there isn't room, split the node, then
@@ -156,10 +239,12 @@ xfs_da_split(xfs_da_state_t *state)
                         state->extravalid = 1;
                         if (state->inleaf) {
                                 state->extraafter = 0;  /* before newblk */
+                               trace_xfs_attr_leaf_split_before(state->args);
                                 error = xfs_attr_leaf_split(state, oldblk,
                                                             &state->extrablk);
                         } else {
                                 state->extraafter = 1;  /* after newblk */
+                               trace_xfs_attr_leaf_split_after(state->args);
                                 error = xfs_attr_leaf_split(state, newblk,
                                                             &state->extrablk);
                         }
@@ -176,7 +261,6 @@ xfs_da_split(xfs_da_state_t *state)
                 case XFS_DA_NODE_MAGIC:
                         error = xfs_da_node_split(state, oldblk, newblk, addblk,
                                                          max - i, &action);
-                       xfs_da_buf_done(addblk->bp);
                         addblk->bp = NULL;
                         if (error)
                                 return(error);  /* GROT: dir is inconsistent */
@@ -194,13 +278,6 @@ xfs_da_split(xfs_da_state_t *state)
                  * Update the btree to show the new hashval for this child.
                  */
                 xfs_da_fixhashpath(state, &state->path);
-               /*
-                * If we won't need this block again, it's getting dropped
-                * from the active path by the loop control, so we need
-                * to mark it done now.
-                */
-               if (i > 0 || !addblk)
-                       xfs_da_buf_done(oldblk->bp);
         }
         if (!addblk)
                 return(0);
@@ -212,8 +289,6 @@ xfs_da_split(xfs_da_state_t *state)
         oldblk = &state->path.blk[0];
         error = xfs_da_root_split(state, oldblk, addblk);
         if (error) {
-               xfs_da_buf_done(oldblk->bp);
-               xfs_da_buf_done(addblk->bp);
                 addblk->bp = NULL;
                 return(error);  /* GROT: dir is inconsistent */
         }
@@ -225,7 +300,7 @@ xfs_da_split(xfs_da_state_t *state)
          * and the original block 0 could be at any position in the list.
          */
  
-       node = oldblk->bp->data;
+       node = oldblk->bp->b_addr;
         if (node->hdr.info.forw) {
                 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
                         bp = addblk->bp;
@@ -233,13 +308,13 @@ xfs_da_split(xfs_da_state_t *state)
                         ASSERT(state->extravalid);
                         bp = state->extrablk.bp;
                 }
-               node = bp->data;
+               node = bp->b_addr;
                 node->hdr.info.back = cpu_to_be32(oldblk->blkno);
-               xfs_da_log_buf(state->args->trans, bp,
+               xfs_trans_log_buf(state->args->trans, bp,
                     XFS_DA_LOGRANGE(node, &node->hdr.info,
                     sizeof(node->hdr.info)));
         }
-       node = oldblk->bp->data;
+       node = oldblk->bp->b_addr;
         if (node->hdr.info.back) {
                 if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
                         bp = addblk->bp;
@@ -247,14 +322,12 @@ xfs_da_split(xfs_da_state_t *state)
                         ASSERT(state->extravalid);
                         bp = state->extrablk.bp;
                 }
-               node = bp->data;
+               node = bp->b_addr;
                 node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
-               xfs_da_log_buf(state->args->trans, bp,
+               xfs_trans_log_buf(state->args->trans, bp,
                     XFS_DA_LOGRANGE(node, &node->hdr.info,
                     sizeof(node->hdr.info)));
         }
-       xfs_da_buf_done(oldblk->bp);
-       xfs_da_buf_done(addblk->bp);
         addblk->bp = NULL;
         return(0);
  }
@@ -271,13 +344,15 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
         xfs_da_intnode_t *node, *oldroot;
         xfs_da_args_t *args;
         xfs_dablk_t blkno;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error, size;
         xfs_inode_t *dp;
         xfs_trans_t *tp;
         xfs_mount_t *mp;
         xfs_dir2_leaf_t *leaf;
  
+       trace_xfs_da_root_split(state->args);
+
         /*
          * Copy the existing (incorrect) block from the root node position
          * to a free space somewhere.
@@ -294,20 +369,21 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
         if (error)
                 return(error);
         ASSERT(bp != NULL);
-       node = bp->data;
-       oldroot = blk1->bp->data;
-       if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
+       node = bp->b_addr;
+       oldroot = blk1->bp->b_addr;
+       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
                 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
                              (char *)oldroot);
         } else {
-               ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+               ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
                 leaf = (xfs_dir2_leaf_t *)oldroot;
                 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
                              (char *)leaf);
         }
         memcpy(node, oldroot, size);
-       xfs_da_log_buf(tp, bp, 0, size - 1);
-       xfs_da_buf_done(blk1->bp);
+       xfs_trans_log_buf(tp, bp, 0, size - 1);
+
+       bp->b_ops = blk1->bp->b_ops;
         blk1->bp = bp;
         blk1->blkno = blkno;
  
@@ -319,7 +395,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
         if (error)
                 return(error);
-       node = bp->data;
+       node = bp->b_addr;
         node->btree[0].hashval = cpu_to_be32(blk1->hashval);
         node->btree[0].before = cpu_to_be32(blk1->blkno);
         node->btree[1].hashval = cpu_to_be32(blk2->hashval);
@@ -327,7 +403,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
         node->hdr.count = cpu_to_be16(2);
  
  #ifdef DEBUG
-       if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
+       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
                 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
                        blk1->blkno < mp->m_dirfreeblk);
                 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -336,10 +412,9 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
  #endif
  
         /* Header is already logged by xfs_da_node_create */
-       xfs_da_log_buf(tp, bp,
+       xfs_trans_log_buf(tp, bp,
                 XFS_DA_LOGRANGE(node, node->btree,
                         sizeof(xfs_da_node_entry_t) * 2));
-       xfs_da_buf_done(bp);
  
         return(0);
  }
@@ -358,8 +433,10 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
         int newcount, error;
         int useextra;
  
-       node = oldblk->bp->data;
-       ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+       trace_xfs_da_node_split(state->args);
+
+       node = oldblk->bp->b_addr;
+       ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
  
         /*
          * With V2 dirs the extra block is data or freespace.
@@ -405,7 +482,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
          *
          * If we had double-split op below us, then add the extra block too.
          */
-       node = oldblk->bp->data;
+       node = oldblk->bp->b_addr;
         if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
                 oldblk->index++;
                 xfs_da_node_add(state, oldblk, addblk);
@@ -444,8 +521,10 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
         int count, tmp;
         xfs_trans_t *tp;
  
-       node1 = blk1->bp->data;
-       node2 = blk2->bp->data;
+       trace_xfs_da_node_rebalance(state->args);
+
+       node1 = blk1->bp->b_addr;
+       node2 = blk2->bp->b_addr;
         /*
          * Figure out how many entries need to move, and in which direction.
          * Swap the nodes around if that makes it simpler.
@@ -458,8 +537,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 node1 = node2;
                 node2 = tmpnode;
         }
-       ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
-       ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+       ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+       ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
         if (count == 0)
                 return;
@@ -499,7 +578,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
                 memcpy(btree_d, btree_s, tmp);
                 be16_add_cpu(&node1->hdr.count, count);
-               xfs_da_log_buf(tp, blk1->bp,
+               xfs_trans_log_buf(tp, blk1->bp,
                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
  
                 /*
@@ -516,9 +595,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
         /*
          * Log header of node 1 and all current bits of node 2.
          */
-       xfs_da_log_buf(tp, blk1->bp,
+       xfs_trans_log_buf(tp, blk1->bp,
                 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
-       xfs_da_log_buf(tp, blk2->bp,
+       xfs_trans_log_buf(tp, blk2->bp,
                 XFS_DA_LOGRANGE(node2, &node2->hdr,
                         sizeof(node2->hdr) +
                         sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
@@ -527,8 +606,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
          * Record the last hashval from each block for upward propagation.
          * (note: don't use the swapped node pointers)
          */
-       node1 = blk1->bp->data;
-       node2 = blk2->bp->data;
+       node1 = blk1->bp->b_addr;
+       node2 = blk2->bp->b_addr;
         blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
         blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
  
@@ -552,8 +631,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
         xfs_da_node_entry_t *btree;
         int tmp;
  
-       node = oldblk->bp->data;
-       ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+       trace_xfs_da_node_add(state->args);
+
+       node = oldblk->bp->b_addr;
+       ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
         ASSERT(newblk->blkno != 0);
         if (state->args->whichfork == XFS_DATA_FORK)
@@ -571,10 +652,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
         }
         btree->hashval = cpu_to_be32(newblk->hashval);
         btree->before = cpu_to_be32(newblk->blkno);
-       xfs_da_log_buf(state->args->trans, oldblk->bp,
+       xfs_trans_log_buf(state->args->trans, oldblk->bp,
                 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
         be16_add_cpu(&node->hdr.count, 1);
-       xfs_da_log_buf(state->args->trans, oldblk->bp,
+       xfs_trans_log_buf(state->args->trans, oldblk->bp,
                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
  
         /*
@@ -597,6 +678,8 @@ xfs_da_join(xfs_da_state_t *state)
         xfs_da_state_blk_t *drop_blk, *save_blk;
         int action, error;
  
+       trace_xfs_da_join(state->args);
+
         action = 0;
         drop_blk = &state->path.blk[ state->path.active-1 ];
         save_blk = &state->altpath.blk[ state->path.active-1 ];
@@ -670,6 +753,24 @@ xfs_da_join(xfs_da_state_t *state)
         return(error);
  }
  
+#ifdef DEBUG
+static void
+xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
+{
+       __be16  magic = blkinfo->magic;
+
+       if (level == 1) {
+               ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+                      magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       } else
+               ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+       ASSERT(!blkinfo->forw);
+       ASSERT(!blkinfo->back);
+}
+#else  /* !DEBUG */
+#define        xfs_da_blkinfo_onlychild_validate(blkinfo, level)
+#endif /* !DEBUG */
+
  /*
   * We have only one entry in the root.  Copy the only remaining child of
   * the old root to block 0 as the new root node.
@@ -678,18 +779,18 @@ STATIC int
  xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
  {
         xfs_da_intnode_t *oldroot;
-       /* REFERENCED */
-       xfs_da_blkinfo_t *blkinfo;
         xfs_da_args_t *args;
         xfs_dablk_t child;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
  
+       trace_xfs_da_root_join(state->args);
+
         args = state->args;
         ASSERT(args != NULL);
         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
-       oldroot = root_blk->bp->data;
-       ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+       oldroot = root_blk->bp->b_addr;
+       ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         ASSERT(!oldroot->hdr.info.forw);
         ASSERT(!oldroot->hdr.info.back);
  
@@ -705,22 +806,23 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
          */
         child = be32_to_cpu(oldroot->btree[0].before);
         ASSERT(child != 0);
-       error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
+       error = xfs_da_node_read(args->trans, args->dp, child, -1, &bp,
                                              args->whichfork);
         if (error)
                 return(error);
         ASSERT(bp != NULL);
-       blkinfo = bp->data;
-       if (be16_to_cpu(oldroot->hdr.level) == 1) {
-               ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
-                      be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
-       } else {
-               ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
-       }
-       ASSERT(!blkinfo->forw);
-       ASSERT(!blkinfo->back);
-       memcpy(root_blk->bp->data, bp->data, state->blocksize);
-       xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+       xfs_da_blkinfo_onlychild_validate(bp->b_addr,
+                                       be16_to_cpu(oldroot->hdr.level));
+
+       /*
+        * This could be copying a leaf back into the root block in the case of
+        * there only being a single leaf block left in the tree. Hence we have
+        * to update the b_ops pointer as well to match the buffer type change
+        * that could occur.
+        */
+       memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
+       root_blk->bp->b_ops = bp->b_ops;
+       xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
         error = xfs_da_shrink_inode(args, child, bp);
         return(error);
  }
@@ -742,7 +844,9 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
         xfs_da_blkinfo_t *info;
         int count, forward, error, retval, i;
         xfs_dablk_t blkno;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
+
+       trace_xfs_da_node_toosmall(state->args);
  
         /*
          * Check for the degenerate case of the block being over 50% full.
@@ -750,8 +854,8 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
          * to coalesce with a sibling.
          */
         blk = &state->path.blk[ state->path.active-1 ];
-       info = blk->bp->data;
-       ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
+       info = blk->bp->b_addr;
+       ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         node = (xfs_da_intnode_t *)info;
         count = be16_to_cpu(node->hdr.count);
         if (count > (state->node_ents >> 1)) {
@@ -800,7 +904,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
                         blkno = be32_to_cpu(info->back);
                 if (blkno == 0)
                         continue;
-               error = xfs_da_read_buf(state->args->trans, state->args->dp,
+               error = xfs_da_node_read(state->args->trans, state->args->dp,
                                         blkno, -1, &bp, state->args->whichfork);
                 if (error)
                         return(error);
@@ -810,10 +914,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
                 count  = state->node_ents;
                 count -= state->node_ents >> 2;
                 count -= be16_to_cpu(node->hdr.count);
-               node = bp->data;
-               ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+               node = bp->b_addr;
+               ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
                 count -= be16_to_cpu(node->hdr.count);
-               xfs_da_brelse(state->args->trans, bp);
+               xfs_trans_brelse(state->args->trans, bp);
                 if (count >= 0)
                         break;  /* fits with at least 25% to spare */
         }
@@ -865,6 +969,8 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
         xfs_dahash_t lasthash=0;
         int level, count;
  
+       trace_xfs_da_fixhashpath(state->args);
+
         level = path->active-1;
         blk = &path->blk[ level ];
         switch (blk->magic) {
@@ -885,14 +991,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
                 break;
         }
         for (blk--, level--; level >= 0; blk--, level--) {
-               node = blk->bp->data;
-               ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+               node = blk->bp->b_addr;
+               ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
                 btree = &node->btree[ blk->index ];
                 if (be32_to_cpu(btree->hashval) == lasthash)
                         break;
                 blk->hashval = lasthash;
                 btree->hashval = cpu_to_be32(lasthash);
-               xfs_da_log_buf(state->args->trans, blk->bp,
+               xfs_trans_log_buf(state->args->trans, blk->bp,
                                   XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
  
                 lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
@@ -909,7 +1015,9 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
         xfs_da_node_entry_t *btree;
         int tmp;
  
-       node = drop_blk->bp->data;
+       trace_xfs_da_node_remove(state->args);
+
+       node = drop_blk->bp->b_addr;
         ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
         ASSERT(drop_blk->index >= 0);
  
@@ -921,15 +1029,15 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
                 tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
                 memmove(btree, btree + 1, tmp);
-               xfs_da_log_buf(state->args->trans, drop_blk->bp,
+               xfs_trans_log_buf(state->args->trans, drop_blk->bp,
                     XFS_DA_LOGRANGE(node, btree, tmp));
                 btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
         }
         memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
-       xfs_da_log_buf(state->args->trans, drop_blk->bp,
+       xfs_trans_log_buf(state->args->trans, drop_blk->bp,
             XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
         be16_add_cpu(&node->hdr.count, -1);
-       xfs_da_log_buf(state->args->trans, drop_blk->bp,
+       xfs_trans_log_buf(state->args->trans, drop_blk->bp,
             XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
  
         /*
@@ -952,10 +1060,12 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
         int tmp;
         xfs_trans_t *tp;
  
-       drop_node = drop_blk->bp->data;
-       save_node = save_blk->bp->data;
-       ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
-       ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+       trace_xfs_da_node_unbalance(state->args);
+
+       drop_node = drop_blk->bp->b_addr;
+       save_node = save_blk->bp->b_addr;
+       ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+       ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         tp = state->args->trans;
  
         /*
@@ -970,13 +1080,13 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
                 tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
                 memmove(btree, &save_node->btree[0], tmp);
                 btree = &save_node->btree[0];
-               xfs_da_log_buf(tp, save_blk->bp,
+               xfs_trans_log_buf(tp, save_blk->bp,
                         XFS_DA_LOGRANGE(save_node, btree,
                                 (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
                                 sizeof(xfs_da_node_entry_t)));
         } else {
                 btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
-               xfs_da_log_buf(tp, save_blk->bp,
+               xfs_trans_log_buf(tp, save_blk->bp,
                         XFS_DA_LOGRANGE(save_node, btree,
                                 be16_to_cpu(drop_node->hdr.count) *
                                 sizeof(xfs_da_node_entry_t)));
@@ -989,7 +1099,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
         memcpy(btree, &drop_node->btree[0], tmp);
         be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
  
-       xfs_da_log_buf(tp, save_blk->bp,
+       xfs_trans_log_buf(tp, save_blk->bp,
                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
                         sizeof(save_node->hdr)));
  
@@ -1040,14 +1150,14 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
                  * Read the next node down in the tree.
                  */
                 blk->blkno = blkno;
-               error = xfs_da_read_buf(args->trans, args->dp, blkno,
+               error = xfs_da_node_read(args->trans, args->dp, blkno,
                                         -1, &blk->bp, args->whichfork);
                 if (error) {
                         blk->blkno = 0;
                         state->path.active--;
                         return(error);
                 }
-               curr = blk->bp->data;
+               curr = blk->bp->b_addr;
                 blk->magic = be16_to_cpu(curr->magic);
                 ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
                        blk->magic == XFS_DIR2_LEAFN_MAGIC ||
@@ -1057,7 +1167,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
                  * Search an intermediate node for a match.
                  */
                 if (blk->magic == XFS_DA_NODE_MAGIC) {
-                       node = blk->bp->data;
+                       node = blk->bp->b_addr;
                         max = be16_to_cpu(node->hdr.count);
                         blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
  
@@ -1163,15 +1273,15 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
         xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
         xfs_da_args_t *args;
         int before=0, error;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
  
         /*
          * Set up environment.
          */
         args = state->args;
         ASSERT(args != NULL);
-       old_info = old_blk->bp->data;
-       new_info = new_blk->bp->data;
+       old_info = old_blk->bp->b_addr;
+       new_info = new_blk->bp->b_addr;
         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
                old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1198,48 +1308,48 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
                 /*
                  * Link new block in before existing block.
                  */
+               trace_xfs_da_link_before(args);
                 new_info->forw = cpu_to_be32(old_blk->blkno);
                 new_info->back = old_info->back;
                 if (old_info->back) {
-                       error = xfs_da_read_buf(args->trans, args->dp,
+                       error = xfs_da_node_read(args->trans, args->dp,
                                                 be32_to_cpu(old_info->back),
                                                 -1, &bp, args->whichfork);
                         if (error)
                                 return(error);
                         ASSERT(bp != NULL);
-                       tmp_info = bp->data;
+                       tmp_info = bp->b_addr;
                         ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
                         ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
                         tmp_info->forw = cpu_to_be32(new_blk->blkno);
-                       xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
-                       xfs_da_buf_done(bp);
+                       xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
                 }
                 old_info->back = cpu_to_be32(new_blk->blkno);
         } else {
                 /*
                  * Link new block in after existing block.
                  */
+               trace_xfs_da_link_after(args);
                 new_info->forw = old_info->forw;
                 new_info->back = cpu_to_be32(old_blk->blkno);
                 if (old_info->forw) {
-                       error = xfs_da_read_buf(args->trans, args->dp,
+                       error = xfs_da_node_read(args->trans, args->dp,
                                                 be32_to_cpu(old_info->forw),
                                                 -1, &bp, args->whichfork);
                         if (error)
                                 return(error);
                         ASSERT(bp != NULL);
-                       tmp_info = bp->data;
+                       tmp_info = bp->b_addr;
                         ASSERT(tmp_info->magic == old_info->magic);
                         ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
                         tmp_info->back = cpu_to_be32(new_blk->blkno);
-                       xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
-                       xfs_da_buf_done(bp);
+                       xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
                 }
                 old_info->forw = cpu_to_be32(new_blk->blkno);
         }
  
-       xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
-       xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
+       xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
+       xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
         return(0);
  }
  
@@ -1247,14 +1357,16 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
   * Compare two intermediate nodes for "order".
   */
  STATIC int
-xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
+xfs_da_node_order(
+       struct xfs_buf  *node1_bp,
+       struct xfs_buf  *node2_bp)
  {
         xfs_da_intnode_t *node1, *node2;
  
-       node1 = node1_bp->data;
-       node2 = node2_bp->data;
-       ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
-              (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
+       node1 = node1_bp->b_addr;
+       node2 = node2_bp->b_addr;
+       ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
+              node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
             ((be32_to_cpu(node2->btree[0].hashval) <
               be32_to_cpu(node1->btree[0].hashval)) ||
@@ -1269,12 +1381,14 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
   * Pick up the last hashvalue from an intermediate node.
   */
  STATIC uint
-xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
+xfs_da_node_lasthash(
+       struct xfs_buf  *bp,
+       int             *count)
  {
         xfs_da_intnode_t *node;
  
-       node = bp->data;
-       ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+       node = bp->b_addr;
+       ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
         if (count)
                 *count = be16_to_cpu(node->hdr.count);
         if (!node->hdr.count)
@@ -1291,7 +1405,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
  {
         xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
         xfs_da_args_t *args;
-       xfs_dabuf_t *bp;
+       struct xfs_buf *bp;
         int error;
  
         /*
@@ -1299,8 +1413,8 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
          */
         args = state->args;
         ASSERT(args != NULL);
-       save_info = save_blk->bp->data;
-       drop_info = drop_blk->bp->data;
+       save_info = save_blk->bp->b_addr;
+       drop_info = drop_blk->bp->b_addr;
         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
                save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1316,42 +1430,42 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
          * Unlink the leaf block from the doubly linked chain of leaves.
          */
         if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
+               trace_xfs_da_unlink_back(args);
                 save_info->back = drop_info->back;
                 if (drop_info->back) {
-                       error = xfs_da_read_buf(args->trans, args->dp,
+                       error = xfs_da_node_read(args->trans, args->dp,
                                                 be32_to_cpu(drop_info->back),
                                                 -1, &bp, args->whichfork);
                         if (error)
                                 return(error);
                         ASSERT(bp != NULL);
-                       tmp_info = bp->data;
+                       tmp_info = bp->b_addr;
                         ASSERT(tmp_info->magic == save_info->magic);
                         ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
                         tmp_info->forw = cpu_to_be32(save_blk->blkno);
-                       xfs_da_log_buf(args->trans, bp, 0,
+                       xfs_trans_log_buf(args->trans, bp, 0,
                                                     sizeof(*tmp_info) - 1);
-                       xfs_da_buf_done(bp);
                 }
         } else {
+               trace_xfs_da_unlink_forward(args);
                 save_info->forw = drop_info->forw;
                 if (drop_info->forw) {
-                       error = xfs_da_read_buf(args->trans, args->dp,
+                       error = xfs_da_node_read(args->trans, args->dp,
                                                 be32_to_cpu(drop_info->forw),
                                                 -1, &bp, args->whichfork);
                         if (error)
                                 return(error);
                         ASSERT(bp != NULL);
-                       tmp_info = bp->data;
+                       tmp_info = bp->b_addr;
                         ASSERT(tmp_info->magic == save_info->magic);
                         ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
                         tmp_info->back = cpu_to_be32(save_blk->blkno);
-                       xfs_da_log_buf(args->trans, bp, 0,
+                       xfs_trans_log_buf(args->trans, bp, 0,
                                                     sizeof(*tmp_info) - 1);
-                       xfs_da_buf_done(bp);
                 }
         }
  
-       xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+       xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
         return(0);
  }
  
@@ -1374,6 +1488,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
         xfs_dablk_t blkno=0;
         int level, error;
  
+       trace_xfs_da_path_shift(state->args);
+
         /*
          * Roll up the Btree looking for the first block where our
          * current index is not at the edge of the block.  Note that
@@ -1386,8 +1502,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
         level = (path->active-1) - 1;   /* skip bottom layer in path */
         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
                 ASSERT(blk->bp != NULL);
-               node = blk->bp->data;
-               ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+               node = blk->bp->b_addr;
+               ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
                 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
                         blk->index++;
                         blkno = be32_to_cpu(node->btree[blk->index].before);
@@ -1414,21 +1530,21 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
                  * (if it's dirty, trans won't actually let go)
                  */
                 if (release)
-                       xfs_da_brelse(args->trans, blk->bp);
+                       xfs_trans_brelse(args->trans, blk->bp);
  
                 /*
                  * Read the next child block.
                  */
                 blk->blkno = blkno;
-               error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
-                                                    &blk->bp, args->whichfork);
+               error = xfs_da_node_read(args->trans, args->dp, blkno, -1,
+                                       &blk->bp, args->whichfork);
                 if (error)
                         return(error);
                 ASSERT(blk->bp != NULL);
-               info = blk->bp->data;
-               ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
-                      be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
-                      be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+               info = blk->bp->b_addr;
+               ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
                 blk->magic = be16_to_cpu(info->magic);
                 if (blk->magic == XFS_DA_NODE_MAGIC) {
                         node = (xfs_da_intnode_t *)info;
@@ -1521,79 +1637,60 @@ const struct xfs_nameops xfs_default_nameops = {
         .compname       = xfs_da_compname
  };
  
-/*
- * Add a block to the btree ahead of the file.
- * Return the new block number to the caller.
- */
  int
-xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
+xfs_da_grow_inode_int(
+       struct xfs_da_args      *args,
+       xfs_fileoff_t           *bno,
+       int                     count)
  {
-       xfs_fileoff_t bno, b;
-       xfs_bmbt_irec_t map;
-       xfs_bmbt_irec_t *mapp;
-       xfs_inode_t *dp;
-       int nmap, error, w, count, c, got, i, mapi;
-       xfs_trans_t *tp;
-       xfs_mount_t *mp;
-       xfs_drfsbno_t   nblks;
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       w = args->whichfork;
-       tp = args->trans;
-       nblks = dp->i_d.di_nblocks;
+       struct xfs_trans        *tp = args->trans;
+       struct xfs_inode        *dp = args->dp;
+       int                     w = args->whichfork;
+       xfs_drfsbno_t           nblks = dp->i_d.di_nblocks;
+       struct xfs_bmbt_irec    map, *mapp;
+       int                     nmap, error, got, i, mapi;
  
-       /*
-        * For new directories adjust the file offset and block count.
-        */
-       if (w == XFS_DATA_FORK) {
-               bno = mp->m_dirleafblk;
-               count = mp->m_dirblkfsbs;
-       } else {
-               bno = 0;
-               count = 1;
-       }
         /*
          * Find a spot in the file space to put the new block.
          */
-       if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
+       error = xfs_bmap_first_unused(tp, dp, count, bno, w);
+       if (error)
                 return error;
-       if (w == XFS_DATA_FORK)
-               ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
+
         /*
          * Try mapping it in one filesystem block.
          */
         nmap = 1;
         ASSERT(args->firstblock != NULL);
-       if ((error = xfs_bmapi(tp, dp, bno, count,
-                       xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
-                       XFS_BMAPI_CONTIG,
+       error = xfs_bmapi_write(tp, dp, *bno, count,
+                       xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
                         args->firstblock, args->total, &map, &nmap,
-                       args->flist))) {
+                       args->flist);
+       if (error)
                 return error;
-       }
+
         ASSERT(nmap <= 1);
         if (nmap == 1) {
                 mapp = &map;
                 mapi = 1;
-       }
-       /*
-        * If we didn't get it and the block might work if fragmented,
-        * try without the CONTIG flag.  Loop until we get it all.
-        */
-       else if (nmap == 0 && count > 1) {
+       } else if (nmap == 0 && count > 1) {
+               xfs_fileoff_t           b;
+               int                     c;
+
+               /*
+                * If we didn't get it and the block might work if fragmented,
+                * try without the CONTIG flag.  Loop until we get it all.
+                */
                 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
-               for (b = bno, mapi = 0; b < bno + count; ) {
+               for (b = *bno, mapi = 0; b < *bno + count; ) {
                         nmap = MIN(XFS_BMAP_MAX_NMAP, count);
-                       c = (int)(bno + count - b);
-                       if ((error = xfs_bmapi(tp, dp, b, c,
-                                       xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
-                                       XFS_BMAPI_METADATA,
+                       c = (int)(*bno + count - b);
+                       error = xfs_bmapi_write(tp, dp, b, c,
+                                       xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
                                         args->firstblock, args->total,
-                                       &mapp[mapi], &nmap, args->flist))) {
-                               kmem_free(mapp);
-                               return error;
-                       }
+                                       &mapp[mapi], &nmap, args->flist);
+                       if (error)
+                               goto out_free_map;
                         if (nmap < 1)
                                 break;
                         mapi += nmap;
@@ -1604,24 +1701,55 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
                 mapi = 0;
                 mapp = NULL;
         }
+
         /*
          * Count the blocks we got, make sure it matches the total.
          */
         for (i = 0, got = 0; i < mapi; i++)
                 got += mapp[i].br_blockcount;
-       if (got != count || mapp[0].br_startoff != bno ||
+       if (got != count || mapp[0].br_startoff != *bno ||
             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
-           bno + count) {
-               if (mapp != &map)
-                       kmem_free(mapp);
-               return XFS_ERROR(ENOSPC);
+           *bno + count) {
+               error = XFS_ERROR(ENOSPC);
+               goto out_free_map;
         }
-       if (mapp != &map)
-               kmem_free(mapp);
+
         /* account for newly allocated blocks in reserved blocks total */
         args->total -= dp->i_d.di_nblocks - nblks;
-       *new_blkno = (xfs_dablk_t)bno;
-       return 0;
+
+out_free_map:
+       if (mapp != &map)
+               kmem_free(mapp);
+       return error;
+}
+
+/*
+ * Add a block to the btree ahead of the file.
+ * Return the new block number to the caller.
+ *
+ * For the data fork the search for free file space starts at m_dirleafblk
+ * and asks for m_dirblkfsbs filesystem blocks; for any other fork it starts
+ * at offset 0 and asks for a single block.  The allocation itself is done by
+ * xfs_da_grow_inode_int().  *new_blkno is written only when that call
+ * returns 0; a non-zero error is passed back to the caller unchanged.
+ */
+int
+xfs_da_grow_inode(
+       struct xfs_da_args      *args,
+       xfs_dablk_t             *new_blkno)
+{
+       xfs_fileoff_t           bno;
+       int                     count;
+       int                     error;
+
+       trace_xfs_da_grow_inode(args);
+
+       if (args->whichfork == XFS_DATA_FORK) {
+               bno = args->dp->i_mount->m_dirleafblk;
+               count = args->dp->i_mount->m_dirblkfsbs;
+       } else {
+               bno = 0;
+               count = 1;
+       }
+
+       error = xfs_da_grow_inode_int(args, &bno, count);
+       if (!error)
+               *new_blkno = (xfs_dablk_t)bno;
+       return error;
  }
  
  /*
@@ -1633,11 +1761,13 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
   * a bmap btree split to do that.
   */
  STATIC int
-xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
-                     xfs_dabuf_t **dead_bufp)
+xfs_da_swap_lastblock(
+       xfs_da_args_t   *args,
+       xfs_dablk_t     *dead_blknop,
+       struct xfs_buf  **dead_bufp)
  {
         xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
-       xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
+       struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf;
         xfs_fileoff_t lastoff;
         xfs_inode_t *ip;
         xfs_trans_t *tp;
@@ -1648,6 +1778,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
         xfs_dir2_leaf_t *dead_leaf2;
         xfs_dahash_t dead_hash;
  
+       trace_xfs_da_swap_lastblock(args);
+
         dead_buf = *dead_bufp;
         dead_blkno = *dead_blknop;
         tp = args->trans;
@@ -1668,23 +1800,24 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
          * Read the last block in the btree space.
          */
         last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
-       if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
+       error = xfs_da_node_read(tp, ip, last_blkno, -1, &last_buf, w);
+       if (error)
                 return error;
         /*
          * Copy the last block into the dead buffer and log it.
          */
-       memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
-       xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
-       dead_info = dead_buf->data;
+       memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize);
+       xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+       dead_info = dead_buf->b_addr;
         /*
          * Get values from the moved block.
          */
-       if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
+       if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
                 dead_level = 0;
                 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
         } else {
-               ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
+               ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
                 dead_node = (xfs_da_intnode_t *)dead_info;
                 dead_level = be16_to_cpu(dead_node->hdr.level);
                 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
@@ -1694,9 +1827,10 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
          * If the moved block has a left sibling, fix up the pointers.
          */
         if ((sib_blkno = be32_to_cpu(dead_info->back))) {
-               if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
+               error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
+               if (error)
                         goto done;
-               sib_info = sib_buf->data;
+               sib_info = sib_buf->b_addr;
                 if (unlikely(
                     be32_to_cpu(sib_info->forw) != last_blkno ||
                     sib_info->magic != dead_info->magic)) {
@@ -1706,19 +1840,19 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
                         goto done;
                 }
                 sib_info->forw = cpu_to_be32(dead_blkno);
-               xfs_da_log_buf(tp, sib_buf,
+               xfs_trans_log_buf(tp, sib_buf,
                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
                                         sizeof(sib_info->forw)));
-               xfs_da_buf_done(sib_buf);
                 sib_buf = NULL;
         }
         /*
          * If the moved block has a right sibling, fix up the pointers.
          */
         if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
-               if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
+               error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
+               if (error)
                         goto done;
-               sib_info = sib_buf->data;
+               sib_info = sib_buf->b_addr;
                 if (unlikely(
                        be32_to_cpu(sib_info->back) != last_blkno ||
                        sib_info->magic != dead_info->magic)) {
@@ -1728,10 +1862,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
                         goto done;
                 }
                 sib_info->back = cpu_to_be32(dead_blkno);
-               xfs_da_log_buf(tp, sib_buf,
+               xfs_trans_log_buf(tp, sib_buf,
                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
                                         sizeof(sib_info->back)));
-               xfs_da_buf_done(sib_buf);
                 sib_buf = NULL;
         }
         par_blkno = mp->m_dirleafblk;
@@ -1740,11 +1873,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
          * Walk down the tree looking for the parent of the moved block.
          */
         for (;;) {
-               if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
+               error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w);
+               if (error)
                         goto done;
-               par_node = par_buf->data;
-               if (unlikely(
-                   be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
+               par_node = par_buf->b_addr;
+               if (unlikely(par_node->hdr.info.magic !=
+                   cpu_to_be16(XFS_DA_NODE_MAGIC) ||
                     (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
                                          XFS_ERRLEVEL_LOW, mp);
@@ -1766,7 +1900,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
                 par_blkno = be32_to_cpu(par_node->btree[entno].before);
                 if (level == dead_level + 1)
                         break;
-               xfs_da_brelse(tp, par_buf);
+               xfs_trans_brelse(tp, par_buf);
                 par_buf = NULL;
         }
         /*
@@ -1782,7 +1916,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
                 if (entno < be16_to_cpu(par_node->hdr.count))
                         break;
                 par_blkno = be32_to_cpu(par_node->hdr.info.forw);
-               xfs_da_brelse(tp, par_buf);
+               xfs_trans_brelse(tp, par_buf);
                 par_buf = NULL;
                 if (unlikely(par_blkno == 0)) {
                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
@@ -1790,12 +1924,13 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
                         error = XFS_ERROR(EFSCORRUPTED);
                         goto done;
                 }
-               if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
+               error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w);
+               if (error)
                         goto done;
-               par_node = par_buf->data;
+               par_node = par_buf->b_addr;
                 if (unlikely(
                     be16_to_cpu(par_node->hdr.level) != level ||
-                   be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
+                   par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
                                          XFS_ERRLEVEL_LOW, mp);
                         error = XFS_ERROR(EFSCORRUPTED);
@@ -1807,20 +1942,18 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
          * Update the parent entry pointing to the moved block.
          */
         par_node->btree[entno].before = cpu_to_be32(dead_blkno);
-       xfs_da_log_buf(tp, par_buf,
+       xfs_trans_log_buf(tp, par_buf,
                 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
                                 sizeof(par_node->btree[entno].before)));
-       xfs_da_buf_done(par_buf);
-       xfs_da_buf_done(dead_buf);
         *dead_blknop = last_blkno;
         *dead_bufp = last_buf;
         return 0;
  done:
         if (par_buf)
-               xfs_da_brelse(tp, par_buf);
+               xfs_trans_brelse(tp, par_buf);
         if (sib_buf)
-               xfs_da_brelse(tp, sib_buf);
-       xfs_da_brelse(tp, last_buf);
+               xfs_trans_brelse(tp, sib_buf);
+       xfs_trans_brelse(tp, last_buf);
         return error;
  }
  
@@ -1828,14 +1961,18 @@ done:
   * Remove a btree block from a directory or attribute.
   */
  int
-xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
-                   xfs_dabuf_t *dead_buf)
+xfs_da_shrink_inode(
+       xfs_da_args_t   *args,
+       xfs_dablk_t     dead_blkno,
+       struct xfs_buf  *dead_buf)
  {
         xfs_inode_t *dp;
         int done, error, w, count;
         xfs_trans_t *tp;
         xfs_mount_t *mp;
  
+       trace_xfs_da_shrink_inode(args);
+
         dp = args->dp;
         w = args->whichfork;
         tp = args->trans;
@@ -1862,7 +1999,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
                         break;
                 }
         }
-       xfs_da_binval(tp, dead_buf);
+       xfs_trans_binval(tp, dead_buf);
         return error;
  }
  
@@ -1894,36 +2031,75 @@ xfs_da_map_covers_blocks(
  }
  
  /*
- * Make a dabuf.
- * Used for get_buf, read_buf, read_bufr, and reada_buf.
+ * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
+ *
+ * For the single map case, it is assumed that the caller has provided a pointer
+ * to a valid xfs_buf_map.  For the multiple map case, this function will
+ * allocate the xfs_buf_map to hold all the maps and replace the caller's single
+ * map pointer with the allocated map.
+ *
+ * Each entry is filled in from the matching irec: bm_bn from br_startblock via
+ * XFS_FSB_TO_DADDR() and bm_len from br_blockcount via XFS_FSB_TO_BB().  On
+ * return *nmaps holds the number of entries written (nirecs).  Returns 0 on
+ * success, or ENOMEM if the array for the multiple map case could not be
+ * allocated.
   */
-int
-xfs_da_do_buf(
-       xfs_trans_t     *trans,
-       xfs_inode_t     *dp,
-       xfs_dablk_t     bno,
-       xfs_daddr_t     *mappedbnop,
-       xfs_dabuf_t     **bpp,
-       int             whichfork,
-       int             caller,
-       inst_t          *ra)
+static int
+xfs_buf_map_from_irec(
+       struct xfs_mount        *mp,
+       struct xfs_buf_map      **mapp,
+       unsigned int            *nmaps,
+       struct xfs_bmbt_irec    *irecs,
+       unsigned int            nirecs)
  {
-       xfs_buf_t       *bp = NULL;
-       xfs_buf_t       **bplist;
-       int             error=0;
-       int             i;
-       xfs_bmbt_irec_t map;
-       xfs_bmbt_irec_t *mapp;
-       xfs_daddr_t     mappedbno;
-       xfs_mount_t     *mp;
-       int             nbplist=0;
-       int             nfsb;
-       int             nmap;
-       xfs_dabuf_t     *rbp;
+       struct xfs_buf_map      *map;
+       int                     i;
+
+       ASSERT(*nmaps == 1);
+       ASSERT(nirecs >= 1);
+
+       if (nirecs > 1) {
+               map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP);
+               if (!map)
+                       return ENOMEM;
+               *mapp = map;
+       }
+
+       *nmaps = nirecs;
+       map = *mapp;
+       for (i = 0; i < *nmaps; i++) {
+               ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
+                      irecs[i].br_startblock != HOLESTARTBLOCK);
+               map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
+               map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
+       }
+       return 0;
+}
+
+/*
+ * Map the block we are given ready for reading. There are three possible return
+ * values:
+ *     -1 - will be returned if we land in a hole and mappedbno == -2 so the
+ *          caller knows not to execute a subsequent read.
+ *      0 - if we mapped the block successfully
+ *     >0 - positive error number if there was an error.
+ */
+static int
+xfs_dabuf_map(
+       struct xfs_trans        *trans,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       int                     whichfork,
+       struct xfs_buf_map      **map,
+       int                     *nmaps)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       int                     nfsb;
+       int                     error = 0;
+       struct xfs_bmbt_irec    irec;
+       struct xfs_bmbt_irec    *irecs = &irec;
+       int                     nirecs;
+
+       ASSERT(map && *map);
+       ASSERT(*nmaps == 1);
  
-       mp = dp->i_mount;
         nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
-       mappedbno = *mappedbnop;
+
         /*
          * Caller doesn't have a mapping.  -2 means don't complain
          * if we land in a hole.
@@ -1932,139 +2108,154 @@ xfs_da_do_buf(
                 /*
                  * Optimize the one-block case.
                  */
-               if (nfsb == 1) {
-                       xfs_fsblock_t   fsb;
+               if (nfsb != 1)
+                       irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP);
  
-                       if ((error =
-                           xfs_bmapi_single(trans, dp, whichfork, &fsb,
-                                   (xfs_fileoff_t)bno))) {
-                               return error;
-                       }
-                       mapp = &map;
-                       if (fsb == NULLFSBLOCK) {
-                               nmap = 0;
-                       } else {
-                               map.br_startblock = fsb;
-                               map.br_startoff = (xfs_fileoff_t)bno;
-                               map.br_blockcount = 1;
-                               nmap = 1;
-                       }
-               } else {
-                       mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
-                       nmap = nfsb;
-                       if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
-                                       nfsb,
-                                       XFS_BMAPI_METADATA |
-                                               xfs_bmapi_aflag(whichfork),
-                                       NULL, 0, mapp, &nmap, NULL)))
-                               goto exit0;
-               }
+               nirecs = nfsb;
+               error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
+                                      &nirecs, xfs_bmapi_aflag(whichfork));
+               if (error)
+                       goto out;
         } else {
-               map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
-               map.br_startoff = (xfs_fileoff_t)bno;
-               map.br_blockcount = nfsb;
-               mapp = &map;
-               nmap = 1;
+               irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
+               irecs->br_startoff = (xfs_fileoff_t)bno;
+               irecs->br_blockcount = nfsb;
+               irecs->br_state = 0;
+               nirecs = 1;
         }
-       if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
-               error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
+
+       if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
+               error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED);
                 if (unlikely(error == EFSCORRUPTED)) {
                         if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
-                               cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
-                                       (long long)bno);
-                               cmn_err(CE_ALERT, "dir: inode %lld\n",
+                               int i;
+                               xfs_alert(mp, "%s: bno %lld dir: inode %lld",
+                                       __func__, (long long)bno,
                                         (long long)dp->i_ino);
-                               for (i = 0; i < nmap; i++) {
-                                       cmn_err(CE_ALERT,
-                                               "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
+                               /* walk all nirecs extents; *nmaps is still 1 here */
+                               for (i = 0; i < nirecs; i++) {
+                                       xfs_alert(mp,
+"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
                                                 i,
-                                               (long long)mapp[i].br_startoff,
-                                               (long long)mapp[i].br_startblock,
-                                               (long long)mapp[i].br_blockcount,
-                                               mapp[i].br_state);
+                                               (long long)irecs[i].br_startoff,
+                                               (long long)irecs[i].br_startblock,
+                                               (long long)irecs[i].br_blockcount,
+                                               irecs[i].br_state);
                                 }
                         }
                         XFS_ERROR_REPORT("xfs_da_do_buf(1)",
                                          XFS_ERRLEVEL_LOW, mp);
                 }
-               goto exit0;
+               goto out;
         }
-       if (caller != 3 && nmap > 1) {
-               bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
-               nbplist = 0;
-       } else
-               bplist = NULL;
-       /*
-        * Turn the mapping(s) into buffer(s).
-        */
-       for (i = 0; i < nmap; i++) {
-               int     nmapped;
-
-               mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
-               if (i == 0)
-                       *mappedbnop = mappedbno;
-               nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
-               switch (caller) {
-               case 0:
-                       bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
-                               mappedbno, nmapped, 0);
-                       error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
-                       break;
-               case 1:
-               case 2:
-                       bp = NULL;
-                       error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
-                               mappedbno, nmapped, 0, &bp);
-                       break;
-               case 3:
-                       xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
+       error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
+out:
+       if (irecs != &irec)
+               kmem_free(irecs);
+       return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block.
+ *
+ * The block is mapped with xfs_dabuf_map() and the buffer obtained with
+ * xfs_trans_get_buf_map().  *bpp is set to NULL on entry and only filled in
+ * on success.  Landing in a hole (xfs_dabuf_map() returning -1) is not an
+ * error: 0 is returned and *bpp stays NULL.  Otherwise the return value is 0
+ * or an error number.  A map array allocated by xfs_dabuf_map() is freed
+ * before returning.
+ */
+int
+xfs_da_get_buf(
+       struct xfs_trans        *trans,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp,
+       int                     whichfork)
+{
+       struct xfs_buf          *bp;
+       struct xfs_buf_map      map;
+       struct xfs_buf_map      *mapp;
+       int                     nmap;
+       int                     error;
+
+       *bpp = NULL;
+       mapp = &map;
+       nmap = 1;
+       error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+                               &mapp, &nmap);
+       if (error) {
+               /* mapping a hole is not an error, but we don't continue */
+               if (error == -1)
                         error = 0;
-                       bp = NULL;
-                       break;
-               }
-               if (error) {
-                       if (bp)
-                               xfs_trans_brelse(trans, bp);
-                       goto exit1;
-               }
-               if (!bp)
-                       continue;
-               if (caller == 1) {
-                       if (whichfork == XFS_ATTR_FORK) {
-                               XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
-                                               XFS_ATTR_BTREE_REF);
-                       } else {
-                               XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
-                                               XFS_DIR_BTREE_REF);
-                       }
-               }
-               if (bplist) {
-                       bplist[nbplist++] = bp;
-               }
+               goto out_free;
         }
-       /*
-        * Build a dabuf structure.
-        */
-       if (bplist) {
-               rbp = xfs_da_buf_make(nbplist, bplist, ra);
-       } else if (bp)
-               rbp = xfs_da_buf_make(1, &bp, ra);
+
+       bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
+                                   mapp, nmap, 0);
+       error = bp ? bp->b_error : XFS_ERROR(EIO);
+       if (error) {
+               /* bp is NULL on the EIO path; only release a real buffer */
+               if (bp)
+                       xfs_trans_brelse(trans, bp);
+               goto out_free;
+       }
+
+       *bpp = bp;
+
+out_free:
+       if (mapp != &map)
+               kmem_free(mapp);
+
+       return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ */
+int
+xfs_da_read_buf(
+       struct xfs_trans        *trans,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp,
+       int                     whichfork,
+       const struct xfs_buf_ops *ops)
+{
+       struct xfs_buf          *bp;
+       struct xfs_buf_map      map;
+       struct xfs_buf_map      *mapp;
+       int                     nmap;
+       int                     error;
+
+       *bpp = NULL;
+       mapp = &map;
+       nmap = 1;
+       error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+                               &mapp, &nmap);
+       if (error) {
+               /* mapping a hole is not an error, but we don't continue */
+               if (error == -1)
+                       error = 0;
+               goto out_free;
+       }
+
+       error = xfs_trans_read_buf_map(dp->i_mount, trans,
+                                       dp->i_mount->m_ddev_targp,
+                                       mapp, nmap, 0, &bp, ops);
+       if (error)
+               goto out_free;
+
+       if (whichfork == XFS_ATTR_FORK)
+               xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
         else
-               rbp = NULL;
+               xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
+
         /*
-        * For read_buf, check the magic number.
+        * This verification code will be moved to a CRC verification callback
+        * function so just leave it here unchanged until then.
          */
-       if (caller == 1) {
-               xfs_dir2_data_t         *data;
-               xfs_dir2_free_t         *free;
-               xfs_da_blkinfo_t        *info;
+       {
+               xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
+               xfs_dir2_free_t         *free = bp->b_addr;
+               xfs_da_blkinfo_t        *info = bp->b_addr;
                 uint                    magic, magic1;
+               struct xfs_mount        *mp = dp->i_mount;
  
-               info = rbp->data;
-               data = rbp->data;
-               free = rbp->data;
                 magic = be16_to_cpu(info->magic);
-               magic1 = be32_to_cpu(data->hdr.magic);
+               magic1 = be32_to_cpu(hdr->magic);
                 if (unlikely(
                     XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
                                    (magic != XFS_ATTR_LEAF_MAGIC) &&
@@ -2072,71 +2263,23 @@ xfs_da_do_buf(
                                    (magic != XFS_DIR2_LEAFN_MAGIC) &&
                                    (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
                                    (magic1 != XFS_DIR2_DATA_MAGIC) &&
-                                  (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
+                                  (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
                                 mp, XFS_ERRTAG_DA_READ_BUF,
                                 XFS_RANDOM_DA_READ_BUF))) {
-                       trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
+                       trace_xfs_da_btree_corrupt(bp, _RET_IP_);
                         XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
                                              XFS_ERRLEVEL_LOW, mp, info);
                         error = XFS_ERROR(EFSCORRUPTED);
-                       xfs_da_brelse(trans, rbp);
-                       nbplist = 0;
-                       goto exit1;
+                       xfs_trans_brelse(trans, bp);
+                       goto out_free;
                 }
         }
-       if (bplist) {
-               kmem_free(bplist);
-       }
-       if (mapp != &map) {
-               kmem_free(mapp);
-       }
-       if (bpp)
-               *bpp = rbp;
-       return 0;
-exit1:
-       if (bplist) {
-               for (i = 0; i < nbplist; i++)
-                       xfs_trans_brelse(trans, bplist[i]);
-               kmem_free(bplist);
-       }
-exit0:
+       *bpp = bp;
+out_free:
         if (mapp != &map)
                 kmem_free(mapp);
-       if (bpp)
-               *bpp = NULL;
-       return error;
-}
  
-/*
- * Get a buffer for the dir/attr block.
- */
-int
-xfs_da_get_buf(
-       xfs_trans_t     *trans,
-       xfs_inode_t     *dp,
-       xfs_dablk_t     bno,
-       xfs_daddr_t             mappedbno,
-       xfs_dabuf_t     **bpp,
-       int             whichfork)
-{
-       return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
-                                                (inst_t *)__return_address);
-}
-
-/*
- * Get a buffer for the dir/attr block, fill in the contents.
- */
-int
-xfs_da_read_buf(
-       xfs_trans_t     *trans,
-       xfs_inode_t     *dp,
-       xfs_dablk_t     bno,
-       xfs_daddr_t             mappedbno,
-       xfs_dabuf_t     **bpp,
-       int             whichfork)
-{
-       return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
-               (inst_t *)__return_address);
+       return error;
  }
  
  /*
@@ -2144,23 +2287,42 @@ xfs_da_read_buf(
   */
  xfs_daddr_t
  xfs_da_reada_buf(
-       xfs_trans_t     *trans,
-       xfs_inode_t     *dp,
-       xfs_dablk_t     bno,
-       int             whichfork)
+       struct xfs_trans        *trans,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       int                     whichfork,
+       const struct xfs_buf_ops *ops)
  {
-       xfs_daddr_t             rval;
+       struct xfs_buf_map      map;
+       struct xfs_buf_map      *mapp;
+       int                     nmap;
+       int                     error;
  
-       rval = -1;
-       if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
-                       (inst_t *)__return_address))
+       mapp = &map;
+       nmap = 1;
+       error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+                               &mapp, &nmap);
+       if (error) {
+               /* mapping a hole is not an error, but we don't continue */
+               if (error == -1)
+                       error = 0;
+               goto out_free;
+       }
+
+       mappedbno = mapp[0].bm_bn;
+       xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
+
+out_free:
+       if (mapp != &map)
+               kmem_free(mapp);
+
+       if (error)
                 return -1;
-       else
-               return rval;
+       return mappedbno;
  }
  
  kmem_zone_t *xfs_da_state_zone;        /* anchor for state struct zone */
-kmem_zone_t *xfs_dabuf_zone;           /* dabuf zone */
  
  /*
   * Allocate a dir-state structure.
@@ -2180,13 +2342,8 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state)
  {
         int     i;
  
-       for (i = 0; i < state->altpath.active; i++) {
-               if (state->altpath.blk[i].bp) {
-                       if (state->altpath.blk[i].bp != state->path.blk[i].bp)
-                               xfs_da_buf_done(state->altpath.blk[i].bp);
-                       state->altpath.blk[i].bp = NULL;
-               }
-       }
+       for (i = 0; i < state->altpath.active; i++)
+               state->altpath.blk[i].bp = NULL;
         state->altpath.active = 0;
  }
  
@@ -2196,244 +2353,9 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state)
  void
  xfs_da_state_free(xfs_da_state_t *state)
  {
-       int     i;
-
         xfs_da_state_kill_altpath(state);
-       for (i = 0; i < state->path.active; i++) {
-               if (state->path.blk[i].bp)
-                       xfs_da_buf_done(state->path.blk[i].bp);
-       }
-       if (state->extravalid && state->extrablk.bp)
-               xfs_da_buf_done(state->extrablk.bp);
  #ifdef DEBUG
         memset((char *)state, 0, sizeof(*state));
  #endif /* DEBUG */
         kmem_zone_free(xfs_da_state_zone, state);
  }
-
-#ifdef XFS_DABUF_DEBUG
-xfs_dabuf_t    *xfs_dabuf_global_list;
-static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
-#endif
-
-/*
- * Create a dabuf.
- */
-/* ARGSUSED */
-STATIC xfs_dabuf_t *
-xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
-{
-       xfs_buf_t       *bp;
-       xfs_dabuf_t     *dabuf;
-       int             i;
-       int             off;
-
-       if (nbuf == 1)
-               dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
-       else
-               dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
-       dabuf->dirty = 0;
-#ifdef XFS_DABUF_DEBUG
-       dabuf->ra = ra;
-       dabuf->target = XFS_BUF_TARGET(bps[0]);
-       dabuf->blkno = XFS_BUF_ADDR(bps[0]);
-#endif
-       if (nbuf == 1) {
-               dabuf->nbuf = 1;
-               bp = bps[0];
-               dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
-               dabuf->data = XFS_BUF_PTR(bp);
-               dabuf->bps[0] = bp;
-       } else {
-               dabuf->nbuf = nbuf;
-               for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
-                       dabuf->bps[i] = bp = bps[i];
-                       dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
-               }
-               dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
-               for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
-                       bp = bps[i];
-                       memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
-                               XFS_BUF_COUNT(bp));
-               }
-       }
-#ifdef XFS_DABUF_DEBUG
-       {
-               xfs_dabuf_t     *p;
-
-               spin_lock(&xfs_dabuf_global_lock);
-               for (p = xfs_dabuf_global_list; p; p = p->next) {
-                       ASSERT(p->blkno != dabuf->blkno ||
-                              p->target != dabuf->target);
-               }
-               dabuf->prev = NULL;
-               if (xfs_dabuf_global_list)
-                       xfs_dabuf_global_list->prev = dabuf;
-               dabuf->next = xfs_dabuf_global_list;
-               xfs_dabuf_global_list = dabuf;
-               spin_unlock(&xfs_dabuf_global_lock);
-       }
-#endif
-       return dabuf;
-}
-
-/*
- * Un-dirty a dabuf.
- */
-STATIC void
-xfs_da_buf_clean(xfs_dabuf_t *dabuf)
-{
-       xfs_buf_t       *bp;
-       int             i;
-       int             off;
-
-       if (dabuf->dirty) {
-               ASSERT(dabuf->nbuf > 1);
-               dabuf->dirty = 0;
-               for (i = off = 0; i < dabuf->nbuf;
-                               i++, off += XFS_BUF_COUNT(bp)) {
-                       bp = dabuf->bps[i];
-                       memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
-                               XFS_BUF_COUNT(bp));
-               }
-       }
-}
-
-/*
- * Release a dabuf.
- */
-void
-xfs_da_buf_done(xfs_dabuf_t *dabuf)
-{
-       ASSERT(dabuf);
-       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-       if (dabuf->dirty)
-               xfs_da_buf_clean(dabuf);
-       if (dabuf->nbuf > 1)
-               kmem_free(dabuf->data);
-#ifdef XFS_DABUF_DEBUG
-       {
-               spin_lock(&xfs_dabuf_global_lock);
-               if (dabuf->prev)
-                       dabuf->prev->next = dabuf->next;
-               else
-                       xfs_dabuf_global_list = dabuf->next;
-               if (dabuf->next)
-                       dabuf->next->prev = dabuf->prev;
-               spin_unlock(&xfs_dabuf_global_lock);
-       }
-       memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
-#endif
-       if (dabuf->nbuf == 1)
-               kmem_zone_free(xfs_dabuf_zone, dabuf);
-       else
-               kmem_free(dabuf);
-}
-
-/*
- * Log transaction from a dabuf.
- */
-void
-xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
-{
-       xfs_buf_t       *bp;
-       uint            f;
-       int             i;
-       uint            l;
-       int             off;
-
-       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-       if (dabuf->nbuf == 1) {
-               ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
-               xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
-               return;
-       }
-       dabuf->dirty = 1;
-       ASSERT(first <= last);
-       for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
-               bp = dabuf->bps[i];
-               f = off;
-               l = f + XFS_BUF_COUNT(bp) - 1;
-               if (f < first)
-                       f = first;
-               if (l > last)
-                       l = last;
-               if (f <= l)
-                       xfs_trans_log_buf(tp, bp, f - off, l - off);
-               /*
-                * B_DONE is set by xfs_trans_log buf.
-                * If we don't set it on a new buffer (get not read)
-                * then if we don't put anything in the buffer it won't
-                * be set, and at commit it it released into the cache,
-                * and then a read will fail.
-                */
-               else if (!(XFS_BUF_ISDONE(bp)))
-                 XFS_BUF_DONE(bp);
-       }
-       ASSERT(last < off);
-}
-
-/*
- * Release dabuf from a transaction.
- * Have to free up the dabuf before the buffers are released,
- * since the synchronization on the dabuf is really the lock on the buffer.
- */
-void
-xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
-       xfs_buf_t       *bp;
-       xfs_buf_t       **bplist;
-       int             i;
-       int             nbuf;
-
-       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-       if ((nbuf = dabuf->nbuf) == 1) {
-               bplist = &bp;
-               bp = dabuf->bps[0];
-       } else {
-               bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-               memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
-       }
-       xfs_da_buf_done(dabuf);
-       for (i = 0; i < nbuf; i++)
-               xfs_trans_brelse(tp, bplist[i]);
-       if (bplist != &bp)
-               kmem_free(bplist);
-}
-
-/*
- * Invalidate dabuf from a transaction.
- */
-void
-xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
-       xfs_buf_t       *bp;
-       xfs_buf_t       **bplist;
-       int             i;
-       int             nbuf;
-
-       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
-       if ((nbuf = dabuf->nbuf) == 1) {
-               bplist = &bp;
-               bp = dabuf->bps[0];
-       } else {
-               bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
-               memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
-       }
-       xfs_da_buf_done(dabuf);
-       for (i = 0; i < nbuf; i++)
-               xfs_trans_binval(tp, bplist[i]);
-       if (bplist != &bp)
-               kmem_free(bplist);
-}
-
-/*
- * Get the first daddr from a dabuf.
- */
-xfs_daddr_t
-xfs_da_blkno(xfs_dabuf_t *dabuf)
-{
-       ASSERT(dabuf->nbuf);
-       ASSERT(dabuf->data);
-       return XFS_BUF_ADDR(dabuf->bps[0]);
-}
diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c

index d475118b5876807e3c2a558b60e931a0e7581174..6a4027f6bc13060d07f7d19891d29810592dee34 100644 (file)
--- a/libxfs/xfs_dir2.c
+++ b/libxfs/xfs_dir2.c
@@ -98,15 +98,15 @@ int
  xfs_dir_isempty(
         xfs_inode_t     *dp)
  {
-       xfs_dir2_sf_t   *sfp;
+       xfs_dir2_sf_hdr_t       *sfp;
  
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
         if (dp->i_d.di_size == 0)       /* might happen during shutdown. */
                 return 1;
         if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
                 return 0;
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       return !sfp->hdr.count;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       return !sfp->count;
  }
  
  /*
@@ -135,7 +135,7 @@ xfs_dir_ino_validate(
                 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
         if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
                         XFS_RANDOM_DIR_INO_VALIDATE))) {
-               xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
+               xfs_warn(mp, "Invalid inode number 0x%Lx",
                                 (unsigned long long) ino);
                 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
                 return XFS_ERROR(EFSCORRUPTED);
@@ -158,7 +158,7 @@ xfs_dir_init(
         memset((char *)&args, 0, sizeof(args));
         args.dp = dp;
         args.trans = tp;
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
         if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
                 return error;
         return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -181,7 +181,7 @@ xfs_dir_createname(
         int                     rval;
         int                     v;              /* type-checking value */
  
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
         if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
                 return rval;
         XFS_STATS_INC(xs_dir_create);
@@ -257,7 +257,7 @@ xfs_dir_lookup(
         int             rval;
         int             v;              /* type-checking value */
  
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
         XFS_STATS_INC(xs_dir_lookup);
  
         memset(&args, 0, sizeof(xfs_da_args_t));
@@ -312,7 +312,7 @@ xfs_dir_removename(
         int             rval;
         int             v;              /* type-checking value */
  
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
         XFS_STATS_INC(xs_dir_remove);
  
         memset(&args, 0, sizeof(xfs_da_args_t));
@@ -359,7 +359,7 @@ xfs_dir_replace(
         int             rval;
         int             v;              /* type-checking value */
  
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
  
         if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
                 return rval;
@@ -397,129 +397,34 @@ xfs_dir_replace(
  
  /*
   * Add a block to the directory.
- * This routine is for data and free blocks, not leaf/node blocks
- * which are handled by xfs_da_grow_inode.
+ *
+ * This routine is for data and free blocks, not leaf/node blocks which are
+ * handled by xfs_da_grow_inode.
   */
  int
  xfs_dir2_grow_inode(
-       xfs_da_args_t   *args,
-       int             space,          /* v2 dir's space XFS_DIR2_xxx_SPACE */
-       xfs_dir2_db_t   *dbp)           /* out: block number added */
+       struct xfs_da_args      *args,
+       int                     space,  /* v2 dir's space XFS_DIR2_xxx_SPACE */
+       xfs_dir2_db_t           *dbp)   /* out: block number added */
  {
-       xfs_fileoff_t   bno;            /* directory offset of new block */
-       int             count;          /* count of filesystem blocks */
-       xfs_inode_t     *dp;            /* incore directory inode */
-       int             error;
-       int             got;            /* blocks actually mapped */
-       int             i;
-       xfs_bmbt_irec_t map;            /* single structure for bmap */
-       int             mapi;           /* mapping index */
-       xfs_bmbt_irec_t *mapp;          /* bmap mapping structure(s) */
-       xfs_mount_t     *mp;
-       int             nmap;           /* number of bmap entries */
-       xfs_trans_t     *tp;
-       xfs_drfsbno_t   nblks;
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_mount        *mp = dp->i_mount;
+       xfs_fileoff_t           bno;    /* directory offset of new block */
+       int                     count;  /* count of filesystem blocks */
+       int                     error;
  
         trace_xfs_dir2_grow_inode(args, space);
  
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       nblks = dp->i_d.di_nblocks;
         /*
          * Set lowest possible block in the space requested.
          */
         bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
         count = mp->m_dirblkfsbs;
-       /*
-        * Find the first hole for our block.
-        */
-       if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
-               return error;
-       nmap = 1;
-       ASSERT(args->firstblock != NULL);
-       /*
-        * Try mapping the new block contiguously (one extent).
-        */
-       if ((error = xfs_bmapi(tp, dp, bno, count,
-                       XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
-                       args->firstblock, args->total, &map, &nmap,
-                       args->flist)))
-               return error;
-       ASSERT(nmap <= 1);
-       if (nmap == 1) {
-               mapp = &map;
-               mapi = 1;
-       }
-       /*
-        * Didn't work and this is a multiple-fsb directory block.
-        * Try again with contiguous flag turned on.
-        */
-       else if (nmap == 0 && count > 1) {
-               xfs_fileoff_t   b;      /* current file offset */
  
-               /*
-                * Space for maximum number of mappings.
-                */
-               mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
-               /*
-                * Iterate until we get to the end of our block.
-                */
-               for (b = bno, mapi = 0; b < bno + count; ) {
-                       int     c;      /* current fsb count */
-
-                       /*
-                        * Can't map more than MAX_NMAP at once.
-                        */
-                       nmap = MIN(XFS_BMAP_MAX_NMAP, count);
-                       c = (int)(bno + count - b);
-                       if ((error = xfs_bmapi(tp, dp, b, c,
-                                       XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
-                                       args->firstblock, args->total,
-                                       &mapp[mapi], &nmap, args->flist))) {
-                               kmem_free(mapp);
-                               return error;
-                       }
-                       if (nmap < 1)
-                               break;
-                       /*
-                        * Add this bunch into our table, go to the next offset.
-                        */
-                       mapi += nmap;
-                       b = mapp[mapi - 1].br_startoff +
-                           mapp[mapi - 1].br_blockcount;
-               }
-       }
-       /*
-        * Didn't work.
-        */
-       else {
-               mapi = 0;
-               mapp = NULL;
-       }
-       /*
-        * See how many fsb's we got.
-        */
-       for (i = 0, got = 0; i < mapi; i++)
-               got += mapp[i].br_blockcount;
-       /*
-        * Didn't get enough fsb's, or the first/last block's are wrong.
-        */
-       if (got != count || mapp[0].br_startoff != bno ||
-           mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
-           bno + count) {
-               if (mapp != &map)
-                       kmem_free(mapp);
-               return XFS_ERROR(ENOSPC);
-       }
-       /*
-        * Done with the temporary mapping table.
-        */
-       if (mapp != &map)
-               kmem_free(mapp);
+       error = xfs_da_grow_inode_int(args, &bno, count);
+       if (error)
+               return error;
  
-       /* account for newly allocated blocks in reserved blocks total */
-       args->total -= dp->i_d.di_nblocks - nblks;
         *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
  
         /*
@@ -531,7 +436,7 @@ xfs_dir2_grow_inode(
                 size = XFS_FSB_TO_B(mp, bno + count);
                 if (size > dp->i_d.di_size) {
                         dp->i_d.di_size = size;
-                       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+                       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
                 }
         }
         return 0;
@@ -588,7 +493,7 @@ int
  xfs_dir2_shrink_inode(
         xfs_da_args_t   *args,
         xfs_dir2_db_t   db,
-       xfs_dabuf_t     *bp)
+       struct xfs_buf  *bp)
  {
         xfs_fileoff_t   bno;            /* directory file offset */
         xfs_dablk_t     da;             /* directory file offset */
@@ -630,7 +535,7 @@ xfs_dir2_shrink_inode(
         /*
          * Invalidate the buffer from the transaction.
          */
-       xfs_da_binval(tp, bp);
+       xfs_trans_binval(tp, bp);
         /*
          * If it's not a data block, we're done.
          */
diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c

index b614ea694fa1a7cce15926f883504bca1f4f07de..7397faa8e4cc751fbaf6e9a024350cb0de324e53 100644 (file)
--- a/libxfs/xfs_dir2_block.c
+++ b/libxfs/xfs_dir2_block.c
@@ -21,10 +21,10 @@
  /*
   * Local function prototypes.
   */
-static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, xfs_dabuf_t *bp, int first,
-                                   int last);
-static void xfs_dir2_block_log_tail(xfs_trans_t *tp, xfs_dabuf_t *bp);
-static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp,
+static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp,
+                                   int first, int last);
+static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp);
+static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp,
                                      int *entno);
  static int xfs_dir2_block_sort(const void *a, const void *b);
  
@@ -40,6 +40,214 @@ xfs_dir_startup(void)
         xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
  }
  
+static void
+xfs_dir2_block_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+       int                     block_ok = 0;
+
+       block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+       block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;
+
+       if (!block_ok) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+static void
+xfs_dir2_block_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_dir2_block_verify(bp);
+}
+
+static void
+xfs_dir2_block_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_dir2_block_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_dir2_block_buf_ops = {
+       .verify_read = xfs_dir2_block_read_verify,
+       .verify_write = xfs_dir2_block_write_verify,
+};
+
+static int
+xfs_dir2_block_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+
+       return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
+                               XFS_DATA_FORK, &xfs_dir2_block_buf_ops);
+}
+
+static void
+xfs_dir2_block_need_space(
+       struct xfs_dir2_data_hdr        *hdr,
+       struct xfs_dir2_block_tail      *btp,
+       struct xfs_dir2_leaf_entry      *blp,
+       __be16                          **tagpp,
+       struct xfs_dir2_data_unused     **dupp,
+       struct xfs_dir2_data_unused     **enddupp,
+       int                             *compact,
+       int                             len)
+{
+       struct xfs_dir2_data_free       *bf;
+       __be16                          *tagp = NULL;
+       struct xfs_dir2_data_unused     *dup = NULL;
+       struct xfs_dir2_data_unused     *enddup = NULL;
+
+       *compact = 0;
+       bf = hdr->bestfree;
+
+       /*
+        * If there are stale entries we'll use one for the leaf.
+        */
+       if (btp->stale) {
+               if (be16_to_cpu(bf[0].length) >= len) {
+                       /*
+                        * The biggest free entry is large enough to avoid compaction.
+                        */
+                       dup = (xfs_dir2_data_unused_t *)
+                             ((char *)hdr + be16_to_cpu(bf[0].offset));
+                       goto out;
+               }
+
+               /*
+                * Will need to compact to make this work.
+                * Tag just before the first leaf entry.
+                */
+               *compact = 1;
+               tagp = (__be16 *)blp - 1;
+
+               /* Data object just before the first leaf entry.  */
+               dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+
+               /*
+                * If it's not free then the data will go where the
+                * leaf data starts now, if it works at all.
+                */
+               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+                       if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) *
+                           (uint)sizeof(*blp) < len)
+                               dup = NULL;
+               } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len)
+                       dup = NULL;
+               else
+                       dup = (xfs_dir2_data_unused_t *)blp;
+               goto out;
+       }
+
+       /*
+        * no stale entries, so just use free space.
+        * Tag just before the first leaf entry.
+        */
+       tagp = (__be16 *)blp - 1;
+
+       /* Data object just before the first leaf entry.  */
+       enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+
+       /*
+        * If it's not free then can't do this add without cleaning up:
+        * the space before the first leaf entry needs to be free so it
+        * can be expanded to hold the pointer to the new entry.
+        */
+       if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+               /*
+                * Check out the biggest freespace and see if it's the same one.
+                */
+               dup = (xfs_dir2_data_unused_t *)
+                     ((char *)hdr + be16_to_cpu(bf[0].offset));
+               if (dup != enddup) {
+                       /*
+                        * Not the same free entry, just check its length.
+                        */
+                       if (be16_to_cpu(dup->length) < len)
+                               dup = NULL;
+                       goto out;
+               }
+
+               /*
+                * It is the biggest freespace, can it hold the leaf too?
+                */
+               if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
+                       /*
+                        * No, use the second-largest entry instead if it works.
+                        */
+                       if (be16_to_cpu(bf[1].length) >= len)
+                               dup = (xfs_dir2_data_unused_t *)
+                                     ((char *)hdr + be16_to_cpu(bf[1].offset));
+                       else
+                               dup = NULL;
+               }
+       }
+out:
+       *tagpp = tagp;
+       *dupp = dup;
+       *enddupp = enddup;
+}
+
+/*
+ * compact the leaf entries.
+ * Leave the highest-numbered stale entry stale.
+ * XXX should be the one closest to mid but mid is not yet computed.
+ */
+static void
+xfs_dir2_block_compact(
+       struct xfs_trans                *tp,
+       struct xfs_buf                  *bp,
+       struct xfs_dir2_data_hdr        *hdr,
+       struct xfs_dir2_block_tail      *btp,
+       struct xfs_dir2_leaf_entry      *blp,
+       int                             *needlog,
+       int                             *lfloghigh,
+       int                             *lfloglow)
+{
+       int                     fromidx;        /* source leaf index */
+       int                     toidx;          /* target leaf index */
+       int                     needscan = 0;
+       int                     highstale;      /* high stale index */
+
+       fromidx = toidx = be32_to_cpu(btp->count) - 1;
+       highstale = *lfloghigh = -1;
+       for (; fromidx >= 0; fromidx--) {
+               if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
+                       if (highstale == -1)
+                               highstale = toidx;
+                       else {
+                               if (*lfloghigh == -1)
+                                       *lfloghigh = toidx;
+                               continue;
+                       }
+               }
+               if (fromidx < toidx)
+                       blp[toidx] = blp[fromidx];
+               toidx--;
+       }
+       *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
+       *lfloghigh -= be32_to_cpu(btp->stale) - 1;
+       be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
+       xfs_dir2_data_make_free(tp, bp,
+               (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+               (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
+               needlog, &needscan);
+       blp += be32_to_cpu(btp->stale) - 1;
+       btp->stale = cpu_to_be32(1);
+       /*
+        * If we now need to rebuild the bestfree map, do so.
+        * This needs to happen before the next call to use_free.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog);
+}
+
  /*
   * Add an entry to a block directory.
   */
@@ -47,10 +255,9 @@ int                                         /* error */
  xfs_dir2_block_addname(
         xfs_da_args_t           *args)          /* directory op arguments */
  {
-       xfs_dir2_data_free_t    *bf;            /* bestfree table in block */
-       xfs_dir2_block_t        *block;         /* directory block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       xfs_dabuf_t             *bp;            /* buffer for block */
+       struct xfs_buf          *bp;            /* buffer for block */
         xfs_dir2_block_tail_t   *btp;           /* block tail */
         int                     compact;        /* need to compact leaf ents */
         xfs_dir2_data_entry_t   *dep;           /* block data entry */
@@ -78,203 +285,77 @@ xfs_dir2_block_addname(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       /*
-        * Read the (one and only) directory block into dabuf bp.
-        */
-       if ((error =
-           xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) {
+
+       /* Read the (one and only) directory block into bp. */
+       error = xfs_dir2_block_read(tp, dp, &bp);
+       if (error)
                 return error;
-       }
-       ASSERT(bp != NULL);
-       block = bp->data;
-       /*
-        * Check the magic number, corrupted if wrong.
-        */
-       if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) {
-               XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
-                                    XFS_ERRLEVEL_LOW, mp, block);
-               xfs_da_brelse(tp, bp);
-               return XFS_ERROR(EFSCORRUPTED);
-       }
+
         len = xfs_dir2_data_entsize(args->namelen);
+
         /*
          * Set up pointers to parts of the block.
          */
-       bf = block->hdr.bestfree;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       hdr = bp->b_addr;
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
+
         /*
-        * No stale entries?  Need space for entry and new leaf.
-        */
-       if (!btp->stale) {
-               /*
-                * Tag just before the first leaf entry.
-                */
-               tagp = (__be16 *)blp - 1;
-               /*
-                * Data object just before the first leaf entry.
-                */
-               enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
-               /*
-                * If it's not free then can't do this add without cleaning up:
-                * the space before the first leaf entry needs to be free so it
-                * can be expanded to hold the pointer to the new entry.
-                */
-               if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG)
-                       dup = enddup = NULL;
-               /*
-                * Check out the biggest freespace and see if it's the same one.
-                */
-               else {
-                       dup = (xfs_dir2_data_unused_t *)
-                             ((char *)block + be16_to_cpu(bf[0].offset));
-                       if (dup == enddup) {
-                               /*
-                                * It is the biggest freespace, is it too small
-                                * to hold the new leaf too?
-                                */
-                               if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
-                                       /*
-                                        * Yes, we use the second-largest
-                                        * entry instead if it works.
-                                        */
-                                       if (be16_to_cpu(bf[1].length) >= len)
-                                               dup = (xfs_dir2_data_unused_t *)
-                                                     ((char *)block +
-                                                      be16_to_cpu(bf[1].offset));
-                                       else
-                                               dup = NULL;
-                               }
-                       } else {
-                               /*
-                                * Not the same free entry,
-                                * just check its length.
-                                */
-                               if (be16_to_cpu(dup->length) < len) {
-                                       dup = NULL;
-                               }
-                       }
-               }
-               compact = 0;
-       }
-       /*
-        * If there are stale entries we'll use one for the leaf.
-        * Is the biggest entry enough to avoid compaction?
+        * Find out if we can reuse stale entries or whether we need extra
+        * space for entry and new leaf.
          */
-       else if (be16_to_cpu(bf[0].length) >= len) {
-               dup = (xfs_dir2_data_unused_t *)
-                     ((char *)block + be16_to_cpu(bf[0].offset));
-               compact = 0;
-       }
+       xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup,
+                                 &enddup, &compact, len);
+
         /*
-        * Will need to compact to make this work.
+        * Done everything we need for a space check now.
          */
-       else {
-               /*
-                * Tag just before the first leaf entry.
-                */
-               tagp = (__be16 *)blp - 1;
-               /*
-                * Data object just before the first leaf entry.
-                */
-               dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
-               /*
-                * If it's not free then the data will go where the
-                * leaf data starts now, if it works at all.
-                */
-               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) *
-                           (uint)sizeof(*blp) < len)
-                               dup = NULL;
-               } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len)
-                       dup = NULL;
-               else
-                       dup = (xfs_dir2_data_unused_t *)blp;
-               compact = 1;
+       if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
+               xfs_trans_brelse(tp, bp);
+               if (!dup)
+                       return XFS_ERROR(ENOSPC);
+               return 0;
         }
-       /*
-        * If this isn't a real add, we're done with the buffer.
-        */
-       if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-               xfs_da_brelse(tp, bp);
+
         /*
          * If we don't have space for the new entry & leaf ...
          */
         if (!dup) {
-               /*
-                * Not trying to actually do anything, or don't have
-                * a space reservation: return no-space.
-                */
-               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
+               /* Don't have a space reservation: return no-space.  */
+               if (args->total == 0)
                         return XFS_ERROR(ENOSPC);
                 /*
                  * Convert to the next larger format.
                  * Then add the new entry in that format.
                  */
                 error = xfs_dir2_block_to_leaf(args, bp);
-               xfs_da_buf_done(bp);
                 if (error)
                         return error;
                 return xfs_dir2_leaf_addname(args);
         }
-       /*
-        * Just checking, and it would work, so say so.
-        */
-       if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-               return 0;
+
         needlog = needscan = 0;
+
         /*
          * If need to compact the leaf entries, do it now.
-        * Leave the highest-numbered stale entry stale.
-        * XXX should be the one closest to mid but mid is not yet computed.
          */
-       if (compact) {
-               int     fromidx;                /* source leaf index */
-               int     toidx;                  /* target leaf index */
-
-               for (fromidx = toidx = be32_to_cpu(btp->count) - 1,
-                       highstale = lfloghigh = -1;
-                    fromidx >= 0;
-                    fromidx--) {
-                       if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) {
-                               if (highstale == -1)
-                                       highstale = toidx;
-                               else {
-                                       if (lfloghigh == -1)
-                                               lfloghigh = toidx;
-                                       continue;
-                               }
-                       }
-                       if (fromidx < toidx)
-                               blp[toidx] = blp[fromidx];
-                       toidx--;
-               }
-               lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
-               lfloghigh -= be32_to_cpu(btp->stale) - 1;
-               be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
-               xfs_dir2_data_make_free(tp, bp,
-                       (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
-                       (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
-                       &needlog, &needscan);
-               blp += be32_to_cpu(btp->stale) - 1;
-               btp->stale = cpu_to_be32(1);
+       if (compact) {
+               xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
+                                     &lfloghigh, &lfloglow);
+               /*
+                * The helper shrinks btp->count and only advances its own
+                * copy of blp, so recompute the leaf pointer from the tail.
+                */
+               blp = xfs_dir2_block_leaf_p(btp);
+       } else if (btp->stale) {
                 /*
-                * If we now need to rebuild the bestfree map, do so.
-                * This needs to happen before the next call to use_free.
+                * Set leaf logging boundaries to impossible state.
+                * For the no-stale case they're set explicitly.
                  */
-               if (needscan) {
-                       xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
-                       needscan = 0;
-               }
-       }
-       /*
-        * Set leaf logging boundaries to impossible state.
-        * For the no-stale case they're set explicitly.
-        */
-       else if (btp->stale) {
                 lfloglow = be32_to_cpu(btp->count);
                 lfloghigh = -1;
         }
+
         /*
          * Find the slot that's first lower than our hash value, -1 if none.
          */
@@ -299,7 +375,7 @@ xfs_dir2_block_addname(
                  */
                 xfs_dir2_data_use_free(tp, bp, enddup,
                         (xfs_dir2_data_aoff_t)
-                       ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) -
+                       ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
                          sizeof(*blp)),
                         (xfs_dir2_data_aoff_t)sizeof(*blp),
                         &needlog, &needscan);
@@ -312,8 +388,7 @@ xfs_dir2_block_addname(
                  * This needs to happen before the next call to use_free.
                  */
                 if (needscan) {
-                       xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
-                               &needlog);
+                       xfs_dir2_data_freescan(mp, hdr, &needlog);
                         needscan = 0;
                 }
                 /*
@@ -334,12 +409,14 @@ xfs_dir2_block_addname(
         else {
                 for (lowstale = mid;
                      lowstale >= 0 &&
-                       be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
+                       blp[lowstale].address !=
+                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
                      lowstale--)
                         continue;
                 for (highstale = mid + 1;
                      highstale < be32_to_cpu(btp->count) &&
-                       be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
+                       blp[highstale].address !=
+                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
                         (lowstale < 0 || mid - lowstale > highstale - mid);
                      highstale++)
                         continue;
@@ -378,13 +455,13 @@ xfs_dir2_block_addname(
          */
         blp[mid].hashval = cpu_to_be32(args->hashval);
         blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-                               (char *)dep - (char *)block));
+                               (char *)dep - (char *)hdr));
         xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
         /*
          * Mark space for the data entry used.
          */
         xfs_dir2_data_use_free(tp, bp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
                 (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
         /*
          * Create the new data entry.
@@ -393,18 +470,17 @@ xfs_dir2_block_addname(
         dep->namelen = args->namelen;
         memcpy(dep->name, args->name, args->namelen);
         tagp = xfs_dir2_data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)block);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
         /*
          * Clean up the bestfree array and log the header, tail, and entry.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         if (needlog)
                 xfs_dir2_data_log_header(tp, bp);
         xfs_dir2_block_log_tail(tp, bp);
         xfs_dir2_data_log_entry(tp, bp, dep);
         xfs_dir2_data_check(dp, bp);
-       xfs_da_buf_done(bp);
         return 0;
  }
  
@@ -414,21 +490,18 @@ xfs_dir2_block_addname(
  static void
  xfs_dir2_block_log_leaf(
         xfs_trans_t             *tp,            /* transaction structure */
-       xfs_dabuf_t             *bp,            /* block buffer */
+       struct xfs_buf          *bp,            /* block buffer */
         int                     first,          /* index of first logged leaf */
         int                     last)           /* index of last logged leaf */
  {
-       xfs_dir2_block_t        *block;         /* directory block structure */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
+       xfs_dir2_leaf_entry_t   *blp;
+       xfs_dir2_block_tail_t   *btp;
  
-       mp = tp->t_mountp;
-       block = bp->data;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
-       xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
-               (uint)((char *)&blp[last + 1] - (char *)block - 1));
+       xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
+               (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
  }
  
  /*
@@ -437,17 +510,14 @@ xfs_dir2_block_log_leaf(
  static void
  xfs_dir2_block_log_tail(
         xfs_trans_t             *tp,            /* transaction structure */
-       xfs_dabuf_t             *bp)            /* block buffer */
+       struct xfs_buf          *bp)            /* block buffer */
  {
-       xfs_dir2_block_t        *block;         /* directory block structure */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
+       xfs_dir2_block_tail_t   *btp;
  
-       mp = tp->t_mountp;
-       block = bp->data;
-       btp = xfs_dir2_block_tail_p(mp, block);
-       xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
-               (uint)((char *)(btp + 1) - (char *)block - 1));
+       btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+       xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
+               (uint)((char *)(btp + 1) - (char *)hdr - 1));
  }
  
  /*
@@ -458,9 +528,9 @@ int                                         /* error */
  xfs_dir2_block_lookup(
         xfs_da_args_t           *args)          /* dir lookup arguments */
  {
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       xfs_dabuf_t             *bp;            /* block buffer */
+       struct xfs_buf          *bp;            /* block buffer */
         xfs_dir2_block_tail_t   *btp;           /* block tail */
         xfs_dir2_data_entry_t   *dep;           /* block data entry */
         xfs_inode_t             *dp;            /* incore inode */
@@ -478,21 +548,21 @@ xfs_dir2_block_lookup(
                 return error;
         dp = args->dp;
         mp = dp->i_mount;
-       block = bp->data;
+       hdr = bp->b_addr;
         xfs_dir2_data_check(dp, bp);
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
         /*
          * Get the offset from the leaf entry, to point to the data.
          */
-       dep = (xfs_dir2_data_entry_t *)((char *)block +
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
                 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
         /*
          * Fill in inode number, CI name if appropriate, release the block.
          */
         args->inumber = be64_to_cpu(dep->inumber);
         error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-       xfs_da_brelse(args->trans, bp);
+       xfs_trans_brelse(args->trans, bp);
         return XFS_ERROR(error);
  }
  
@@ -502,13 +572,13 @@ xfs_dir2_block_lookup(
  static int                                     /* error */
  xfs_dir2_block_lookup_int(
         xfs_da_args_t           *args,          /* dir lookup arguments */
-       xfs_dabuf_t             **bpp,          /* returned block buffer */
+       struct xfs_buf          **bpp,          /* returned block buffer */
         int                     *entno)         /* returned entry number */
  {
         xfs_dir2_dataptr_t      addr;           /* data entry address */
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       xfs_dabuf_t             *bp;            /* block buffer */
+       struct xfs_buf          *bp;            /* block buffer */
         xfs_dir2_block_tail_t   *btp;           /* block tail */
         xfs_dir2_data_entry_t   *dep;           /* block data entry */
         xfs_inode_t             *dp;            /* incore inode */
@@ -524,17 +594,14 @@ xfs_dir2_block_lookup_int(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       /*
-        * Read the buffer, return error if we can't get it.
-        */
-       if ((error =
-           xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) {
+
+       error = xfs_dir2_block_read(tp, dp, &bp);
+       if (error)
                 return error;
-       }
-       ASSERT(bp != NULL);
-       block = bp->data;
+
+       hdr = bp->b_addr;
         xfs_dir2_data_check(dp, bp);
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
         /*
          * Loop doing a binary search for our hash value.
@@ -551,7 +618,7 @@ xfs_dir2_block_lookup_int(
                         high = mid - 1;
                 if (low > high) {
                         ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-                       xfs_da_brelse(tp, bp);
+                       xfs_trans_brelse(tp, bp);
                         return XFS_ERROR(ENOENT);
                 }
         }
@@ -572,7 +639,7 @@ xfs_dir2_block_lookup_int(
                  * Get pointer to the entry from the leaf.
                  */
                 dep = (xfs_dir2_data_entry_t *)
-                       ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
+                       ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
                 /*
                  * Compare name and if it's an exact match, return the index
                  * and buffer. If it's the first case-insensitive match, store
@@ -599,7 +666,7 @@ xfs_dir2_block_lookup_int(
         /*
          * No match, release the buffer and return ENOENT.
          */
-       xfs_da_brelse(tp, bp);
+       xfs_trans_brelse(tp, bp);
         return XFS_ERROR(ENOENT);
  }
  
@@ -611,9 +678,9 @@ int                                         /* error */
  xfs_dir2_block_removename(
         xfs_da_args_t           *args)          /* directory operation args */
  {
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block leaf pointer */
-       xfs_dabuf_t             *bp;            /* block buffer */
+       struct xfs_buf          *bp;            /* block buffer */
         xfs_dir2_block_tail_t   *btp;           /* block tail */
         xfs_dir2_data_entry_t   *dep;           /* block data entry */
         xfs_inode_t             *dp;            /* incore inode */
@@ -638,20 +705,20 @@ xfs_dir2_block_removename(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       block = bp->data;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       hdr = bp->b_addr;
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
         /*
          * Point to the data entry using the leaf entry.
          */
         dep = (xfs_dir2_data_entry_t *)
-             ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+             ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
         /*
          * Mark the data entry's space free.
          */
         needlog = needscan = 0;
         xfs_dir2_data_make_free(tp, bp,
-               (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
+               (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
                 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
         /*
          * Fix up the block tail.
@@ -667,18 +734,17 @@ xfs_dir2_block_removename(
          * Fix up bestfree, log the header if necessary.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         if (needlog)
                 xfs_dir2_data_log_header(tp, bp);
         xfs_dir2_data_check(dp, bp);
         /*
          * See if the size as a shortform is good enough.
          */
-       if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
-           XFS_IFORK_DSIZE(dp)) {
-               xfs_da_buf_done(bp);
+       size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+       if (size > XFS_IFORK_DSIZE(dp))
                 return 0;
-       }
+
         /*
          * If it works, do the conversion.
          */
@@ -693,9 +759,9 @@ int                                         /* error */
  xfs_dir2_block_replace(
         xfs_da_args_t           *args)          /* directory operation args */
  {
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       xfs_dabuf_t             *bp;            /* block buffer */
+       struct xfs_buf          *bp;            /* block buffer */
         xfs_dir2_block_tail_t   *btp;           /* block tail */
         xfs_dir2_data_entry_t   *dep;           /* block data entry */
         xfs_inode_t             *dp;            /* incore inode */
@@ -714,14 +780,14 @@ xfs_dir2_block_replace(
         }
         dp = args->dp;
         mp = dp->i_mount;
-       block = bp->data;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       hdr = bp->b_addr;
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
         /*
          * Point to the data entry we need to change.
          */
         dep = (xfs_dir2_data_entry_t *)
-             ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+             ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
         ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
         /*
          * Change the inode number to the new value.
@@ -729,7 +795,6 @@ xfs_dir2_block_replace(
         dep->inumber = cpu_to_be64(args->inumber);
         xfs_dir2_data_log_entry(args->trans, bp, dep);
         xfs_dir2_data_check(dp, bp);
-       xfs_da_buf_done(bp);
         return 0;
  }
  
@@ -756,11 +821,11 @@ xfs_dir2_block_sort(
  int                                            /* error */
  xfs_dir2_leaf_to_block(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *lbp,           /* leaf buffer */
-       xfs_dabuf_t             *dbp)           /* data buffer */
+       struct xfs_buf          *lbp,           /* leaf buffer */
+       struct xfs_buf          *dbp)           /* data buffer */
  {
         __be16                  *bestsp;        /* leaf bests table */
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_block_tail_t   *btp;           /* block tail */
         xfs_inode_t             *dp;            /* incore directory inode */
         xfs_dir2_data_unused_t  *dup;           /* unused data entry */
@@ -783,8 +848,8 @@ xfs_dir2_leaf_to_block(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       leaf = lbp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+       leaf = lbp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
         /*
          * If there are data blocks other than the first one, take this
@@ -795,48 +860,46 @@ xfs_dir2_leaf_to_block(
         while (dp->i_d.di_size > mp->m_dirblksize) {
                 bestsp = xfs_dir2_leaf_bests_p(ltp);
                 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
-                   mp->m_dirblksize - (uint)sizeof(block->hdr)) {
+                   mp->m_dirblksize - (uint)sizeof(*hdr)) {
                         if ((error =
                             xfs_dir2_leaf_trim_data(args, lbp,
                                     (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
-                               goto out;
-               } else {
-                       error = 0;
-                       goto out;
-               }
+                               return error;
+               } else
+                       return 0;
         }
         /*
          * Read the data block if we don't already have it, give up if it fails.
          */
-       if (dbp == NULL &&
-           (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp,
-                   XFS_DATA_FORK))) {
-               goto out;
+       if (!dbp) {
+               error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
+               if (error)
+                       return error;
         }
-       block = dbp->data;
-       ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+       hdr = dbp->b_addr;
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
         /*
          * Size of the "leaf" area in the block.
          */
-       size = (uint)sizeof(block->tail) +
+       size = (uint)sizeof(xfs_dir2_block_tail_t) +
                (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
         /*
          * Look at the last data entry.
          */
-       tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1;
-       dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+       tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
+       dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
         /*
          * If it's not free or is too short we can't do it.
          */
         if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG ||
-           be16_to_cpu(dup->length) < size) {
-               error = 0;
-               goto out;
-       }
+           be16_to_cpu(dup->length) < size)
+               return 0;
+
         /*
          * Start converting it to block form.
          */
-       block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+       dbp->b_ops = &xfs_dir2_block_buf_ops;
+       hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
         needlog = 1;
         needscan = 0;
         /*
@@ -847,7 +910,7 @@ xfs_dir2_leaf_to_block(
         /*
          * Initialize the block tail.
          */
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
         btp->stale = 0;
         xfs_dir2_block_log_tail(tp, dbp);
@@ -856,7 +919,8 @@ xfs_dir2_leaf_to_block(
          */
         lep = xfs_dir2_block_leaf_p(btp);
         for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
-               if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
+               if (leaf->ents[from].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                         continue;
                 lep[to++] = leaf->ents[from];
         }
@@ -866,32 +930,24 @@ xfs_dir2_leaf_to_block(
          * Scan the bestfree if we need it and log the data block header.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         if (needlog)
                 xfs_dir2_data_log_header(tp, dbp);
         /*
          * Pitch the old leaf block.
          */
         error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
-       lbp = NULL;
-       if (error) {
-               goto out;
-       }
+       if (error)
+               return error;
+
         /*
          * Now see if the resulting block can be shrunken to shortform.
          */
-       if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
-           XFS_IFORK_DSIZE(dp)) {
-               error = 0;
-               goto out;
-       }
+       size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+       if (size > XFS_IFORK_DSIZE(dp))
+               return 0;
+
         return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
-out:
-       if (lbp)
-               xfs_da_buf_done(lbp);
-       if (dbp)
-               xfs_da_buf_done(dbp);
-       return error;
  }
  
  /*
@@ -902,12 +958,10 @@ xfs_dir2_sf_to_block(
         xfs_da_args_t           *args)          /* operation arguments */
  {
         xfs_dir2_db_t           blkno;          /* dir-relative block # (0) */
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       xfs_dabuf_t             *bp;            /* block buffer */
+       struct xfs_buf          *bp;            /* block buffer */
         xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
-       char                    *buf;           /* sf buffer */
-       int                     buf_len;
         xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     dummy;          /* trash */
@@ -921,7 +975,8 @@ xfs_dir2_sf_to_block(
         int                     newoffset;      /* offset from current entry */
         int                     offset;         /* target block offset */
         xfs_dir2_sf_entry_t     *sfep;          /* sf entry pointer */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* old shortform header  */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform header  */
         __be16                  *tagp;          /* end of data entry */
         xfs_trans_t             *tp;            /* transaction pointer */
         struct xfs_name         name;
@@ -939,32 +994,30 @@ xfs_dir2_sf_to_block(
                 ASSERT(XFS_FORCED_SHUTDOWN(mp));
                 return XFS_ERROR(EIO);
         }
+
+       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+
         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
         ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
+
         /*
-        * Copy the directory into the stack buffer.
+        * Copy the directory into a temporary buffer.
          * Then pitch the incore inode data so we can make extents.
          */
+       sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
+       memcpy(sfp, oldsfp, dp->i_df.if_bytes);
  
-       buf_len = dp->i_df.if_bytes;
-       buf = kmem_alloc(buf_len, KM_SLEEP);
-
-       memcpy(buf, sfp, buf_len);
-       xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
         dp->i_d.di_size = 0;
         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-       /*
-        * Reset pointer - old sfp is gone.
-        */
-       sfp = (xfs_dir2_sf_t *)buf;
+
         /*
          * Add block 0 to the inode.
          */
         error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
         if (error) {
-               kmem_free(buf);
+               kmem_free(sfp);
                 return error;
         }
         /*
@@ -972,21 +1025,22 @@ xfs_dir2_sf_to_block(
          */
         error = xfs_dir2_data_init(args, blkno, &bp);
         if (error) {
-               kmem_free(buf);
+               kmem_free(sfp);
                 return error;
         }
-       block = bp->data;
-       block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+       bp->b_ops = &xfs_dir2_block_buf_ops;
+       hdr = bp->b_addr;
+       hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
         /*
          * Compute size of block "tail" area.
          */
         i = (uint)sizeof(*btp) +
-           (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+           (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
         /*
          * The whole thing is initialized to free by the init routine.
          * Say we're using the leaf and tail area.
          */
-       dup = (xfs_dir2_data_unused_t *)block->u;
+       dup = (xfs_dir2_data_unused_t *)(hdr + 1);
         needlog = needscan = 0;
         xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
                 &needscan);
@@ -994,50 +1048,51 @@ xfs_dir2_sf_to_block(
         /*
          * Fill in the tail.
          */
-       btp = xfs_dir2_block_tail_p(mp, block);
-       btp->count = cpu_to_be32(sfp->hdr.count + 2);   /* ., .. */
+       btp = xfs_dir2_block_tail_p(mp, hdr);
+       btp->count = cpu_to_be32(sfp->count + 2);       /* ., .. */
         btp->stale = 0;
         blp = xfs_dir2_block_leaf_p(btp);
-       endoffset = (uint)((char *)blp - (char *)block);
+       endoffset = (uint)((char *)blp - (char *)hdr);
         /*
          * Remove the freespace, we'll manage it.
          */
         xfs_dir2_data_use_free(tp, bp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
                 be16_to_cpu(dup->length), &needlog, &needscan);
         /*
          * Create entry for .
          */
         dep = (xfs_dir2_data_entry_t *)
-             ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
+             ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
         dep->inumber = cpu_to_be64(dp->i_ino);
         dep->namelen = 1;
         dep->name[0] = '.';
         tagp = xfs_dir2_data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)block);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
         xfs_dir2_data_log_entry(tp, bp, dep);
         blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
         blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-                               (char *)dep - (char *)block));
+                               (char *)dep - (char *)hdr));
         /*
          * Create entry for ..
          */
         dep = (xfs_dir2_data_entry_t *)
-               ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
-       dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
+               ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
+       dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
         dep->namelen = 2;
         dep->name[0] = dep->name[1] = '.';
         tagp = xfs_dir2_data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)block);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
         xfs_dir2_data_log_entry(tp, bp, dep);
         blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
         blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-                               (char *)dep - (char *)block));
+                               (char *)dep - (char *)hdr));
         offset = XFS_DIR2_DATA_FIRST_OFFSET;
         /*
          * Loop over existing entries, stuff them in.
          */
-       if ((i = 0) == sfp->hdr.count)
+       i = 0;
+       if (!sfp->count)
                 sfep = NULL;
         else
                 sfep = xfs_dir2_sf_firstentry(sfp);
@@ -1057,43 +1112,40 @@ xfs_dir2_sf_to_block(
                  * There should be a hole here, make one.
                  */
                 if (offset < newoffset) {
-                       dup = (xfs_dir2_data_unused_t *)
-                             ((char *)block + offset);
+                       dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
                         dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                         dup->length = cpu_to_be16(newoffset - offset);
                         *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
-                               ((char *)dup - (char *)block));
+                               ((char *)dup - (char *)hdr));
                         xfs_dir2_data_log_unused(tp, bp, dup);
-                       (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
-                               dup, &dummy);
+                       xfs_dir2_data_freeinsert(hdr, dup, &dummy);
                         offset += be16_to_cpu(dup->length);
                         continue;
                 }
                 /*
                  * Copy a real entry.
                  */
-               dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
-               dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
-                               xfs_dir2_sf_inumberp(sfep)));
+               dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
+               dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
                 dep->namelen = sfep->namelen;
                 memcpy(dep->name, sfep->name, dep->namelen);
                 tagp = xfs_dir2_data_entry_tag_p(dep);
-               *tagp = cpu_to_be16((char *)dep - (char *)block);
+               *tagp = cpu_to_be16((char *)dep - (char *)hdr);
                 xfs_dir2_data_log_entry(tp, bp, dep);
                 name.name = sfep->name;
                 name.len = sfep->namelen;
                 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
                                                         hashname(&name));
                 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
-                                                (char *)dep - (char *)block));
-               offset = (int)((char *)(tagp + 1) - (char *)block);
-               if (++i == sfp->hdr.count)
+                                                (char *)dep - (char *)hdr));
+               offset = (int)((char *)(tagp + 1) - (char *)hdr);
+               if (++i == sfp->count)
                         sfep = NULL;
                 else
                         sfep = xfs_dir2_sf_nextentry(sfp, sfep);
         }
         /* Done with the temporary buffer */
-       kmem_free(buf);
+       kmem_free(sfp);
         /*
          * Sort the leaf entries by hash value.
          */
@@ -1106,6 +1158,5 @@ xfs_dir2_sf_to_block(
         xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
         xfs_dir2_block_log_tail(tp, bp);
         xfs_dir2_data_check(dp, bp);
-       xfs_da_buf_done(bp);
         return 0;
  }
diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c

index d89b5b1f09c9bd2a6296e1063de95ad84b43e52b..eb8673999d2fc4459ea0c819fe01757ffad51d2b 100644 (file)
--- a/libxfs/xfs_dir2_data.c
+++ b/libxfs/xfs_dir2_data.c
@@ -18,23 +18,21 @@
  
  #include <xfs.h>
  
-
-#ifdef DEBUG
  /*
   * Check the consistency of the data block.
   * The input can also be a block-format directory.
- * Pop an assert if we find anything bad.
+ * Return 0 if the buffer is good, otherwise an error.
   */
-void
-xfs_dir2_data_check(
-       xfs_inode_t             *dp,            /* incore inode pointer */
-       xfs_dabuf_t             *bp)            /* data block's buffer */
+int
+__xfs_dir2_data_check(
+       struct xfs_inode        *dp,            /* incore inode pointer */
+       struct xfs_buf          *bp)            /* data block's buffer */
  {
         xfs_dir2_dataptr_t      addr;           /* addr for leaf lookup */
         xfs_dir2_data_free_t    *bf;            /* bestfree table */
         xfs_dir2_block_tail_t   *btp=NULL;      /* block tail */
         int                     count;          /* count of entries found */
-       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_dir2_data_entry_t   *dep;           /* data entry */
         xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
         xfs_dir2_data_unused_t  *dup;           /* unused entry */
@@ -49,36 +47,46 @@ xfs_dir2_data_check(
         int                     stale;          /* count of stale leaves */
         struct xfs_name         name;
  
-       mp = dp->i_mount;
-       d = bp->data;
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       bf = d->hdr.bestfree;
-       p = (char *)d->u;
-       if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-               btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+       mp = bp->b_target->bt_mount;
+       hdr = bp->b_addr;
+       bf = hdr->bestfree;
+       p = (char *)(hdr + 1);
+
+       switch (be32_to_cpu(hdr->magic)) {
+       case XFS_DIR2_BLOCK_MAGIC:
+               btp = xfs_dir2_block_tail_p(mp, hdr);
                 lep = xfs_dir2_block_leaf_p(btp);
                 endp = (char *)lep;
-       } else
-               endp = (char *)d + mp->m_dirblksize;
+               break;
+       case XFS_DIR2_DATA_MAGIC:
+               endp = (char *)hdr + mp->m_dirblksize;
+               break;
+       default:
+               XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+
         count = lastfree = freeseen = 0;
         /*
          * Account for zero bestfree entries.
          */
         if (!bf[0].length) {
-               ASSERT(!bf[0].offset);
+               XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
                 freeseen |= 1 << 0;
         }
         if (!bf[1].length) {
-               ASSERT(!bf[1].offset);
+               XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
                 freeseen |= 1 << 1;
         }
         if (!bf[2].length) {
-               ASSERT(!bf[2].offset);
+               XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
                 freeseen |= 1 << 2;
         }
-       ASSERT(be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length));
-       ASSERT(be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length));
+
+       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
+                                               be16_to_cpu(bf[1].length));
+       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
+                                               be16_to_cpu(bf[2].length));
         /*
          * Loop over the data/unused entries.
          */
@@ -90,17 +98,20 @@ xfs_dir2_data_check(
                  * doesn't need to be there.
                  */
                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       ASSERT(lastfree == 0);
-                       ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
-                              (char *)dup - (char *)d);
-                       dfp = xfs_dir2_data_freefind(d, dup);
+                       XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
+                       XFS_WANT_CORRUPTED_RETURN(
+                               be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
+                                              (char *)dup - (char *)hdr);
+                       dfp = xfs_dir2_data_freefind(hdr, dup);
                         if (dfp) {
                                 i = (int)(dfp - bf);
-                               ASSERT((freeseen & (1 << i)) == 0);
+                               XFS_WANT_CORRUPTED_RETURN(
+                                       (freeseen & (1 << i)) == 0);
                                 freeseen |= 1 << i;
                         } else {
-                               ASSERT(be16_to_cpu(dup->length) <=
-                                      be16_to_cpu(bf[2].length));
+                               XFS_WANT_CORRUPTED_RETURN(
+                                       be16_to_cpu(dup->length) <=
+                                               be16_to_cpu(bf[2].length));
                         }
                         p += be16_to_cpu(dup->length);
                         lastfree = 1;
@@ -113,16 +124,18 @@ xfs_dir2_data_check(
                  * The linear search is crude but this is DEBUG code.
                  */
                 dep = (xfs_dir2_data_entry_t *)p;
-               ASSERT(dep->namelen != 0);
-               ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
-               ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
-                      (char *)dep - (char *)d);
+               XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
+               XFS_WANT_CORRUPTED_RETURN(
+                       !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
+               XFS_WANT_CORRUPTED_RETURN(
+                       be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
+                                              (char *)dep - (char *)hdr);
                 count++;
                 lastfree = 0;
-               if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+               if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
                         addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                 (xfs_dir2_data_aoff_t)
-                               ((char *)dep - (char *)d));
+                               ((char *)dep - (char *)hdr));
                         name.name = dep->name;
                         name.len = dep->namelen;
                         hash = mp->m_dirnameops->hashname(&name);
@@ -131,26 +144,122 @@ xfs_dir2_data_check(
                                     be32_to_cpu(lep[i].hashval) == hash)
                                         break;
                         }
-                       ASSERT(i < be32_to_cpu(btp->count));
+                       XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
                 }
                 p += xfs_dir2_data_entsize(dep->namelen);
         }
         /*
          * Need to have seen all the entries and all the bestfree slots.
          */
-       ASSERT(freeseen == 7);
-       if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+       XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
                 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
-                       if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR)
+                       if (lep[i].address ==
+                           cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                                 stale++;
                         if (i > 0)
-                               ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
+                               XFS_WANT_CORRUPTED_RETURN(
+                                       be32_to_cpu(lep[i].hashval) >=
+                                               be32_to_cpu(lep[i - 1].hashval));
                 }
-               ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
-               ASSERT(stale == be32_to_cpu(btp->stale));
+               XFS_WANT_CORRUPTED_RETURN(count ==
+                       be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
+               XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
+       }
+       return 0;
+}
+
+static void
+xfs_dir2_data_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+       int                     block_ok = 0;
+
+       block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+       block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;
+
+       if (!block_ok) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+/*
+ * Readahead of the first block of the directory when it is opened is completely
+ * oblivious to the format of the directory. Hence we can either get a block
+ * format buffer or a data format buffer on readahead.
+ */
+static void
+xfs_dir2_data_reada_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+
+       switch (be32_to_cpu(hdr->magic)) {
+       case XFS_DIR2_BLOCK_MAGIC:
+               bp->b_ops = &xfs_dir2_block_buf_ops;
+               bp->b_ops->verify_read(bp);
+               return;
+       case XFS_DIR2_DATA_MAGIC:
+               xfs_dir2_data_verify(bp);
+               return;
+       default:
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               break;
         }
  }
-#endif
+
+static void
+xfs_dir2_data_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_dir2_data_verify(bp);
+}
+
+static void
+xfs_dir2_data_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_dir2_data_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_dir2_data_buf_ops = {
+       .verify_read = xfs_dir2_data_read_verify,
+       .verify_write = xfs_dir2_data_write_verify,
+};
+
+static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = {
+       .verify_read = xfs_dir2_data_reada_verify,
+       .verify_write = xfs_dir2_data_write_verify,
+};
+
+
+int
+xfs_dir2_data_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mapped_bno,
+       struct xfs_buf          **bpp)
+{
+       return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
+                               XFS_DATA_FORK, &xfs_dir2_data_buf_ops);
+}
+
+int
+xfs_dir2_data_readahead(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mapped_bno)
+{
+       return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
+                               XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops);
+}
  
  /*
   * Given a data block and an unused entry from that block,
@@ -158,7 +267,7 @@ xfs_dir2_data_check(
   */
  xfs_dir2_data_free_t *
  xfs_dir2_data_freefind(
-       xfs_dir2_data_t         *d,             /* data block */
+       xfs_dir2_data_hdr_t     *hdr,           /* data block */
         xfs_dir2_data_unused_t  *dup)           /* data unused entry */
  {
         xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
@@ -168,17 +277,17 @@ xfs_dir2_data_freefind(
         int                     seenzero;       /* saw a 0 bestfree entry */
  #endif
  
-       off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d);
+       off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
  #if defined(DEBUG) && defined(__KERNEL__)
         /*
          * Validate some consistency in the bestfree table.
          * Check order, non-overlapping entries, and if we find the
          * one we're looking for it has to be exact.
          */
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
-            dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+       for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
+            dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
              dfp++) {
                 if (!dfp->offset) {
                         ASSERT(!dfp->length);
@@ -194,7 +303,7 @@ xfs_dir2_data_freefind(
                 else
                         ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
                 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
-               if (dfp > &d->hdr.bestfree[0])
+               if (dfp > &hdr->bestfree[0])
                         ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
         }
  #endif
@@ -203,13 +312,13 @@ xfs_dir2_data_freefind(
          * it can't be there since they're sorted.
          */
         if (be16_to_cpu(dup->length) <
-           be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
+           be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
                 return NULL;
         /*
          * Look at the three bestfree entries for our guy.
          */
-       for (dfp = &d->hdr.bestfree[0];
-            dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+       for (dfp = &hdr->bestfree[0];
+            dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
              dfp++) {
                 if (!dfp->offset)
                         return NULL;
@@ -227,7 +336,7 @@ xfs_dir2_data_freefind(
   */
  xfs_dir2_data_free_t *                         /* entry inserted */
  xfs_dir2_data_freeinsert(
-       xfs_dir2_data_t         *d,             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr,           /* data block pointer */
         xfs_dir2_data_unused_t  *dup,           /* unused space */
         int                     *loghead)       /* log the data header (out) */
  {
@@ -235,12 +344,13 @@ xfs_dir2_data_freeinsert(
         xfs_dir2_data_free_t    new;            /* new bestfree entry */
  
  #ifdef __KERNEL__
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
  #endif
-       dfp = d->hdr.bestfree;
+       dfp = hdr->bestfree;
         new.length = dup->length;
-       new.offset = cpu_to_be16((char *)dup - (char *)d);
+       new.offset = cpu_to_be16((char *)dup - (char *)hdr);
+
         /*
          * Insert at position 0, 1, or 2; or not at all.
          */
@@ -270,36 +380,36 @@ xfs_dir2_data_freeinsert(
   */
  STATIC void
  xfs_dir2_data_freeremove(
-       xfs_dir2_data_t         *d,             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr,           /* data block header */
         xfs_dir2_data_free_t    *dfp,           /* bestfree entry pointer */
         int                     *loghead)       /* out: log data header */
  {
  #ifdef __KERNEL__
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
  #endif
         /*
          * It's the first entry, slide the next 2 up.
          */
-       if (dfp == &d->hdr.bestfree[0]) {
-               d->hdr.bestfree[0] = d->hdr.bestfree[1];
-               d->hdr.bestfree[1] = d->hdr.bestfree[2];
+       if (dfp == &hdr->bestfree[0]) {
+               hdr->bestfree[0] = hdr->bestfree[1];
+               hdr->bestfree[1] = hdr->bestfree[2];
         }
         /*
          * It's the second entry, slide the 3rd entry up.
          */
-       else if (dfp == &d->hdr.bestfree[1])
-               d->hdr.bestfree[1] = d->hdr.bestfree[2];
+       else if (dfp == &hdr->bestfree[1])
+               hdr->bestfree[1] = hdr->bestfree[2];
         /*
          * Must be the last entry.
          */
         else
-               ASSERT(dfp == &d->hdr.bestfree[2]);
+               ASSERT(dfp == &hdr->bestfree[2]);
         /*
          * Clear the 3rd entry, must be zero now.
          */
-       d->hdr.bestfree[2].length = 0;
-       d->hdr.bestfree[2].offset = 0;
+       hdr->bestfree[2].length = 0;
+       hdr->bestfree[2].offset = 0;
         *loghead = 1;
  }
  
@@ -309,7 +419,7 @@ xfs_dir2_data_freeremove(
  void
  xfs_dir2_data_freescan(
         xfs_mount_t             *mp,            /* filesystem mount point */
-       xfs_dir2_data_t         *d,             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr,           /* data block header */
         int                     *loghead)       /* out: log data header */
  {
         xfs_dir2_block_tail_t   *btp;           /* block tail */
@@ -319,23 +429,23 @@ xfs_dir2_data_freescan(
         char                    *p;             /* current entry pointer */
  
  #ifdef __KERNEL__
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
  #endif
         /*
          * Start by clearing the table.
          */
-       memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree));
+       memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
         *loghead = 1;
         /*
          * Set up pointers.
          */
-       p = (char *)d->u;
-       if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-               btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+       p = (char *)(hdr + 1);
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+               btp = xfs_dir2_block_tail_p(mp, hdr);
                 endp = (char *)xfs_dir2_block_leaf_p(btp);
         } else
-               endp = (char *)d + mp->m_dirblksize;
+               endp = (char *)hdr + mp->m_dirblksize;
         /*
          * Loop over the block's entries.
          */
@@ -345,9 +455,9 @@ xfs_dir2_data_freescan(
                  * If it's a free entry, insert it.
                  */
                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       ASSERT((char *)dup - (char *)d ==
+                       ASSERT((char *)dup - (char *)hdr ==
                                be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
-                       xfs_dir2_data_freeinsert(d, dup, loghead);
+                       xfs_dir2_data_freeinsert(hdr, dup, loghead);
                         p += be16_to_cpu(dup->length);
                 }
                 /*
@@ -355,7 +465,7 @@ xfs_dir2_data_freescan(
                  */
                 else {
                         dep = (xfs_dir2_data_entry_t *)p;
-                       ASSERT((char *)dep - (char *)d ==
+                       ASSERT((char *)dep - (char *)hdr ==
                                be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
                         p += xfs_dir2_data_entsize(dep->namelen);
                 }
@@ -370,10 +480,10 @@ int                                               /* error */
  xfs_dir2_data_init(
         xfs_da_args_t           *args,          /* directory operation args */
         xfs_dir2_db_t           blkno,          /* logical dir block number */
-       xfs_dabuf_t             **bpp)          /* output block buffer */
+       struct xfs_buf          **bpp)          /* output block buffer */
  {
-       xfs_dabuf_t             *bp;            /* block buffer */
-       xfs_dir2_data_t         *d;             /* pointer to block */
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_inode_t             *dp;            /* incore directory inode */
         xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
         int                     error;          /* error return value */
@@ -390,30 +500,31 @@ xfs_dir2_data_init(
          */
         error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
                 XFS_DATA_FORK);
-       if (error) {
+       if (error)
                 return error;
-       }
-       ASSERT(bp != NULL);
+       bp->b_ops = &xfs_dir2_data_buf_ops;
+
         /*
          * Initialize the header.
          */
-       d = bp->data;
-       d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
-       d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr));
+       hdr = bp->b_addr;
+       hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+       hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
         for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
-               d->hdr.bestfree[i].length = 0;
-               d->hdr.bestfree[i].offset = 0;
+               hdr->bestfree[i].length = 0;
+               hdr->bestfree[i].offset = 0;
         }
+
         /*
          * Set up an unused entry for the block's body.
          */
-       dup = &d->u[0].unused;
+       dup = (xfs_dir2_data_unused_t *)(hdr + 1);
         dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
  
-       t=mp->m_dirblksize - (uint)sizeof(d->hdr);
-       d->hdr.bestfree[0].length = cpu_to_be16(t);
+       t = mp->m_dirblksize - (uint)sizeof(*hdr);
+       hdr->bestfree[0].length = cpu_to_be16(t);
         dup->length = cpu_to_be16(t);
-       *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
+       *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
         /*
          * Log it and return it.
          */
@@ -428,18 +539,18 @@ xfs_dir2_data_init(
   */
  void
  xfs_dir2_data_log_entry(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* block buffer */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_buf          *bp,            /* block buffer */
         xfs_dir2_data_entry_t   *dep)           /* data entry pointer */
  {
-       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;      /* header at b_addr */
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
  
-       d = bp->data;
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
+       xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
                 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
-                      (char *)d - 1));
+                      (char *)hdr - 1));       /* last byte of the tag */
  }
  
  /*
@@ -447,16 +558,15 @@ xfs_dir2_data_log_entry(
   */
  void
  xfs_dir2_data_log_header(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp)            /* block buffer */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_buf          *bp)            /* block buffer */
  {
-       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;      /* header at b_addr */
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
  
-       d = bp->data;
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-       xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d),
-               (uint)(sizeof(d->hdr) - 1));
+       xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1); /* header bytes only */
  }
  
  /*
@@ -464,27 +574,27 @@ xfs_dir2_data_log_header(
   */
  void
  xfs_dir2_data_log_unused(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* block buffer */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_buf          *bp,            /* block buffer */
         xfs_dir2_data_unused_t  *dup)           /* data unused pointer */
  {
-       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;      /* header at b_addr */
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
  
-       d = bp->data;
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
         /*
          * Log the first part of the unused entry.
          */
-       xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d),
+       xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
                 (uint)((char *)&dup->length + sizeof(dup->length) -
-                      1 - (char *)d));
+                      1 - (char *)hdr));       /* last byte of dup->length */
         /*
          * Log the end (tag) of the unused entry.
          */
-       xfs_da_log_buf(tp, bp,
-               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
-               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
+       xfs_trans_log_buf(tp, bp,
+               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
+               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
                        sizeof(xfs_dir2_data_off_t) - 1));
  }
  
@@ -494,14 +604,14 @@ xfs_dir2_data_log_unused(
   */
  void
  xfs_dir2_data_make_free(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* block buffer */
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
         xfs_dir2_data_aoff_t    offset,         /* starting byte offset */
         xfs_dir2_data_aoff_t    len,            /* length in bytes */
         int                     *needlogp,      /* out: log header */
         int                     *needscanp)     /* out: regen bestfree */
  {
-       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block pointer */
         xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
         char                    *endptr;        /* end of data area */
         xfs_mount_t             *mp;            /* filesystem mount point */
@@ -511,28 +621,29 @@ xfs_dir2_data_make_free(
         xfs_dir2_data_unused_t  *prevdup;       /* unused entry before us */
  
         mp = tp->t_mountp;
-       d = bp->data;
+       hdr = bp->b_addr;
+
         /*
          * Figure out where the end of the data area is.
          */
-       if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC)
-               endptr = (char *)d + mp->m_dirblksize;
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
+               endptr = (char *)hdr + mp->m_dirblksize;
         else {
                 xfs_dir2_block_tail_t   *btp;   /* block tail */
  
-               ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-               btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+               btp = xfs_dir2_block_tail_p(mp, hdr);
                 endptr = (char *)xfs_dir2_block_leaf_p(btp);
         }
         /*
          * If this isn't the start of the block, then back up to
          * the previous entry and see if it's free.
          */
-       if (offset > sizeof(d->hdr)) {
+       if (offset > sizeof(*hdr)) {
                 __be16                  *tagp;  /* tag just before us */
  
-               tagp = (__be16 *)((char *)d + offset) - 1;
-               prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp));
+               tagp = (__be16 *)((char *)hdr + offset) - 1;
+               prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
                 if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
                         prevdup = NULL;
         } else
@@ -541,9 +652,9 @@ xfs_dir2_data_make_free(
          * If this isn't the end of the block, see if the entry after
          * us is free.
          */
-       if ((char *)d + offset + len < endptr) {
+       if ((char *)hdr + offset + len < endptr) {
                 postdup =
-                       (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+                       (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
                 if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
                         postdup = NULL;
         } else
@@ -560,21 +671,21 @@ xfs_dir2_data_make_free(
                 /*
                  * See if prevdup and/or postdup are in bestfree table.
                  */
-               dfp = xfs_dir2_data_freefind(d, prevdup);
-               dfp2 = xfs_dir2_data_freefind(d, postdup);
+               dfp = xfs_dir2_data_freefind(hdr, prevdup);
+               dfp2 = xfs_dir2_data_freefind(hdr, postdup);
                 /*
                  * We need a rescan unless there are exactly 2 free entries
                  * namely our two.  Then we know what's happening, otherwise
                  * since the third bestfree is there, there might be more
                  * entries.
                  */
-               needscan = (d->hdr.bestfree[2].length != 0);
+               needscan = (hdr->bestfree[2].length != 0);
                 /*
                  * Fix up the new big freespace.
                  */
                 be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
                 *xfs_dir2_data_unused_tag_p(prevdup) =
-                       cpu_to_be16((char *)prevdup - (char *)d);
+                       cpu_to_be16((char *)prevdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, prevdup);
                 if (!needscan) {
                         /*
@@ -584,18 +695,18 @@ xfs_dir2_data_make_free(
                          * Remove entry 1 first then entry 0.
                          */
                         ASSERT(dfp && dfp2);
-                       if (dfp == &d->hdr.bestfree[1]) {
-                               dfp = &d->hdr.bestfree[0];
+                       if (dfp == &hdr->bestfree[1]) {
+                               dfp = &hdr->bestfree[0];
                                 ASSERT(dfp2 == dfp);
-                               dfp2 = &d->hdr.bestfree[1];
+                               dfp2 = &hdr->bestfree[1];
                         }
-                       xfs_dir2_data_freeremove(d, dfp2, needlogp);
-                       xfs_dir2_data_freeremove(d, dfp, needlogp);
+                       xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
+                       xfs_dir2_data_freeremove(hdr, dfp, needlogp);
                         /*
                          * Now insert the new entry.
                          */
-                       dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
-                       ASSERT(dfp == &d->hdr.bestfree[0]);
+                       dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
+                       ASSERT(dfp == &hdr->bestfree[0]);
                         ASSERT(dfp->length == prevdup->length);
                         ASSERT(!dfp[1].length);
                         ASSERT(!dfp[2].length);
@@ -605,10 +716,10 @@ xfs_dir2_data_make_free(
          * The entry before us is free, merge with it.
          */
         else if (prevdup) {
-               dfp = xfs_dir2_data_freefind(d, prevdup);
+               dfp = xfs_dir2_data_freefind(hdr, prevdup);
                 be16_add_cpu(&prevdup->length, len);
                 *xfs_dir2_data_unused_tag_p(prevdup) =
-                       cpu_to_be16((char *)prevdup - (char *)d);
+                       cpu_to_be16((char *)prevdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, prevdup);
                 /*
                  * If the previous entry was in the table, the new entry
@@ -616,27 +727,27 @@ xfs_dir2_data_make_free(
                  * the old one and add the new one.
                  */
                 if (dfp) {
-                       xfs_dir2_data_freeremove(d, dfp, needlogp);
-                       (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+                       xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+                       xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
                 }
                 /*
                  * Otherwise we need a scan if the new entry is big enough.
                  */
                 else {
                         needscan = be16_to_cpu(prevdup->length) >
-                                  be16_to_cpu(d->hdr.bestfree[2].length);
+                                  be16_to_cpu(hdr->bestfree[2].length);
                 }
         }
         /*
          * The following entry is free, merge with it.
          */
         else if (postdup) {
-               dfp = xfs_dir2_data_freefind(d, postdup);
-               newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+               dfp = xfs_dir2_data_freefind(hdr, postdup);
+               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
                 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
                 *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)d);
+                       cpu_to_be16((char *)newdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, newdup);
                 /*
                  * If the following entry was in the table, the new entry
@@ -644,28 +755,28 @@ xfs_dir2_data_make_free(
                  * the old one and add the new one.
                  */
                 if (dfp) {
-                       xfs_dir2_data_freeremove(d, dfp, needlogp);
-                       (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+                       xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+                       xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
                 }
                 /*
                  * Otherwise we need a scan if the new entry is big enough.
                  */
                 else {
                         needscan = be16_to_cpu(newdup->length) >
-                                  be16_to_cpu(d->hdr.bestfree[2].length);
+                                  be16_to_cpu(hdr->bestfree[2].length);
                 }
         }
         /*
          * Neither neighbor is free.  Make a new entry.
          */
         else {
-               newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
                 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                 newdup->length = cpu_to_be16(len);
                 *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)d);
+                       cpu_to_be16((char *)newdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, newdup);
-               (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+               xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
         }
         *needscanp = needscan;
  }
@@ -675,15 +786,15 @@ xfs_dir2_data_make_free(
   */
  void
  xfs_dir2_data_use_free(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* data block buffer */
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
         xfs_dir2_data_unused_t  *dup,           /* unused entry */
         xfs_dir2_data_aoff_t    offset,         /* starting offset to use */
         xfs_dir2_data_aoff_t    len,            /* length to use */
         int                     *needlogp,      /* out: need to log header */
         int                     *needscanp)     /* out: need regen bestfree */
  {
-       xfs_dir2_data_t         *d;             /* data block */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
         int                     matchback;      /* matches end of freespace */
         int                     matchfront;     /* matches start of freespace */
@@ -692,24 +803,24 @@ xfs_dir2_data_use_free(
         xfs_dir2_data_unused_t  *newdup2;       /* another new unused entry */
         int                     oldlen;         /* old unused entry's length */
  
-       d = bp->data;
-       ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
-              be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+       hdr = bp->b_addr;
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
         ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
-       ASSERT(offset >= (char *)dup - (char *)d);
-       ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
-       ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+       ASSERT(offset >= (char *)dup - (char *)hdr);
+       ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
+       ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
         /*
          * Look up the entry in the bestfree table.
          */
-       dfp = xfs_dir2_data_freefind(d, dup);
+       dfp = xfs_dir2_data_freefind(hdr, dup);
         oldlen = be16_to_cpu(dup->length);
-       ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length));
+       ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
         /*
          * Check for alignment with front and back of the entry.
          */
-       matchfront = (char *)dup - (char *)d == offset;
-       matchback = (char *)dup + oldlen - (char *)d == offset + len;
+       matchfront = (char *)dup - (char *)hdr == offset;
+       matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
         ASSERT(*needscanp == 0);
         needscan = 0;
         /*
@@ -718,9 +829,9 @@ xfs_dir2_data_use_free(
          */
         if (matchfront && matchback) {
                 if (dfp) {
-                       needscan = (d->hdr.bestfree[2].offset != 0);
+                       needscan = (hdr->bestfree[2].offset != 0);
                         if (!needscan)
-                               xfs_dir2_data_freeremove(d, dfp, needlogp);
+                               xfs_dir2_data_freeremove(hdr, dfp, needlogp);
                 }
         }
         /*
@@ -728,27 +839,27 @@ xfs_dir2_data_use_free(
          * Make a new entry with the remaining freespace.
          */
         else if (matchfront) {
-               newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
                 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                 newdup->length = cpu_to_be16(oldlen - len);
                 *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)d);
+                       cpu_to_be16((char *)newdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, newdup);
                 /*
                  * If it was in the table, remove it and add the new one.
                  */
                 if (dfp) {
-                       xfs_dir2_data_freeremove(d, dfp, needlogp);
-                       dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+                       xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+                       dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
                         ASSERT(dfp != NULL);
                         ASSERT(dfp->length == newdup->length);
-                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
+                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
                         /*
                          * If we got inserted at the last slot,
                          * that means we don't know if there was a better
                          * choice for the last slot, or not.  Rescan.
                          */
-                       needscan = dfp == &d->hdr.bestfree[2];
+                       needscan = dfp == &hdr->bestfree[2];
                 }
         }
         /*
@@ -757,25 +868,25 @@ xfs_dir2_data_use_free(
          */
         else if (matchback) {
                 newdup = dup;
-               newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
+               newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
                 *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)d);
+                       cpu_to_be16((char *)newdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, newdup);
                 /*
                  * If it was in the table, remove it and add the new one.
                  */
                 if (dfp) {
-                       xfs_dir2_data_freeremove(d, dfp, needlogp);
-                       dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+                       xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+                       dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
                         ASSERT(dfp != NULL);
                         ASSERT(dfp->length == newdup->length);
-                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
+                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
                         /*
                          * If we got inserted at the last slot,
                          * that means we don't know if there was a better
                          * choice for the last slot, or not.  Rescan.
                          */
-                       needscan = dfp == &d->hdr.bestfree[2];
+                       needscan = dfp == &hdr->bestfree[2];
                 }
         }
         /*
@@ -784,15 +895,15 @@ xfs_dir2_data_use_free(
          */
         else {
                 newdup = dup;
-               newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
+               newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
                 *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)d);
+                       cpu_to_be16((char *)newdup - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, newdup);
-               newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+               newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
                 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
                 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
                 *xfs_dir2_data_unused_tag_p(newdup2) =
-                       cpu_to_be16((char *)newdup2 - (char *)d);
+                       cpu_to_be16((char *)newdup2 - (char *)hdr);
                 xfs_dir2_data_log_unused(tp, bp, newdup2);
                 /*
                  * If the old entry was in the table, we need to scan
@@ -803,13 +914,12 @@ xfs_dir2_data_use_free(
                  * the 2 new will work.
                  */
                 if (dfp) {
-                       needscan = (d->hdr.bestfree[2].length != 0);
+                       needscan = (hdr->bestfree[2].length != 0);
                         if (!needscan) {
-                               xfs_dir2_data_freeremove(d, dfp, needlogp);
-                               (void)xfs_dir2_data_freeinsert(d, newdup,
-                                       needlogp);
-                               (void)xfs_dir2_data_freeinsert(d, newdup2,
-                                       needlogp);
+                               xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+                               xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+                               xfs_dir2_data_freeinsert(hdr, newdup2,
+                                                        needlogp);
                         }
                 }
         }
diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c

index 9ce2320f41b399cf17dcc3a89f50916dd51a4488..d303813c2199208dcb50d7b4ea5e9aadadf1540b 100644 (file)
--- a/libxfs/xfs_dir2_leaf.c
+++ b/libxfs/xfs_dir2_leaf.c
@@ -22,16 +22,93 @@
   * Local function declarations.
   */
  #ifdef DEBUG
-static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
+static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
  #else
  #define        xfs_dir2_leaf_check(dp, bp)
  #endif
-static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
-                                   int *indexp, xfs_dabuf_t **dbpp);
-static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
+                                   int *indexp, struct xfs_buf **dbpp);
+static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
                                     int first, int last);
-static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
  
+static void
+xfs_dir2_leaf_verify(                  /* check a leaf block's magic */
+       struct xfs_buf          *bp,            /* buffer to check */
+       __be16                  magic)          /* expected magic, big-endian */
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir2_leaf_hdr *hdr = bp->b_addr;
+       int                     block_ok = 0;   /* set when magic matches */
+
+       block_ok = hdr->info.magic == magic;    /* magic is already __be16 */
+       if (!block_ok) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED); /* report against bp */
+       }
+}
+
+static void
+xfs_dir2_leaf1_read_verify(
+       struct xfs_buf  *bp)            /* buffer to check for LEAF1 magic */
+{
+       xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
+}
+
+static void
+xfs_dir2_leaf1_write_verify(
+       struct xfs_buf  *bp)            /* buffer to check for LEAF1 magic */
+{
+       xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
+}
+
+void
+xfs_dir2_leafn_read_verify(
+       struct xfs_buf  *bp)            /* buffer to check for LEAFN magic */
+{
+       xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+}
+
+void
+xfs_dir2_leafn_write_verify(
+       struct xfs_buf  *bp)            /* buffer to check for LEAFN magic */
+{
+       xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+}
+
+static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = {
+       .verify_read = xfs_dir2_leaf1_read_verify,      /* LEAF1 magic check */
+       .verify_write = xfs_dir2_leaf1_write_verify,    /* LEAF1 magic check */
+};
+
+const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = {
+       .verify_read = xfs_dir2_leafn_read_verify,      /* LEAFN magic check */
+       .verify_write = xfs_dir2_leafn_write_verify,    /* LEAFN magic check */
+};
+
+static int
+xfs_dir2_leaf_read(                    /* read via LEAF1 buf ops */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *dp,            /* incore directory inode */
+       xfs_dablk_t             fbno,           /* da block number to read */
+       xfs_daddr_t             mappedbno,      /* passed through unchanged */
+       struct xfs_buf          **bpp)          /* out: from xfs_da_read_buf */
+{
+       return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+                               XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops);
+}
+
+int
+xfs_dir2_leafn_read(                   /* read via LEAFN buf ops */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *dp,            /* incore directory inode */
+       xfs_dablk_t             fbno,           /* da block number to read */
+       xfs_daddr_t             mappedbno,      /* passed through unchanged */
+       struct xfs_buf          **bpp)          /* out: from xfs_da_read_buf */
+{
+       return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+                               XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops);
+}
  
  /*
   * Convert a block form directory to a leaf form directory.
@@ -39,16 +116,16 @@ static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
  int                                            /* error */
  xfs_dir2_block_to_leaf(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *dbp)           /* input block's buffer */
+       struct xfs_buf          *dbp)           /* input block's buffer */
  {
         __be16                  *bestsp;        /* leaf's bestsp entries */
         xfs_dablk_t             blkno;          /* leaf block's bno */
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_leaf_entry_t   *blp;           /* block's leaf entries */
         xfs_dir2_block_tail_t   *btp;           /* block's tail */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
-       xfs_dabuf_t             *lbp;           /* leaf block's buffer */
+       struct xfs_buf          *lbp;           /* leaf block's buffer */
         xfs_dir2_db_t           ldb;            /* leaf block's bno */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_dir2_leaf_tail_t    *ltp;           /* leaf's tail */
@@ -79,10 +156,10 @@ xfs_dir2_block_to_leaf(
                 return error;
         }
         ASSERT(lbp != NULL);
-       leaf = lbp->data;
-       block = dbp->data;
+       leaf = lbp->b_addr;
+       hdr = dbp->b_addr;
         xfs_dir2_data_check(dp, dbp);
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
         /*
          * Set the counts in the leaf header.
@@ -102,23 +179,24 @@ xfs_dir2_block_to_leaf(
          * tail be free.
          */
         xfs_dir2_data_make_free(tp, dbp,
-               (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
-               (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
+               (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+               (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
                                        (char *)blp),
                 &needlog, &needscan);
         /*
          * Fix up the block header, make it a data block.
          */
-       block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+       dbp->b_ops = &xfs_dir2_data_buf_ops;
+       hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
         if (needscan)
-               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         /*
          * Set up leaf tail and bests table.
          */
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
         ltp->bestcount = cpu_to_be32(1);
         bestsp = xfs_dir2_leaf_bests_p(ltp);
-       bestsp[0] =  block->hdr.bestfree[0].length;
+       bestsp[0] =  hdr->bestfree[0].length;
         /*
          * Log the data header and leaf bests table.
          */
@@ -127,10 +205,134 @@ xfs_dir2_block_to_leaf(
         xfs_dir2_leaf_check(dp, lbp);
         xfs_dir2_data_check(dp, dbp);
         xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
-       xfs_da_buf_done(lbp);
         return 0;
  }
  
+STATIC void
+xfs_dir2_leaf_find_stale(
+       struct xfs_dir2_leaf    *leaf,          /* leaf structure */
+       int                     index,          /* leaf table position */
+       int                     *lowstale,      /* out: lower stale idx or -1 */
+       int                     *highstale)     /* out: upper scan stop index */
+{
+       /*
+        * Find the first stale entry before our index, if any.
+        */
+       for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
+               if (leaf->ents[*lowstale].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       break;
+       }
+
+       /*
+        * Find the first stale entry at or after our index, if any.
+        * Stop if the result would require moving more entries than using
+        * lowstale.
+        */
+       for (*highstale = index;
+            *highstale < be16_to_cpu(leaf->hdr.count);
+            ++*highstale) {
+               if (leaf->ents[*highstale].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       break;
+               if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
+                       break;          /* lowstale is no farther away */
+       }
+}
+
+struct xfs_dir2_leaf_entry *           /* slot for the new entry */
+xfs_dir2_leaf_find_entry(
+       xfs_dir2_leaf_t         *leaf,          /* leaf structure */
+       int                     index,          /* leaf table position */
+       int                     compact,        /* need to compact leaves */
+       int                     lowstale,       /* index of prev stale leaf */
+       int                     highstale,      /* index of next stale leaf */
+       int                     *lfloglow,      /* in/out: low logging index */
+       int                     *lfloghigh)     /* in/out: high logging index */
+{
+       if (!leaf->hdr.stale) {
+               xfs_dir2_leaf_entry_t   *lep;   /* leaf entry table pointer */
+
+               /*
+                * Now we need to make room to insert the leaf entry.
+                *
+                * If there are no stale entries, just insert a hole at index.
+                */
+               lep = &leaf->ents[index];
+               if (index < be16_to_cpu(leaf->hdr.count))
+                       memmove(lep + 1, lep,
+                               (be16_to_cpu(leaf->hdr.count) - index) *
+                                sizeof(*lep));
+
+               /*
+                * Record low and high logging indices for the leaf.
+                */
+               *lfloglow = index;
+               *lfloghigh = be16_to_cpu(leaf->hdr.count);
+               be16_add_cpu(&leaf->hdr.count, 1);      /* one more entry */
+               return lep;             /* hole opened at index */
+       }
+
+       /*
+        * There are stale entries.
+        *
+        * We will use one of them for the new entry.  It's probably not at
+        * the right location, so we'll have to shift some up or down first.
+        *
+        * If we didn't compact before, we need to find the nearest stale
+        * entries before and after our insertion point.
+        */
+       if (compact == 0)
+               xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
+
+       /*
+        * If the low one is better, use it.
+        */
+       if (lowstale >= 0 &&
+           (highstale == be16_to_cpu(leaf->hdr.count) ||
+            index - lowstale - 1 < highstale - index)) {
+               ASSERT(index - lowstale - 1 >= 0);
+               ASSERT(leaf->ents[lowstale].address ==
+                      cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+               /*
+                * Copy entries up to cover the stale entry and make room
+                * for the new entry.
+                */
+               if (index - lowstale - 1 > 0) {
+                       memmove(&leaf->ents[lowstale],
+                               &leaf->ents[lowstale + 1],
+                               (index - lowstale - 1) *
+                               sizeof(xfs_dir2_leaf_entry_t));
+               }
+               *lfloglow = MIN(lowstale, *lfloglow);
+               *lfloghigh = MAX(index - 1, *lfloghigh);
+               be16_add_cpu(&leaf->hdr.stale, -1);     /* stale slot consumed */
+               return &leaf->ents[index - 1];  /* slot freed below index */
+       }
+
+       /*
+        * The high one is better, so use that one.
+        */
+       ASSERT(highstale - index >= 0);
+       ASSERT(leaf->ents[highstale].address ==
+              cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+       /*
+        * Copy entries down to cover the stale entry and make room for the
+        * new entry.
+        */
+       if (highstale - index > 0) {
+               memmove(&leaf->ents[index + 1],
+                       &leaf->ents[index],
+                       (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
+       }
+       *lfloglow = MIN(index, *lfloglow);
+       *lfloghigh = MAX(highstale, *lfloghigh);
+       be16_add_cpu(&leaf->hdr.stale, -1);     /* stale slot consumed */
+       return &leaf->ents[index];      /* slot freed at index */
+}
+
  /*
   * Add an entry to a leaf form directory.
   */
@@ -140,8 +342,8 @@ xfs_dir2_leaf_addname(
  {
         __be16                  *bestsp;        /* freespace table in leaf */
         int                     compact;        /* need to compact leaves */
-       xfs_dir2_data_t         *data;          /* data block structure */
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_dir2_data_entry_t   *dep;           /* data block entry */
         xfs_inode_t             *dp;            /* incore directory inode */
         xfs_dir2_data_unused_t  *dup;           /* data unused entry */
@@ -150,7 +352,7 @@ xfs_dir2_leaf_addname(
         int                     highstale;      /* index of next stale leaf */
         int                     i;              /* temporary, index */
         int                     index;          /* leaf table position */
-       xfs_dabuf_t             *lbp;           /* leaf's buffer */
+       struct xfs_buf          *lbp;           /* leaf's buffer */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         int                     length;         /* length of new entry */
         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
@@ -171,15 +373,11 @@ xfs_dir2_leaf_addname(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       /*
-        * Read the leaf block.
-        */
-       error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
-               XFS_DATA_FORK);
-       if (error) {
+
+       error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
+       if (error)
                 return error;
-       }
-       ASSERT(lbp != NULL);
+
         /*
          * Look up the entry by hash value and name.
          * We know it's not there, our caller has already done a lookup.
@@ -187,7 +385,7 @@ xfs_dir2_leaf_addname(
          * But if there are dup hash values the index is of the first of those.
          */
         index = xfs_dir2_leaf_search_hash(args, lbp);
-       leaf = lbp->data;
+       leaf = lbp->b_addr;
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
         bestsp = xfs_dir2_leaf_bests_p(ltp);
         length = xfs_dir2_data_entsize(args->namelen);
@@ -204,7 +402,7 @@ xfs_dir2_leaf_addname(
                         continue;
                 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
                 ASSERT(i < be32_to_cpu(ltp->bestcount));
-               ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
+               ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
                 if (be16_to_cpu(bestsp[i]) >= length) {
                         use_block = i;
                         break;
@@ -218,7 +416,8 @@ xfs_dir2_leaf_addname(
                         /*
                          * Remember a block we see that's missing.
                          */
-                       if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1)
+                       if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
+                           use_block == -1)
                                 use_block = i;
                         else if (be16_to_cpu(bestsp[i]) >= length) {
                                 use_block = i;
@@ -229,14 +428,17 @@ xfs_dir2_leaf_addname(
         /*
          * How many bytes do we need in the leaf block?
          */
-       needbytes =
-               (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) +
-               (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
+       needbytes = 0;
+       if (!leaf->hdr.stale)
+               needbytes += sizeof(xfs_dir2_leaf_entry_t);
+       if (use_block == -1)
+               needbytes += sizeof(xfs_dir2_data_off_t);
+
         /*
          * Now kill use_block if it refers to a missing block, so we
          * can use it as an indication of allocation needed.
          */
-       if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF)
+       if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
                 use_block = -1;
         /*
          * If we don't have enough free bytes but we can make enough
@@ -257,14 +459,13 @@ xfs_dir2_leaf_addname(
                  */
                 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
                                                         args->total == 0) {
-                       xfs_da_brelse(tp, lbp);
+                       xfs_trans_brelse(tp, lbp);
                         return XFS_ERROR(ENOSPC);
                 }
                 /*
                  * Convert to node form.
                  */
                 error = xfs_dir2_leaf_to_node(args, lbp);
-               xfs_da_buf_done(lbp);
                 if (error)
                         return error;
                 /*
@@ -282,7 +483,7 @@ xfs_dir2_leaf_addname(
          * a new data block.
          */
         if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
-               xfs_da_brelse(tp, lbp);
+               xfs_trans_brelse(tp, lbp);
                 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
         }
         /*
@@ -290,7 +491,7 @@ xfs_dir2_leaf_addname(
          * changed anything.
          */
         if (args->total == 0 && use_block == -1) {
-               xfs_da_brelse(tp, lbp);
+               xfs_trans_brelse(tp, lbp);
                 return XFS_ERROR(ENOSPC);
         }
         /*
@@ -321,14 +522,14 @@ xfs_dir2_leaf_addname(
                  */
                 if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
                                 &use_block))) {
-                       xfs_da_brelse(tp, lbp);
+                       xfs_trans_brelse(tp, lbp);
                         return error;
                 }
                 /*
                  * Initialize the block.
                  */
                 if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
-                       xfs_da_brelse(tp, lbp);
+                       xfs_trans_brelse(tp, lbp);
                         return error;
                 }
                 /*
@@ -348,37 +549,36 @@ xfs_dir2_leaf_addname(
                  */
                 else
                         xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
-               data = dbp->data;
-               bestsp[use_block] = data->hdr.bestfree[0].length;
+               hdr = dbp->b_addr;
+               bestsp[use_block] = hdr->bestfree[0].length;
                 grown = 1;
-       }
-       /*
-        * Already had space in some data block.
-        * Just read that one in.
-        */
-       else {
-               if ((error =
-                   xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
-                           -1, &dbp, XFS_DATA_FORK))) {
-                       xfs_da_brelse(tp, lbp);
+       } else {
+               /*
+                * Already had space in some data block.
+                * Just read that one in.
+                */
+               error = xfs_dir2_data_read(tp, dp,
+                                          xfs_dir2_db_to_da(mp, use_block),
+                                          -1, &dbp);
+               if (error) {
+                       xfs_trans_brelse(tp, lbp);
                         return error;
                 }
-               data = dbp->data;
+               hdr = dbp->b_addr;
                 grown = 0;
         }
-       xfs_dir2_data_check(dp, dbp);
         /*
          * Point to the biggest freespace in our data block.
          */
         dup = (xfs_dir2_data_unused_t *)
-             ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
+             ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
         ASSERT(be16_to_cpu(dup->length) >= length);
         needscan = needlog = 0;
         /*
          * Mark the initial part of our freespace in use for the new entry.
          */
         xfs_dir2_data_use_free(tp, dbp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
                 &needlog, &needscan);
         /*
          * Initialize our new entry (at last).
@@ -388,12 +588,12 @@ xfs_dir2_leaf_addname(
         dep->namelen = args->namelen;
         memcpy(dep->name, args->name, dep->namelen);
         tagp = xfs_dir2_data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)data);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
         /*
          * Need to scan fix up the bestfree table.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, data, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         /*
          * Need to log the data block's header.
          */
@@ -404,107 +604,15 @@ xfs_dir2_leaf_addname(
          * If the bests table needs to be changed, do it.
          * Log the change unless we've already done that.
          */
-       if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
-               bestsp[use_block] = data->hdr.bestfree[0].length;
+       if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
+               bestsp[use_block] = hdr->bestfree[0].length;
                 if (!grown)
                         xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
         }
-       /*
-        * Now we need to make room to insert the leaf entry.
-        * If there are no stale entries, we just insert a hole at index.
-        */
-       if (!leaf->hdr.stale) {
-               /*
-                * lep is still good as the index leaf entry.
-                */
-               if (index < be16_to_cpu(leaf->hdr.count))
-                       memmove(lep + 1, lep,
-                               (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
-               /*
-                * Record low and high logging indices for the leaf.
-                */
-               lfloglow = index;
-               lfloghigh = be16_to_cpu(leaf->hdr.count);
-               be16_add_cpu(&leaf->hdr.count, 1);
-       }
-       /*
-        * There are stale entries.
-        * We will use one of them for the new entry.
-        * It's probably not at the right location, so we'll have to
-        * shift some up or down first.
-        */
-       else {
-               /*
-                * If we didn't compact before, we need to find the nearest
-                * stale entries before and after our insertion point.
-                */
-               if (compact == 0) {
-                       /*
-                        * Find the first stale entry before the insertion
-                        * point, if any.
-                        */
-                       for (lowstale = index - 1;
-                            lowstale >= 0 &&
-                               be32_to_cpu(leaf->ents[lowstale].address) !=
-                               XFS_DIR2_NULL_DATAPTR;
-                            lowstale--)
-                               continue;
-                       /*
-                        * Find the next stale entry at or after the insertion
-                        * point, if any.   Stop if we go so far that the
-                        * lowstale entry would be better.
-                        */
-                       for (highstale = index;
-                            highstale < be16_to_cpu(leaf->hdr.count) &&
-                               be32_to_cpu(leaf->ents[highstale].address) !=
-                               XFS_DIR2_NULL_DATAPTR &&
-                               (lowstale < 0 ||
-                                index - lowstale - 1 >= highstale - index);
-                            highstale++)
-                               continue;
-               }
-               /*
-                * If the low one is better, use it.
-                */
-               if (lowstale >= 0 &&
-                   (highstale == be16_to_cpu(leaf->hdr.count) ||
-                    index - lowstale - 1 < highstale - index)) {
-                       ASSERT(index - lowstale - 1 >= 0);
-                       ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
-                              XFS_DIR2_NULL_DATAPTR);
-                       /*
-                        * Copy entries up to cover the stale entry
-                        * and make room for the new entry.
-                        */
-                       if (index - lowstale - 1 > 0)
-                               memmove(&leaf->ents[lowstale],
-                                       &leaf->ents[lowstale + 1],
-                                       (index - lowstale - 1) * sizeof(*lep));
-                       lep = &leaf->ents[index - 1];
-                       lfloglow = MIN(lowstale, lfloglow);
-                       lfloghigh = MAX(index - 1, lfloghigh);
-               }
-               /*
-                * The high one is better, so use that one.
-                */
-               else {
-                       ASSERT(highstale - index >= 0);
-                       ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
-                              XFS_DIR2_NULL_DATAPTR);
-                       /*
-                        * Copy entries down to cover the stale entry
-                        * and make room for the new entry.
-                        */
-                       if (highstale - index > 0)
-                               memmove(&leaf->ents[index + 1],
-                                       &leaf->ents[index],
-                                       (highstale - index) * sizeof(*lep));
-                       lep = &leaf->ents[index];
-                       lfloglow = MIN(index, lfloglow);
-                       lfloghigh = MAX(highstale, lfloghigh);
-               }
-               be16_add_cpu(&leaf->hdr.stale, -1);
-       }
+
+       lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
+                                      highstale, &lfloglow, &lfloghigh);
+
         /*
          * Fill in the new leaf entry.
          */
@@ -517,9 +625,7 @@ xfs_dir2_leaf_addname(
         xfs_dir2_leaf_log_header(tp, lbp);
         xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
         xfs_dir2_leaf_check(dp, lbp);
-       xfs_da_buf_done(lbp);
         xfs_dir2_data_check(dp, dbp);
-       xfs_da_buf_done(dbp);
         return 0;
  }
  
@@ -530,8 +636,8 @@ xfs_dir2_leaf_addname(
   */
  STATIC void
  xfs_dir2_leaf_check(
-       xfs_inode_t             *dp,            /* incore directory inode */
-       xfs_dabuf_t             *bp)            /* leaf's buffer */
+       struct xfs_inode        *dp,            /* incore directory inode */
+       struct xfs_buf          *bp)            /* leaf's buffer */
  {
         int                     i;              /* leaf index */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
@@ -539,9 +645,9 @@ xfs_dir2_leaf_check(
         xfs_mount_t             *mp;            /* filesystem mount point */
         int                     stale;          /* count of stale leaves */
  
-       leaf = bp->data;
+       leaf = bp->b_addr;
         mp = dp->i_mount;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
         /*
          * This value is not restrictive enough.
          * Should factor in the size of the bests table as well.
@@ -561,7 +667,7 @@ xfs_dir2_leaf_check(
                 if (i + 1 < be16_to_cpu(leaf->hdr.count))
                         ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
                                be32_to_cpu(leaf->ents[i + 1].hashval));
-               if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+               if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                         stale++;
         }
         ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -575,14 +681,14 @@ xfs_dir2_leaf_check(
  void
  xfs_dir2_leaf_compact(
         xfs_da_args_t   *args,          /* operation arguments */
-       xfs_dabuf_t     *bp)            /* leaf buffer */
+       struct xfs_buf  *bp)            /* leaf buffer */
  {
         int             from;           /* source leaf index */
         xfs_dir2_leaf_t *leaf;          /* leaf structure */
         int             loglow;         /* first leaf entry to log */
         int             to;             /* target leaf index */
  
-       leaf = bp->data;
+       leaf = bp->b_addr;
         if (!leaf->hdr.stale) {
                 return;
         }
@@ -590,7 +696,8 @@ xfs_dir2_leaf_compact(
          * Compress out the stale entries in place.
          */
         for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
-               if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
+               if (leaf->ents[from].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                         continue;
                 /*
                  * Only actually copy the entries that are different.
@@ -623,7 +730,7 @@ xfs_dir2_leaf_compact(
   */
  void
  xfs_dir2_leaf_compact_x1(
-       xfs_dabuf_t     *bp,            /* leaf buffer */
+       struct xfs_buf  *bp,            /* leaf buffer */
         int             *indexp,        /* insertion index */
         int             *lowstalep,     /* out: stale entry before us */
         int             *highstalep,    /* out: stale entry after us */
@@ -639,27 +746,12 @@ xfs_dir2_leaf_compact_x1(
         int             newindex=0;     /* new insertion index */
         int             to;             /* destination copy index */
  
-       leaf = bp->data;
+       leaf = bp->b_addr;
         ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
         index = *indexp;
-       /*
-        * Find the first stale entry before our index, if any.
-        */
-       for (lowstale = index - 1;
-            lowstale >= 0 &&
-               be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
-            lowstale--)
-               continue;
-       /*
-        * Find the first stale entry at or after our index, if any.
-        * Stop if the answer would be worse than lowstale.
-        */
-       for (highstale = index;
-            highstale < be16_to_cpu(leaf->hdr.count) &&
-               be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
-               (lowstale < 0 || index - lowstale > highstale - index);
-            highstale++)
-               continue;
+
+       xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
+
         /*
          * Pick the better of lowstale and highstale.
          */
@@ -680,7 +772,8 @@ xfs_dir2_leaf_compact_x1(
                 if (index == from)
                         newindex = to;
                 if (from != keepstale &&
-                   be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) {
+                   leaf->ents[from].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
                         if (from == to)
                                 *lowlogp = to;
                         continue;
@@ -730,10 +823,10 @@ int
  xfs_dir2_leaf_init(
         xfs_da_args_t           *args,          /* operation arguments */
         xfs_dir2_db_t           bno,            /* directory block number */
-       xfs_dabuf_t             **bpp,          /* out: leaf buffer */
+       struct xfs_buf          **bpp,          /* out: leaf buffer */
         int                     magic)          /* magic number for block */
  {
-       xfs_dabuf_t             *bp;            /* leaf buffer */
+       struct xfs_buf          *bp;            /* leaf buffer */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
@@ -751,15 +844,14 @@ xfs_dir2_leaf_init(
          * Get the buffer for the block.
          */
         error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
-               XFS_DATA_FORK);
-       if (error) {
+                              XFS_DATA_FORK);
+       if (error)
                 return error;
-       }
-       ASSERT(bp != NULL);
-       leaf = bp->data;
+
         /*
          * Initialize the header.
          */
+       leaf = bp->b_addr;
         leaf->hdr.info.magic = cpu_to_be16(magic);
         leaf->hdr.info.forw = 0;
         leaf->hdr.info.back = 0;
@@ -772,10 +864,12 @@ xfs_dir2_leaf_init(
          * the block.
          */
         if (magic == XFS_DIR2_LEAF1_MAGIC) {
+               bp->b_ops = &xfs_dir2_leaf1_buf_ops;
                 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
                 ltp->bestcount = 0;
                 xfs_dir2_leaf_log_tail(tp, bp);
-       }
+       } else
+               bp->b_ops = &xfs_dir2_leafn_buf_ops;
         *bpp = bp;
         return 0;
  }
@@ -786,7 +880,7 @@ xfs_dir2_leaf_init(
  static void
  xfs_dir2_leaf_log_bests(
         xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         int                     first,          /* first entry to log */
         int                     last)           /* last entry to log */
  {
@@ -795,12 +889,12 @@ xfs_dir2_leaf_log_bests(
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
         ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
         firstb = xfs_dir2_leaf_bests_p(ltp) + first;
         lastb = xfs_dir2_leaf_bests_p(ltp) + last;
-       xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
+       xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
                 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
  }
  
@@ -810,7 +904,7 @@ xfs_dir2_leaf_log_bests(
  void
  xfs_dir2_leaf_log_ents(
         xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         int                     first,          /* first entry to log */
         int                     last)           /* last entry to log */
  {
@@ -818,12 +912,12 @@ xfs_dir2_leaf_log_ents(
         xfs_dir2_leaf_entry_t   *lastlep;       /* pointer to last entry */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
-              be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         firstlep = &leaf->ents[first];
         lastlep = &leaf->ents[last];
-       xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
+       xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
                 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
  }
  
@@ -832,15 +926,15 @@ xfs_dir2_leaf_log_ents(
   */
  void
  xfs_dir2_leaf_log_header(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp)            /* leaf buffer */
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp)
  {
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
-              be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
-       xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+       xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
                 (uint)(sizeof(leaf->hdr) - 1));
  }
  
@@ -849,18 +943,18 @@ xfs_dir2_leaf_log_header(
   */
  STATIC void
  xfs_dir2_leaf_log_tail(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp)            /* leaf buffer */
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp)
  {
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
         xfs_mount_t             *mp;            /* filesystem mount point */
  
         mp = tp->t_mountp;
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-       xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
+       xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
                 (uint)(mp->m_dirblksize - 1));
  }
  
@@ -873,12 +967,12 @@ int
  xfs_dir2_leaf_lookup(
         xfs_da_args_t           *args)          /* operation arguments */
  {
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_dir2_data_entry_t   *dep;           /* data block entry */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
         int                     index;          /* found entry index */
-       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       struct xfs_buf          *lbp;           /* leaf buffer */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
         xfs_trans_t             *tp;            /* transaction pointer */
@@ -894,7 +988,7 @@ xfs_dir2_leaf_lookup(
         tp = args->trans;
         dp = args->dp;
         xfs_dir2_leaf_check(dp, lbp);
-       leaf = lbp->data;
+       leaf = lbp->b_addr;
         /*
          * Get to the leaf entry and contained data entry address.
          */
@@ -903,15 +997,15 @@ xfs_dir2_leaf_lookup(
          * Point to the data entry.
          */
         dep = (xfs_dir2_data_entry_t *)
-             ((char *)dbp->data +
+             ((char *)dbp->b_addr +
                xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
         /*
          * Return the found inode number & CI name if appropriate
          */
         args->inumber = be64_to_cpu(dep->inumber);
         error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-       xfs_da_brelse(tp, dbp);
-       xfs_da_brelse(tp, lbp);
+       xfs_trans_brelse(tp, dbp);
+       xfs_trans_brelse(tp, lbp);
         return XFS_ERROR(error);
  }
  
@@ -924,17 +1018,17 @@ xfs_dir2_leaf_lookup(
  static int                                     /* error */
  xfs_dir2_leaf_lookup_int(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             **lbpp,         /* out: leaf buffer */
+       struct xfs_buf          **lbpp,         /* out: leaf buffer */
         int                     *indexp,        /* out: index in leaf block */
-       xfs_dabuf_t             **dbpp)         /* out: data buffer */
+       struct xfs_buf          **dbpp)         /* out: data buffer */
  {
         xfs_dir2_db_t           curdb = -1;     /* current data block number */
-       xfs_dabuf_t             *dbp = NULL;    /* data buffer */
+       struct xfs_buf          *dbp = NULL;    /* data buffer */
         xfs_dir2_data_entry_t   *dep;           /* data entry */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
         int                     index;          /* index in leaf block */
-       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       struct xfs_buf          *lbp;           /* leaf buffer */
         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_mount_t             *mp;            /* filesystem mount point */
@@ -946,15 +1040,13 @@ xfs_dir2_leaf_lookup_int(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       /*
-        * Read the leaf block into the buffer.
-        */
-       error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
-                                                       XFS_DATA_FORK);
+
+       error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
         if (error)
                 return error;
+
         *lbpp = lbp;
-       leaf = lbp->data;
+       leaf = lbp->b_addr;
         xfs_dir2_leaf_check(dp, lbp);
         /*
          * Look for the first leaf entry with our hash value.
@@ -982,21 +1074,20 @@ xfs_dir2_leaf_lookup_int(
                  */
                 if (newdb != curdb) {
                         if (dbp)
-                               xfs_da_brelse(tp, dbp);
-                       error = xfs_da_read_buf(tp, dp,
-                                               xfs_dir2_db_to_da(mp, newdb),
-                                               -1, &dbp, XFS_DATA_FORK);
+                               xfs_trans_brelse(tp, dbp);
+                       error = xfs_dir2_data_read(tp, dp,
+                                                  xfs_dir2_db_to_da(mp, newdb),
+                                                  -1, &dbp);
                         if (error) {
-                               xfs_da_brelse(tp, lbp);
+                               xfs_trans_brelse(tp, lbp);
                                 return error;
                         }
-                       xfs_dir2_data_check(dp, dbp);
                         curdb = newdb;
                 }
                 /*
                  * Point to the data entry.
                  */
-               dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
+               dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
                         xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
                 /*
                  * Compare name and if it's an exact match, return the index
@@ -1024,12 +1115,12 @@ xfs_dir2_leaf_lookup_int(
         if (args->cmpresult == XFS_CMP_CASE) {
                 ASSERT(cidb != -1);
                 if (cidb != curdb) {
-                       xfs_da_brelse(tp, dbp);
-                       error = xfs_da_read_buf(tp, dp,
-                                               xfs_dir2_db_to_da(mp, cidb),
-                                               -1, &dbp, XFS_DATA_FORK);
+                       xfs_trans_brelse(tp, dbp);
+                       error = xfs_dir2_data_read(tp, dp,
+                                                  xfs_dir2_db_to_da(mp, cidb),
+                                                  -1, &dbp);
                         if (error) {
-                               xfs_da_brelse(tp, lbp);
+                               xfs_trans_brelse(tp, lbp);
                                 return error;
                         }
                 }
@@ -1041,8 +1132,8 @@ xfs_dir2_leaf_lookup_int(
          */
         ASSERT(cidb == -1);
         if (dbp)
-               xfs_da_brelse(tp, dbp);
-       xfs_da_brelse(tp, lbp);
+               xfs_trans_brelse(tp, dbp);
+       xfs_trans_brelse(tp, lbp);
         return XFS_ERROR(ENOENT);
  }
  
@@ -1054,15 +1145,15 @@ xfs_dir2_leaf_removename(
         xfs_da_args_t           *args)          /* operation arguments */
  {
         __be16                  *bestsp;        /* leaf block best freespace */
-       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_dir2_db_t           db;             /* data block number */
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_dir2_data_entry_t   *dep;           /* data entry structure */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
         xfs_dir2_db_t           i;              /* temporary data block # */
         int                     index;          /* index into leaf entries */
-       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       struct xfs_buf          *lbp;           /* leaf buffer */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
@@ -1083,8 +1174,8 @@ xfs_dir2_leaf_removename(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       leaf = lbp->data;
-       data = dbp->data;
+       leaf = lbp->b_addr;
+       hdr = dbp->b_addr;
         xfs_dir2_data_check(dp, dbp);
         /*
          * Point to the leaf entry, use that to point to the data entry.
@@ -1092,9 +1183,9 @@ xfs_dir2_leaf_removename(
         lep = &leaf->ents[index];
         db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
         dep = (xfs_dir2_data_entry_t *)
-             ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+             ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
         needscan = needlog = 0;
-       oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
+       oldbest = be16_to_cpu(hdr->bestfree[0].length);
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
         bestsp = xfs_dir2_leaf_bests_p(ltp);
         ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1102,7 +1193,7 @@ xfs_dir2_leaf_removename(
          * Mark the former data entry unused.
          */
         xfs_dir2_data_make_free(tp, dbp,
-               (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
+               (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
                 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
         /*
          * We just mark the leaf entry stale by putting a null in it.
@@ -1116,23 +1207,23 @@ xfs_dir2_leaf_removename(
          * log the data block header if necessary.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, data, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         if (needlog)
                 xfs_dir2_data_log_header(tp, dbp);
         /*
          * If the longest freespace in the data block has changed,
          * put the new value in the bests table and log that.
          */
-       if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) {
-               bestsp[db] = data->hdr.bestfree[0].length;
+       if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
+               bestsp[db] = hdr->bestfree[0].length;
                 xfs_dir2_leaf_log_bests(tp, lbp, db, db);
         }
         xfs_dir2_data_check(dp, dbp);
         /*
          * If the data block is now empty then get rid of the data block.
          */
-       if (be16_to_cpu(data->hdr.bestfree[0].length) ==
-           mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+       if (be16_to_cpu(hdr->bestfree[0].length) ==
+           mp->m_dirblksize - (uint)sizeof(*hdr)) {
                 ASSERT(db != mp->m_dirdatablk);
                 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
                         /*
@@ -1141,12 +1232,9 @@ xfs_dir2_leaf_removename(
                          * Just go on, returning success, leaving the
                          * empty block in place.
                          */
-                       if (error == ENOSPC && args->total == 0) {
-                               xfs_da_buf_done(dbp);
+                       if (error == ENOSPC && args->total == 0)
                                 error = 0;
-                       }
                         xfs_dir2_leaf_check(dp, lbp);
-                       xfs_da_buf_done(lbp);
                         return error;
                 }
                 dbp = NULL;
@@ -1159,7 +1247,7 @@ xfs_dir2_leaf_removename(
                          * Look for the last active entry (i).
                          */
                         for (i = db - 1; i > 0; i--) {
-                               if (be16_to_cpu(bestsp[i]) != NULLDATAOFF)
+                               if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
                                         break;
                         }
                         /*
@@ -1177,10 +1265,9 @@ xfs_dir2_leaf_removename(
         /*
          * If the data block was not the first one, drop it.
          */
-       else if (db != mp->m_dirdatablk && dbp != NULL) {
-               xfs_da_buf_done(dbp);
+       else if (db != mp->m_dirdatablk)
                 dbp = NULL;
-       }
+
         xfs_dir2_leaf_check(dp, lbp);
         /*
          * See if we can convert to block form.
@@ -1195,12 +1282,12 @@ int                                             /* error */
  xfs_dir2_leaf_replace(
         xfs_da_args_t           *args)          /* operation arguments */
  {
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_dir2_data_entry_t   *dep;           /* data block entry */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
         int                     index;          /* index of leaf entry */
-       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       struct xfs_buf          *lbp;           /* leaf buffer */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
         xfs_trans_t             *tp;            /* transaction pointer */
@@ -1214,7 +1301,7 @@ xfs_dir2_leaf_replace(
                 return error;
         }
         dp = args->dp;
-       leaf = lbp->data;
+       leaf = lbp->b_addr;
         /*
          * Point to the leaf entry, get data address from it.
          */
@@ -1223,7 +1310,7 @@ xfs_dir2_leaf_replace(
          * Point to the data entry.
          */
         dep = (xfs_dir2_data_entry_t *)
-             ((char *)dbp->data +
+             ((char *)dbp->b_addr +
                xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
         ASSERT(args->inumber != be64_to_cpu(dep->inumber));
         /*
@@ -1232,9 +1319,8 @@ xfs_dir2_leaf_replace(
         dep->inumber = cpu_to_be64(args->inumber);
         tp = args->trans;
         xfs_dir2_data_log_entry(tp, dbp, dep);
-       xfs_da_buf_done(dbp);
         xfs_dir2_leaf_check(dp, lbp);
-       xfs_da_brelse(tp, lbp);
+       xfs_trans_brelse(tp, lbp);
         return 0;
  }
  
@@ -1246,7 +1332,7 @@ xfs_dir2_leaf_replace(
  int                                            /* index value */
  xfs_dir2_leaf_search_hash(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *lbp)           /* leaf buffer */
+       struct xfs_buf          *lbp)           /* leaf buffer */
  {
         xfs_dahash_t            hash=0;         /* hash from this entry */
         xfs_dahash_t            hashwant;       /* hash value looking for */
@@ -1256,7 +1342,7 @@ xfs_dir2_leaf_search_hash(
         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
         int                     mid=0;          /* current leaf index */
  
-       leaf = lbp->data;
+       leaf = lbp->b_addr;
  #ifndef __KERNEL__
         if (!leaf->hdr.count)
                 return 0;
@@ -1299,14 +1385,11 @@ xfs_dir2_leaf_search_hash(
  int                                            /* error */
  xfs_dir2_leaf_trim_data(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *lbp,           /* leaf buffer */
+       struct xfs_buf          *lbp,           /* leaf buffer */
         xfs_dir2_db_t           db)             /* data block number */
  {
         __be16                  *bestsp;        /* leaf bests table */
-#ifdef DEBUG
-       xfs_dir2_data_t         *data;          /* data block structure */
-#endif
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return value */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
@@ -1320,30 +1403,30 @@ xfs_dir2_leaf_trim_data(
         /*
          * Read the offending data block.  We need its buffer.
          */
-       if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
-                       XFS_DATA_FORK))) {
+       error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
+       if (error)
                 return error;
-       }
-#ifdef DEBUG
-       data = dbp->data;
-       ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
-#endif
-       /* this seems to be an error
-        * data is only valid if DEBUG is defined?
-        * RMC 09/08/1999
-        */
  
-       leaf = lbp->data;
+       leaf = lbp->b_addr;
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
-       ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
-              mp->m_dirblksize - (uint)sizeof(data->hdr));
+
+#ifdef DEBUG
+{
+       struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+       ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
+              mp->m_dirblksize - (uint)sizeof(*hdr));
         ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
+}
+#endif
+
         /*
          * Get rid of the data block.
          */
         if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
                 ASSERT(error != ENOSPC);
-               xfs_da_brelse(tp, dbp);
+               xfs_trans_brelse(tp, dbp);
                 return error;
         }
         /*
@@ -1357,6 +1440,20 @@ xfs_dir2_leaf_trim_data(
         return 0;
  }
  
+static inline size_t                           /* bytes: hdr + ents + bests + tail */
+xfs_dir2_leaf_size(
+       struct xfs_dir2_leaf_hdr        *hdr,   /* leaf header (count, stale) */
+       int                             counts) /* number of bests slots to allow for */
+{
+       int                     entries;        /* count minus stale */
+
+       entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
+       return sizeof(xfs_dir2_leaf_hdr_t) +                    /* header */
+           entries * sizeof(xfs_dir2_leaf_entry_t) +           /* non-stale entries */
+           counts * sizeof(xfs_dir2_data_off_t) +              /* bests table */
+           sizeof(xfs_dir2_leaf_tail_t);                       /* tail */
+}
+
  /*
   * Convert node form directory to leaf form directory.
   * The root of the node form dir needs to already be a LEAFN block.
@@ -1369,10 +1466,10 @@ xfs_dir2_node_to_leaf(
         xfs_da_args_t           *args;          /* operation arguments */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
-       xfs_dabuf_t             *fbp;           /* buffer for freespace block */
+       struct xfs_buf          *fbp;           /* buffer for freespace block */
         xfs_fileoff_t           fo;             /* freespace file offset */
         xfs_dir2_free_t         *free;          /* freespace structure */
-       xfs_dabuf_t             *lbp;           /* buffer for leaf block */
+       struct xfs_buf          *lbp;           /* buffer for leaf block */
         xfs_dir2_leaf_tail_t    *ltp;           /* tail of leaf structure */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
         xfs_mount_t             *mp;            /* filesystem mount point */
@@ -1426,30 +1523,28 @@ xfs_dir2_node_to_leaf(
         if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
                 return 0;
         lbp = state->path.blk[0].bp;
-       leaf = lbp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = lbp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         /*
          * Read the freespace block.
          */
-       if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
-                       XFS_DATA_FORK))) {
+       error = xfs_dir2_free_read(tp, dp,  mp->m_dirfreeblk, &fbp);
+       if (error)
                 return error;
-       }
-       free = fbp->data;
-       ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+       free = fbp->b_addr;
+       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
         ASSERT(!free->hdr.firstdb);
+
         /*
          * Now see if the leafn and free data will fit in a leaf1.
          * If not, release the buffer and give up.
          */
-       if ((uint)sizeof(leaf->hdr) +
-           (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) +
-           be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
-           (uint)sizeof(leaf->tail) >
-           mp->m_dirblksize) {
-               xfs_da_brelse(tp, fbp);
+       if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
+                       mp->m_dirblksize) {
+               xfs_trans_brelse(tp, fbp);
                 return 0;
         }
+
         /*
          * If the leaf has any stale entries in it, compress them out.
          * The compact routine will log the header.
@@ -1458,7 +1553,10 @@ xfs_dir2_node_to_leaf(
                 xfs_dir2_leaf_compact(args, lbp);
         else
                 xfs_dir2_leaf_log_header(tp, lbp);
+
+       lbp->b_ops = &xfs_dir2_leaf1_buf_ops;
         leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
+
         /*
          * Set up the leaf tail from the freespace block.
          */
@@ -1468,7 +1566,7 @@ xfs_dir2_node_to_leaf(
          * Set up the leaf bests table.
          */
         memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
-               be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
+               be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
         xfs_dir2_leaf_log_tail(tp, lbp);
         xfs_dir2_leaf_check(dp, lbp);
diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c

index e4e20d626f7702583a0164eee14891deb120c853..649f67764ca55803202a83a89ba0ed236c358506 100644 (file)
--- a/libxfs/xfs_dir2_node.c
+++ b/libxfs/xfs_dir2_node.c
@@ -21,40 +21,108 @@
  /*
   * Function declarations.
   */
-static void xfs_dir2_free_log_header(xfs_trans_t *tp, xfs_dabuf_t *bp);
-static int xfs_dir2_leafn_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index);
+static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
+                             int index);
  #ifdef DEBUG
-static void xfs_dir2_leafn_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
+static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp);
  #else
  #define        xfs_dir2_leafn_check(dp, bp)
  #endif
-static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, xfs_dabuf_t *bp_s,
-                                   int start_s, xfs_dabuf_t *bp_d, int start_d,
-                                   int count);
+static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s,
+                                   int start_s, struct xfs_buf *bp_d,
+                                   int start_d, int count);
  static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
                                      xfs_da_state_blk_t *blk1,
                                      xfs_da_state_blk_t *blk2);
-static int xfs_dir2_leafn_remove(xfs_da_args_t *args, xfs_dabuf_t *bp,
+static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
                                  int index, xfs_da_state_blk_t *dblk,
                                  int *rval);
  static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
                                      xfs_da_state_blk_t *fblk);
  
+static void                                    /* flags bp when the magic is wrong */
+xfs_dir2_free_verify(
+       struct xfs_buf          *bp)            /* freespace block buffer */
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;   /* only used in the report */
+       struct xfs_dir2_free_hdr *hdr = bp->b_addr;     /* header at start of buffer data */
+       int                     block_ok = 0;   /* nonzero when the magic matches */
+
+       block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC);      /* magic is the only check */
+       if (!block_ok) {                        /* mismatch: report, then fail the buffer */
+               XFS_CORRUPTION_ERROR("xfs_dir2_free_verify magic",
+                                    XFS_ERRLEVEL_LOW, mp, hdr);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);      /* pass EFSCORRUPTED to the buffer layer */
+       }
+}
+
+static void                            /* .verify_read entry of xfs_dir2_free_buf_ops */
+xfs_dir2_free_read_verify(
+       struct xfs_buf  *bp)            /* freespace block buffer */
+{
+       xfs_dir2_free_verify(bp);       /* same magic check as the write hook */
+}
+
+static void                            /* .verify_write entry of xfs_dir2_free_buf_ops */
+xfs_dir2_free_write_verify(
+       struct xfs_buf  *bp)            /* freespace block buffer */
+{
+       xfs_dir2_free_verify(bp);       /* same magic check as the read hook */
+}
+
+static const struct xfs_buf_ops xfs_dir2_free_buf_ops = {      /* verifier pair for freespace blocks */
+       .verify_read = xfs_dir2_free_read_verify,       /* both hooks call xfs_dir2_free_verify() */
+       .verify_write = xfs_dir2_free_write_verify,
+};
+
+
+static int                                     /* value returned by xfs_da_read_buf() */
+__xfs_dir2_free_read(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *dp,            /* incore directory inode */
+       xfs_dablk_t             fbno,           /* freespace block to read */
+       xfs_daddr_t             mappedbno,      /* forwarded unchanged to xfs_da_read_buf() */
+       struct xfs_buf          **bpp)          /* out: buffer that was read */
+{
+       return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+                               XFS_DATA_FORK, &xfs_dir2_free_buf_ops); /* data fork, free-block verifiers */
+}
+
+int                                            /* error */
+xfs_dir2_free_read(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *dp,            /* incore directory inode */
+       xfs_dablk_t             fbno,           /* freespace block to read */
+       struct xfs_buf          **bpp)          /* out: buffer that was read */
+{
+       return __xfs_dir2_free_read(tp, dp, fbno, -1, bpp);     /* mappedbno -1; NOTE(review): meaning set by xfs_da_read_buf() - confirm */
+}
+
+static int                                     /* error */
+xfs_dir2_free_try_read(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *dp,            /* incore directory inode */
+       xfs_dablk_t             fbno,           /* freespace block to read */
+       struct xfs_buf          **bpp)          /* out: buffer that was read */
+{
+       return __xfs_dir2_free_read(tp, dp, fbno, -2, bpp);     /* mappedbno -2; presumably tolerates an unmapped block - confirm in xfs_da_read_buf() */
+}
+
  /*
   * Log entries from a freespace block.
   */
  STATIC void
  xfs_dir2_free_log_bests(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp,            /* freespace buffer */
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
         int                     first,          /* first entry to log */
         int                     last)           /* last entry to log */
  {
         xfs_dir2_free_t         *free;          /* freespace structure */
  
-       free = bp->data;
-       ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
-       xfs_da_log_buf(tp, bp,
+       free = bp->b_addr;
+       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
+       xfs_trans_log_buf(tp, bp,
                 (uint)((char *)&free->bests[first] - (char *)free),
                 (uint)((char *)&free->bests[last] - (char *)free +
                        sizeof(free->bests[0]) - 1));
@@ -65,14 +133,14 @@ xfs_dir2_free_log_bests(
   */
  static void
  xfs_dir2_free_log_header(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_dabuf_t             *bp)            /* freespace buffer */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_buf          *bp)            /* freespace buffer */
  {
         xfs_dir2_free_t         *free;          /* freespace structure */
  
-       free = bp->data;
-       ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
-       xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
+       free = bp->b_addr;                      /* block contents now come from b_addr */
+       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));    /* constant is converted, not the field */
+       xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
                 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
  }
  
@@ -84,11 +152,11 @@ xfs_dir2_free_log_header(
  int                                            /* error */
  xfs_dir2_leaf_to_node(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *lbp)           /* leaf buffer */
+       struct xfs_buf          *lbp)           /* leaf buffer */
  {
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return value */
-       xfs_dabuf_t             *fbp;           /* freespace buffer */
+       struct xfs_buf          *fbp;           /* freespace buffer */
         xfs_dir2_db_t           fdb;            /* freespace block number */
         xfs_dir2_free_t         *free;          /* freespace structure */
         __be16                  *from;          /* pointer to freespace entry */
@@ -116,13 +184,14 @@ xfs_dir2_leaf_to_node(
         /*
          * Get the buffer for the new freespace block.
          */
-       if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
-                       XFS_DATA_FORK))) {
+       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
+                               XFS_DATA_FORK);
+       if (error)
                 return error;
-       }
-       ASSERT(fbp != NULL);
-       free = fbp->data;
-       leaf = lbp->data;
+       fbp->b_ops = &xfs_dir2_free_buf_ops;
+
+       free = fbp->b_addr;
+       leaf = lbp->b_addr;
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
         /*
          * Initialize the freespace block header.
@@ -142,14 +211,16 @@ xfs_dir2_leaf_to_node(
                 *to = cpu_to_be16(off);
         }
         free->hdr.nused = cpu_to_be32(n);
+
+       lbp->b_ops = &xfs_dir2_leafn_buf_ops;
         leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
+
         /*
          * Log everything.
          */
         xfs_dir2_leaf_log_header(tp, lbp);
         xfs_dir2_free_log_header(tp, fbp);
         xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1);
-       xfs_da_buf_done(fbp);
         xfs_dir2_leafn_check(dp, lbp);
         return 0;
  }
@@ -160,7 +231,7 @@ xfs_dir2_leaf_to_node(
   */
  static int                                     /* error */
  xfs_dir2_leafn_add(
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         xfs_da_args_t           *args,          /* operation arguments */
         int                     index)          /* insertion pt for new entry */
  {
@@ -180,7 +251,7 @@ xfs_dir2_leafn_add(
         dp = args->dp;
         mp = dp->i_mount;
         tp = args->trans;
-       leaf = bp->data;
+       leaf = bp->b_addr;
  
         /*
          * Quick check just to make sure we are not going to index
@@ -224,89 +295,13 @@ xfs_dir2_leafn_add(
                 lfloglow = be16_to_cpu(leaf->hdr.count);
                 lfloghigh = -1;
         }
-       /*
-        * No stale entries, just insert a space for the new entry.
-        */
-       if (!leaf->hdr.stale) {
-               lep = &leaf->ents[index];
-               if (index < be16_to_cpu(leaf->hdr.count))
-                       memmove(lep + 1, lep,
-                               (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
-               lfloglow = index;
-               lfloghigh = be16_to_cpu(leaf->hdr.count);
-               be16_add_cpu(&leaf->hdr.count, 1);
-       }
-       /*
-        * There are stale entries.  We'll use one for the new entry.
-        */
-       else {
-               /*
-                * If we didn't do a compact then we need to figure out
-                * which stale entry will be used.
-                */
-               if (compact == 0) {
-                       /*
-                        * Find first stale entry before our insertion point.
-                        */
-                       for (lowstale = index - 1;
-                            lowstale >= 0 &&
-                               be32_to_cpu(leaf->ents[lowstale].address) !=
-                               XFS_DIR2_NULL_DATAPTR;
-                            lowstale--)
-                               continue;
-                       /*
-                        * Find next stale entry after insertion point.
-                        * Stop looking if the answer would be worse than
-                        * lowstale already found.
-                        */
-                       for (highstale = index;
-                            highstale < be16_to_cpu(leaf->hdr.count) &&
-                               be32_to_cpu(leaf->ents[highstale].address) !=
-                               XFS_DIR2_NULL_DATAPTR &&
-                               (lowstale < 0 ||
-                                index - lowstale - 1 >= highstale - index);
-                            highstale++)
-                               continue;
-               }
-               /*
-                * Using the low stale entry.
-                * Shift entries up toward the stale slot.
-                */
-               if (lowstale >= 0 &&
-                   (highstale == be16_to_cpu(leaf->hdr.count) ||
-                    index - lowstale - 1 < highstale - index)) {
-                       ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
-                              XFS_DIR2_NULL_DATAPTR);
-                       ASSERT(index - lowstale - 1 >= 0);
-                       if (index - lowstale - 1 > 0)
-                               memmove(&leaf->ents[lowstale],
-                                       &leaf->ents[lowstale + 1],
-                                       (index - lowstale - 1) * sizeof(*lep));
-                       lep = &leaf->ents[index - 1];
-                       lfloglow = MIN(lowstale, lfloglow);
-                       lfloghigh = MAX(index - 1, lfloghigh);
-               }
-               /*
-                * Using the high stale entry.
-                * Shift entries down toward the stale slot.
-                */
-               else {
-                       ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
-                              XFS_DIR2_NULL_DATAPTR);
-                       ASSERT(highstale - index >= 0);
-                       if (highstale - index > 0)
-                               memmove(&leaf->ents[index + 1],
-                                       &leaf->ents[index],
-                                       (highstale - index) * sizeof(*lep));
-                       lep = &leaf->ents[index];
-                       lfloglow = MIN(index, lfloglow);
-                       lfloghigh = MAX(highstale, lfloghigh);
-               }
-               be16_add_cpu(&leaf->hdr.stale, -1);
-       }
+
         /*
          * Insert the new entry, log everything.
          */
+       lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
+                                      highstale, &lfloglow, &lfloghigh);
+
         lep->hashval = cpu_to_be32(args->hashval);
         lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
                                 args->blkno, args->index));
@@ -322,24 +317,24 @@ xfs_dir2_leafn_add(
   */
  void
  xfs_dir2_leafn_check(
-       xfs_inode_t     *dp,                    /* incore directory inode */
-       xfs_dabuf_t     *bp)                    /* leaf buffer */
+       struct xfs_inode *dp,
+       struct xfs_buf  *bp)
  {
         int             i;                      /* leaf index */
         xfs_dir2_leaf_t *leaf;                  /* leaf structure */
         xfs_mount_t     *mp;                    /* filesystem mount point */
         int             stale;                  /* count of stale leaves */
  
-       leaf = bp->data;
+       leaf = bp->b_addr;
         mp = dp->i_mount;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
         for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
                 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
                         ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
                                be32_to_cpu(leaf->ents[i + 1].hashval));
                 }
-               if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+               if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                         stale++;
         }
         ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -352,13 +347,13 @@ xfs_dir2_leafn_check(
   */
  xfs_dahash_t                                   /* hash value */
  xfs_dir2_leafn_lasthash(
-       xfs_dabuf_t     *bp,                    /* leaf buffer */
+       struct xfs_buf  *bp,                    /* leaf buffer */
         int             *count)                 /* count of entries in leaf */
  {
         xfs_dir2_leaf_t *leaf;                  /* leaf structure */
  
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         if (count)
                 *count = be16_to_cpu(leaf->hdr.count);
         if (!leaf->hdr.count)
@@ -372,12 +367,12 @@ xfs_dir2_leafn_lasthash(
   */
  STATIC int
  xfs_dir2_leafn_lookup_for_addname(
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         xfs_da_args_t           *args,          /* operation arguments */
         int                     *indexp,        /* out: leaf entry index */
         xfs_da_state_t          *state)         /* state to fill in */
  {
-       xfs_dabuf_t             *curbp = NULL;  /* current data/free buffer */
+       struct xfs_buf          *curbp = NULL;  /* current data/free buffer */
         xfs_dir2_db_t           curdb = -1;     /* current data block number */
         xfs_dir2_db_t           curfdb = -1;    /* current free block number */
         xfs_inode_t             *dp;            /* incore directory inode */
@@ -396,8 +391,8 @@ xfs_dir2_leafn_lookup_for_addname(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
  #ifdef __KERNEL__
         ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
  #endif
@@ -413,8 +408,8 @@ xfs_dir2_leafn_lookup_for_addname(
                 /* If so, it's a free block buffer, get the block number. */
                 curbp = state->extrablk.bp;
                 curfdb = state->extrablk.blkno;
-               free = curbp->data;
-               ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+               free = curbp->b_addr;
+               ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
         }
         length = xfs_dir2_data_entsize(args->namelen);
         /*
@@ -455,20 +450,18 @@ xfs_dir2_leafn_lookup_for_addname(
                                  * If we had one before, drop it.
                                  */
                                 if (curbp)
-                                       xfs_da_brelse(tp, curbp);
-                               /*
-                                * Read the free block.
-                                */
-                               error = xfs_da_read_buf(tp, dp,
+                                       xfs_trans_brelse(tp, curbp);
+
+                               error = xfs_dir2_free_read(tp, dp,
                                                 xfs_dir2_db_to_da(mp, newfdb),
-                                               -1, &curbp, XFS_DATA_FORK);
+                                               &curbp);
                                 if (error)
                                         return error;
-                               free = curbp->data;
+                               free = curbp->b_addr;
                                 ASSERT(be32_to_cpu(free->hdr.magic) ==
                                         XFS_DIR2_FREE_MAGIC);
                                 ASSERT((be32_to_cpu(free->hdr.firstdb) %
-                                       XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+                                       xfs_dir2_free_max_bests(mp)) == 0);
                                 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
                                 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
                                         be32_to_cpu(free->hdr.nvalid));
@@ -480,11 +473,12 @@ xfs_dir2_leafn_lookup_for_addname(
                         /*
                          * If it has room, return it.
                          */
-                       if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
+                       if (unlikely(free->bests[fi] ==
+                           cpu_to_be16(NULLDATAOFF))) {
                                 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
                                                         XFS_ERRLEVEL_LOW, mp);
                                 if (curfdb != newfdb)
-                                       xfs_da_brelse(tp, curbp);
+                                       xfs_trans_brelse(tp, curbp);
                                 return XFS_ERROR(EFSCORRUPTED);
                         }
                         curfdb = newfdb;
@@ -519,12 +513,12 @@ out:
   */
  STATIC int
  xfs_dir2_leafn_lookup_for_entry(
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         xfs_da_args_t           *args,          /* operation arguments */
         int                     *indexp,        /* out: leaf entry index */
         xfs_da_state_t          *state)         /* state to fill in */
  {
-       xfs_dabuf_t             *curbp = NULL;  /* current data/free buffer */
+       struct xfs_buf          *curbp = NULL;  /* current data/free buffer */
         xfs_dir2_db_t           curdb = -1;     /* current data block number */
         xfs_dir2_data_entry_t   *dep;           /* data block entry */
         xfs_inode_t             *dp;            /* incore directory inode */
@@ -540,8 +534,8 @@ xfs_dir2_leafn_lookup_for_entry(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
  #ifdef __KERNEL__
         ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
  #endif
@@ -585,7 +579,7 @@ xfs_dir2_leafn_lookup_for_entry(
                          */
                         if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
                                                 curdb != state->extrablk.blkno))
-                               xfs_da_brelse(tp, curbp);
+                               xfs_trans_brelse(tp, curbp);
                         /*
                          * If needing the block that is saved with a CI match,
                          * use it otherwise read in the new data block.
@@ -595,9 +589,9 @@ xfs_dir2_leafn_lookup_for_entry(
                                 ASSERT(state->extravalid);
                                 curbp = state->extrablk.bp;
                         } else {
-                               error = xfs_da_read_buf(tp, dp,
+                               error = xfs_dir2_data_read(tp, dp,
                                                 xfs_dir2_db_to_da(mp, newdb),
-                                               -1, &curbp, XFS_DATA_FORK);
+                                               -1, &curbp);
                                 if (error)
                                         return error;
                         }
@@ -607,7 +601,7 @@ xfs_dir2_leafn_lookup_for_entry(
                 /*
                  * Point to the data entry.
                  */
-               dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+               dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
                         xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
                 /*
                  * Compare the entry and if it's an exact match, return
@@ -619,7 +613,7 @@ xfs_dir2_leafn_lookup_for_entry(
                         /* If there is a CI match block, drop it */
                         if (args->cmpresult != XFS_CMP_DIFFERENT &&
                                                 curdb != state->extrablk.blkno)
-                               xfs_da_brelse(tp, state->extrablk.bp);
+                               xfs_trans_brelse(tp, state->extrablk.bp);
                         args->cmpresult = cmp;
                         args->inumber = be64_to_cpu(dep->inumber);
                         *indexp = index;
@@ -627,8 +621,9 @@ xfs_dir2_leafn_lookup_for_entry(
                         state->extrablk.bp = curbp;
                         state->extrablk.blkno = curdb;
                         state->extrablk.index = (int)((char *)dep -
-                                                       (char *)curbp->data);
+                                                       (char *)curbp->b_addr);
                         state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                       curbp->b_ops = &xfs_dir2_data_buf_ops;
                         if (cmp == XFS_CMP_EXACT)
                                 return XFS_ERROR(EEXIST);
                 }
@@ -643,10 +638,11 @@ xfs_dir2_leafn_lookup_for_entry(
                         state->extrablk.index = -1;
                         state->extrablk.blkno = curdb;
                         state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                       curbp->b_ops = &xfs_dir2_data_buf_ops;
                 } else {
                         /* If the curbp is not the CI match block, drop it */
                         if (state->extrablk.bp != curbp)
-                               xfs_da_brelse(tp, curbp);
+                               xfs_trans_brelse(tp, curbp);
                 }
         } else {
                 state->extravalid = 0;
@@ -662,7 +658,7 @@ xfs_dir2_leafn_lookup_for_entry(
   */
  int
  xfs_dir2_leafn_lookup_int(
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         xfs_da_args_t           *args,          /* operation arguments */
         int                     *indexp,        /* out: leaf entry index */
         xfs_da_state_t          *state)         /* state to fill in */
@@ -680,9 +676,9 @@ xfs_dir2_leafn_lookup_int(
  static void
  xfs_dir2_leafn_moveents(
         xfs_da_args_t   *args,                  /* operation arguments */
-       xfs_dabuf_t     *bp_s,                  /* source leaf buffer */
+       struct xfs_buf  *bp_s,                  /* source leaf buffer */
         int             start_s,                /* source leaf index */
-       xfs_dabuf_t     *bp_d,                  /* destination leaf buffer */
+       struct xfs_buf  *bp_d,                  /* destination leaf buffer */
         int             start_d,                /* destination leaf index */
         int             count)                  /* count of leaves to copy */
  {
@@ -700,8 +696,8 @@ xfs_dir2_leafn_moveents(
                 return;
         }
         tp = args->trans;
-       leaf_s = bp_s->data;
-       leaf_d = bp_d->data;
+       leaf_s = bp_s->b_addr;
+       leaf_d = bp_d->b_addr;
         /*
          * If the destination index is not the end of the current
          * destination leaf entries, open up a hole in the destination
@@ -722,7 +718,8 @@ xfs_dir2_leafn_moveents(
                 int     i;                      /* temp leaf index */
  
                 for (i = start_s, stale = 0; i < start_s + count; i++) {
-                       if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+                       if (leaf_s->ents[i].address ==
+                           cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                                 stale++;
                 }
         } else
@@ -761,16 +758,16 @@ xfs_dir2_leafn_moveents(
   */
  int                                            /* sort order */
  xfs_dir2_leafn_order(
-       xfs_dabuf_t     *leaf1_bp,              /* leaf1 buffer */
-       xfs_dabuf_t     *leaf2_bp)              /* leaf2 buffer */
+       struct xfs_buf  *leaf1_bp,              /* leaf1 buffer */
+       struct xfs_buf  *leaf2_bp)              /* leaf2 buffer */
  {
         xfs_dir2_leaf_t *leaf1;                 /* leaf1 structure */
         xfs_dir2_leaf_t *leaf2;                 /* leaf2 structure */
  
-       leaf1 = leaf1_bp->data;
-       leaf2 = leaf2_bp->data;
-       ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
-       ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf1 = leaf1_bp->b_addr;
+       leaf2 = leaf2_bp->b_addr;
+       ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+       ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         if (be16_to_cpu(leaf1->hdr.count) > 0 &&
             be16_to_cpu(leaf2->hdr.count) > 0 &&
             (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -816,8 +813,8 @@ xfs_dir2_leafn_rebalance(
                 blk1 = blk2;
                 blk2 = tmp;
         }
-       leaf1 = blk1->bp->data;
-       leaf2 = blk2->bp->data;
+       leaf1 = blk1->bp->b_addr;
+       leaf2 = blk2->bp->b_addr;
         oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count);
  #ifdef DEBUG
         oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale);
@@ -879,11 +876,81 @@ xfs_dir2_leafn_rebalance(
         if(blk2->index < 0) {
                 state->inleaf = 1;
                 blk2->index = 0;
-               cmn_err(CE_ALERT,
-                       "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: "
-                       "blk1->index %d\n",
-                       blk1->index);
+               xfs_alert(args->dp->i_mount,
+       "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n",
+                       __func__, blk1->index);
+       }
+}
+
+static int                                     /* 0, or error from shrinking the free block */
+xfs_dir2_data_block_free(                      /* update the free block entry for one data block */
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_dir2_data_hdr *hdr,          /* data block header, NULL if the block is gone */
+       struct xfs_dir2_free    *free,          /* freeblock structure */
+       xfs_dir2_db_t           fdb,            /* freeblock block number */
+       int                     findex,         /* index in freeblock entries */
+       struct xfs_buf          *fbp,           /* freeblock buffer */
+       int                     longest)        /* value stored in bests[findex] when hdr != NULL */
+{
+       struct xfs_trans        *tp = args->trans;      /* transaction pointer */
+       int                     logfree = 0;    /* need to log free entry */
+
+       if (!hdr) {
+               /* No data block header: one less used entry in the free table.  */
+               be32_add_cpu(&free->hdr.nused, -1);
+               xfs_dir2_free_log_header(tp, fbp);
+
+               /*
+                * If this was the last entry in the table, we can trim the
+                * table size back.  There might be other entries at the end
+                * referring to non-existent data blocks, get those too.
+                */
+               if (findex == be32_to_cpu(free->hdr.nvalid) - 1) {
+                       int     i;              /* free entry index */
+
+                       for (i = findex - 1; i >= 0; i--) {     /* scan back to the last live entry */
+                               if (free->bests[i] != cpu_to_be16(NULLDATAOFF))
+                                       break;
+                       }
+                       free->hdr.nvalid = cpu_to_be32(i + 1);  /* i is -1 when no live entry remains */
+                       logfree = 0;    /* findex is now past nvalid, nothing to log */
+               } else {
+                       /* Not the last entry, just punch it out.  */
+                       free->bests[findex] = cpu_to_be16(NULLDATAOFF);
+                       logfree = 1;
+               }
+               /*
+                * If there are no useful entries left in the block,
+                * get rid of the block if we can.
+                */
+               if (!free->hdr.nused) {
+                       int error;      /* error return value */
+
+                       error = xfs_dir2_shrink_inode(args, fdb, fbp);
+                       if (error == 0) {
+                               fbp = NULL;     /* shrink succeeded, stop using the buffer */
+                               logfree = 0;
+                       } else if (error != ENOSPC || args->total != 0)
+                               return error;
+                       /*
+                        * It's possible to get ENOSPC if there is no
+                        * space reservation.  In this case someone
+                        * else will eventually get rid of this block.
+                        */
+               }
+       } else {
+               /*
+                * Data block is not empty, just set the free entry to the new
+                * value.
+                */
+               free->bests[findex] = cpu_to_be16(longest);
+               logfree = 1;
         }
+
+       /* Log the free entry that changed, unless we got rid of it.  */
+       if (logfree)
+               xfs_dir2_free_log_bests(tp, fbp, findex, findex);
+       return 0;
  }
  
  /*
@@ -894,14 +961,14 @@ xfs_dir2_leafn_rebalance(
  static int                                     /* error */
  xfs_dir2_leafn_remove(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *bp,            /* leaf buffer */
+       struct xfs_buf          *bp,            /* leaf buffer */
         int                     index,          /* leaf entry index */
         xfs_da_state_blk_t      *dblk,          /* data block */
         int                     *rval)          /* resulting block needs join */
  {
-       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_dir2_db_t           db;             /* data block number */
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_dir2_data_entry_t   *dep;           /* data block entry */
         xfs_inode_t             *dp;            /* incore directory inode */
         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
@@ -918,8 +985,8 @@ xfs_dir2_leafn_remove(
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
-       leaf = bp->data;
-       ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = bp->b_addr;
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         /*
          * Point to the entry we're removing.
          */
@@ -944,9 +1011,9 @@ xfs_dir2_leafn_remove(
          * in the data block in case it changes.
          */
         dbp = dblk->bp;
-       data = dbp->data;
-       dep = (xfs_dir2_data_entry_t *)((char *)data + off);
-       longest = be16_to_cpu(data->hdr.bestfree[0].length);
+       hdr = dbp->b_addr;
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
+       longest = be16_to_cpu(hdr->bestfree[0].length);
         needlog = needscan = 0;
         xfs_dir2_data_make_free(tp, dbp, off,
                 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -955,7 +1022,7 @@ xfs_dir2_leafn_remove(
          * Log the data block header if needed.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, data, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         if (needlog)
                 xfs_dir2_data_log_header(tp, dbp);
         xfs_dir2_data_check(dp, dbp);
@@ -963,126 +1030,63 @@ xfs_dir2_leafn_remove(
          * If the longest data block freespace changes, need to update
          * the corresponding freeblock entry.
          */
-       if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) {
+       if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
                 int             error;          /* error return value */
-               xfs_dabuf_t     *fbp;           /* freeblock buffer */
+               struct xfs_buf  *fbp;           /* freeblock buffer */
                 xfs_dir2_db_t   fdb;            /* freeblock block number */
                 int             findex;         /* index in freeblock entries */
                 xfs_dir2_free_t *free;          /* freeblock structure */
-               int             logfree;        /* need to log free entry */
  
                 /*
                  * Convert the data block number to a free block,
                  * read in the free block.
                  */
                 fdb = xfs_dir2_db_to_fdb(mp, db);
-               if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
-                               -1, &fbp, XFS_DATA_FORK))) {
+               error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb),
+                                          &fbp);
+               if (error)
                         return error;
-               }
-               free = fbp->data;
-               ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+               free = fbp->b_addr;
+               ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
                 ASSERT(be32_to_cpu(free->hdr.firstdb) ==
-                      XFS_DIR2_MAX_FREE_BESTS(mp) *
+                      xfs_dir2_free_max_bests(mp) *
                        (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
                 /*
                  * Calculate which entry we need to fix.
                  */
                 findex = xfs_dir2_db_to_fdindex(mp, db);
-               longest = be16_to_cpu(data->hdr.bestfree[0].length);
+               longest = be16_to_cpu(hdr->bestfree[0].length);
                 /*
                  * If the data block is now empty we can get rid of it
                  * (usually).
                  */
-               if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+               if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) {
                         /*
                          * Try to punch out the data block.
                          */
                         error = xfs_dir2_shrink_inode(args, db, dbp);
                         if (error == 0) {
                                 dblk->bp = NULL;
-                               data = NULL;
+                               hdr = NULL;
                         }
                         /*
                          * We can get ENOSPC if there's no space reservation.
                          * In this case just drop the buffer and some one else
                          * will eventually get rid of the empty block.
                          */
-                       else if (error == ENOSPC && args->total == 0)
-                               xfs_da_buf_done(dbp);
-                       else
+                       else if (!(error == ENOSPC && args->total == 0))
                                 return error;
                 }
                 /*
                  * If we got rid of the data block, we can eliminate that entry
                  * in the free block.
                  */
-               if (data == NULL) {
-                       /*
-                        * One less used entry in the free table.
-                        */
-                       be32_add_cpu(&free->hdr.nused, -1);
-                       xfs_dir2_free_log_header(tp, fbp);
-                       /*
-                        * If this was the last entry in the table, we can
-                        * trim the table size back.  There might be other
-                        * entries at the end referring to non-existent
-                        * data blocks, get those too.
-                        */
-                       if (findex == be32_to_cpu(free->hdr.nvalid) - 1) {
-                               int     i;              /* free entry index */
-
-                               for (i = findex - 1;
-                                    i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF;
-                                    i--)
-                                       continue;
-                               free->hdr.nvalid = cpu_to_be32(i + 1);
-                               logfree = 0;
-                       }
-                       /*
-                        * Not the last entry, just punch it out.
-                        */
-                       else {
-                               free->bests[findex] = cpu_to_be16(NULLDATAOFF);
-                               logfree = 1;
-                       }
-                       /*
-                        * If there are no useful entries left in the block,
-                        * get rid of the block if we can.
-                        */
-                       if (!free->hdr.nused) {
-                               error = xfs_dir2_shrink_inode(args, fdb, fbp);
-                               if (error == 0) {
-                                       fbp = NULL;
-                                       logfree = 0;
-                               } else if (error != ENOSPC || args->total != 0)
-                                       return error;
-                               /*
-                                * It's possible to get ENOSPC if there is no
-                                * space reservation.  In this case some one
-                                * else will eventually get rid of this block.
-                                */
-                       }
-               }
-               /*
-                * Data block is not empty, just set the free entry to
-                * the new value.
-                */
-               else {
-                       free->bests[findex] = cpu_to_be16(longest);
-                       logfree = 1;
-               }
-               /*
-                * Log the free entry that changed, unless we got rid of it.
-                */
-               if (logfree)
-                       xfs_dir2_free_log_bests(tp, fbp, findex, findex);
-               /*
-                * Drop the buffer if we still have it.
-                */
-               if (fbp)
-                       xfs_da_buf_done(fbp);
+               error = xfs_dir2_data_block_free(args, hdr, free,
+                                                fdb, findex, fbp, longest);
+               if (error)
+                       return error;
         }
+
         xfs_dir2_leafn_check(dp, bp);
         /*
          * Return indication of whether this leaf block is empty enough
@@ -1173,7 +1177,7 @@ xfs_dir2_leafn_toosmall(
  {
         xfs_da_state_blk_t      *blk;           /* leaf block */
         xfs_dablk_t             blkno;          /* leaf block number */
-       xfs_dabuf_t             *bp;            /* leaf buffer */
+       struct xfs_buf          *bp;            /* leaf buffer */
         int                     bytes;          /* bytes in use */
         int                     count;          /* leaf live entry count */
         int                     error;          /* error return value */
@@ -1189,8 +1193,8 @@ xfs_dir2_leafn_toosmall(
          * to coalesce with a sibling.
          */
         blk = &state->path.blk[state->path.active - 1];
-       info = blk->bp->data;
-       ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC);
+       info = blk->bp->b_addr;
+       ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         leaf = (xfs_dir2_leaf_t *)info;
         count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
         bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1236,20 +1240,19 @@ xfs_dir2_leafn_toosmall(
                 /*
                  * Read the sibling leaf block.
                  */
-               if ((error =
-                   xfs_da_read_buf(state->args->trans, state->args->dp, blkno,
-                           -1, &bp, XFS_DATA_FORK))) {
+               error = xfs_dir2_leafn_read(state->args->trans, state->args->dp,
+                                           blkno, -1, &bp);
+               if (error)
                         return error;
-               }
-               ASSERT(bp != NULL);
+
                 /*
                  * Count bytes in the two blocks combined.
                  */
                 leaf = (xfs_dir2_leaf_t *)info;
                 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
                 bytes = state->blocksize - (state->blocksize >> 2);
-               leaf = bp->data;
-               ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+               leaf = bp->b_addr;
+               ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
                 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
                 bytes -= count * (uint)sizeof(leaf->ents[0]);
                 /*
@@ -1257,7 +1260,7 @@ xfs_dir2_leafn_toosmall(
                  */
                 if (bytes >= 0)
                         break;
-               xfs_da_brelse(state->args->trans, bp);
+               xfs_trans_brelse(state->args->trans, bp);
         }
         /*
          * Didn't like either block, give up.
@@ -1266,11 +1269,7 @@ xfs_dir2_leafn_toosmall(
                 *action = 0;
                 return 0;
         }
-       /*
-        * Done with the sibling leaf block here, drop the dabuf
-        * so path_shift can get it.
-        */
-       xfs_da_buf_done(bp);
+
         /*
          * Make altpath point to the block we want to keep (the lower
          * numbered block) and path point to the block we want to drop.
@@ -1306,10 +1305,10 @@ xfs_dir2_leafn_unbalance(
         args = state->args;
         ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
         ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
-       drop_leaf = drop_blk->bp->data;
-       save_leaf = save_blk->bp->data;
-       ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
-       ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+       drop_leaf = drop_blk->bp->b_addr;
+       save_leaf = save_blk->bp->b_addr;
+       ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+       ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
         /*
          * If there are any stale leaf entries, take this opportunity
          * to purge them.
@@ -1413,15 +1412,15 @@ xfs_dir2_node_addname_int(
         xfs_da_args_t           *args,          /* operation arguments */
         xfs_da_state_blk_t      *fblk)          /* optional freespace block */
  {
-       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_dir2_db_t           dbno;           /* data block number */
-       xfs_dabuf_t             *dbp;           /* data block buffer */
+       struct xfs_buf          *dbp;           /* data block buffer */
         xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
         xfs_inode_t             *dp;            /* incore directory inode */
         xfs_dir2_data_unused_t  *dup;           /* data unused entry pointer */
         int                     error;          /* error return value */
         xfs_dir2_db_t           fbno;           /* freespace block number */
-       xfs_dabuf_t             *fbp;           /* freespace buffer */
+       struct xfs_buf          *fbp;           /* freespace buffer */
         int                     findex;         /* freespace entry index */
         xfs_dir2_free_t         *free=NULL;     /* freespace block structure */
         xfs_dir2_db_t           ifbno;          /* initial freespace block no */
@@ -1449,8 +1448,8 @@ xfs_dir2_node_addname_int(
                  * Remember initial freespace block number.
                  */
                 ifbno = fblk->blkno;
-               free = fbp->data;
-               ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+               free = fbp->b_addr;
+               ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
                 findex = fblk->index;
                 /*
                  * This means the free entry showed that the data block had
@@ -1525,16 +1524,15 @@ xfs_dir2_node_addname_int(
                          * This should be really rare, so there's no reason
                          * to avoid it.
                          */
-                       if ((error = xfs_da_read_buf(tp, dp,
-                                       xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
-                                       XFS_DATA_FORK))) {
+                       error = xfs_dir2_free_try_read(tp, dp,
+                                               xfs_dir2_db_to_da(mp, fbno),
+                                               &fbp);
+                       if (error)
                                 return error;
-                       }
-                       if (unlikely(fbp == NULL)) {
+                       if (!fbp)
                                 continue;
-                       }
-                       free = fbp->data;
-                       ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+                       free = fbp->b_addr;
+                       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
                         findex = 0;
                 }
                 /*
@@ -1551,7 +1549,7 @@ xfs_dir2_node_addname_int(
                                 /*
                                  * Drop the block.
                                  */
-                               xfs_da_brelse(tp, fbp);
+                               xfs_trans_brelse(tp, fbp);
                                 fbp = NULL;
                                 if (fblk && fblk->bp)
                                         fblk->bp = NULL;
@@ -1566,36 +1564,23 @@ xfs_dir2_node_addname_int(
                 /*
                  * Not allowed to allocate, return failure.
                  */
-               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
-                                                       args->total == 0) {
-                       /*
-                        * Drop the freespace buffer unless it came from our
-                        * caller.
-                        */
-                       if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-                               xfs_da_buf_done(fbp);
+               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
                         return XFS_ERROR(ENOSPC);
-               }
+
                 /*
                  * Allocate and initialize the new data block.
                  */
                 if (unlikely((error = xfs_dir2_grow_inode(args,
                                                          XFS_DIR2_DATA_SPACE,
                                                          &dbno)) ||
-                   (error = xfs_dir2_data_init(args, dbno, &dbp)))) {
-                       /*
-                        * Drop the freespace buffer unless it came from our
-                        * caller.
-                        */
-                       if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-                               xfs_da_buf_done(fbp);
+                   (error = xfs_dir2_data_init(args, dbno, &dbp))))
                         return error;
-               }
+
                 /*
                  * If (somehow) we have a freespace block, get rid of it.
                  */
                 if (fbp)
-                       xfs_da_brelse(tp, fbp);
+                       xfs_trans_brelse(tp, fbp);
                 if (fblk && fblk->bp)
                         fblk->bp = NULL;
  
@@ -1604,12 +1589,12 @@ xfs_dir2_node_addname_int(
                  * that was just allocated.
                  */
                 fbno = xfs_dir2_db_to_fdb(mp, dbno);
-               if (unlikely(error = xfs_da_read_buf(tp, dp,
-                               xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
-                               XFS_DATA_FORK))) {
-                       xfs_da_buf_done(dbp);
+               error = xfs_dir2_free_try_read(tp, dp,
+                                              xfs_dir2_db_to_da(mp, fbno),
+                                              &fbp);
+               if (error)
                         return error;
-               }
+
                 /*
                  * If there wasn't a freespace block, the read will
                  * return a NULL fbp.  Allocate and initialize a new one.
@@ -1621,26 +1606,22 @@ xfs_dir2_node_addname_int(
                         }
  
                         if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
-                               cmn_err(CE_ALERT,
-                                       "xfs_dir2_node_addname_int: dir ino "
-                                       "%llu needed freesp block %lld for\n"
-                                       "  data block %lld, got %lld\n"
-                                       "  ifbno %llu lastfbno %d\n",
-                                       (unsigned long long)dp->i_ino,
+                               xfs_alert(mp,
+                       "%s: dir ino %llu needed freesp block %lld for\n"
+                       "  data block %lld, got %lld ifbno %llu lastfbno %d",
+                                       __func__, (unsigned long long)dp->i_ino,
                                         (long long)xfs_dir2_db_to_fdb(mp, dbno),
                                         (long long)dbno, (long long)fbno,
                                         (unsigned long long)ifbno, lastfbno);
                                 if (fblk) {
-                                       cmn_err(CE_ALERT,
-                                               " fblk 0x%p blkno %llu "
-                                               "index %d magic 0x%x\n",
+                                       xfs_alert(mp,
+                               " fblk 0x%p blkno %llu index %d magic 0x%x",
                                                 fblk,
                                                 (unsigned long long)fblk->blkno,
                                                 fblk->index,
                                                 fblk->magic);
                                 } else {
-                                       cmn_err(CE_ALERT,
-                                               " ... fblk is NULL\n");
+                                       xfs_alert(mp, " ... fblk is NULL");
                                 }
                                 XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
                                                  XFS_ERRLEVEL_LOW, mp);
@@ -1650,27 +1631,27 @@ xfs_dir2_node_addname_int(
                         /*
                          * Get a buffer for the new block.
                          */
-                       if ((error = xfs_da_get_buf(tp, dp,
-                                                  xfs_dir2_db_to_da(mp, fbno),
-                                                  -1, &fbp, XFS_DATA_FORK))) {
+                       error = xfs_da_get_buf(tp, dp,
+                                              xfs_dir2_db_to_da(mp, fbno),
+                                              -1, &fbp, XFS_DATA_FORK);
+                       if (error)
                                 return error;
-                       }
-                       ASSERT(fbp != NULL);
+                       fbp->b_ops = &xfs_dir2_free_buf_ops;
  
                         /*
                          * Initialize the new block to be empty, and remember
                          * its first slot as our empty slot.
                          */
-                       free = fbp->data;
+                       free = fbp->b_addr;
                         free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
                         free->hdr.firstdb = cpu_to_be32(
                                 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
-                               XFS_DIR2_MAX_FREE_BESTS(mp));
+                               xfs_dir2_free_max_bests(mp));
                         free->hdr.nvalid = 0;
                         free->hdr.nused = 0;
                 } else {
-                       free = fbp->data;
-                       ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+                       free = fbp->b_addr;
+                       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
                 }
  
                 /*
@@ -1682,7 +1663,7 @@ xfs_dir2_node_addname_int(
                  * freespace block, extend that table.
                  */
                 if (findex >= be32_to_cpu(free->hdr.nvalid)) {
-                       ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
+                       ASSERT(findex < xfs_dir2_free_max_bests(mp));
                         free->hdr.nvalid = cpu_to_be32(findex + 1);
                         /*
                          * Tag new entry so nused will go up.
@@ -1693,7 +1674,7 @@ xfs_dir2_node_addname_int(
                  * If this entry was for an empty data block
                  * (this should always be true) then update the header.
                  */
-               if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
+               if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) {
                         be32_add_cpu(&free->hdr.nused, 1);
                         xfs_dir2_free_log_header(tp, fbp);
                 }
@@ -1702,8 +1683,8 @@ xfs_dir2_node_addname_int(
                  * We haven't allocated the data entry yet so this will
                  * change again.
                  */
-               data = dbp->data;
-               free->bests[findex] = data->hdr.bestfree[0].length;
+               hdr = dbp->b_addr;
+               free->bests[findex] = hdr->bestfree[0].length;
                 logfree = 1;
         }
         /*
@@ -1713,36 +1694,31 @@ xfs_dir2_node_addname_int(
                 /*
                  * If just checking, we succeeded.
                  */
-               if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
-                       if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-                               xfs_da_buf_done(fbp);
+               if (args->op_flags & XFS_DA_OP_JUSTCHECK)
                         return 0;
-               }
+
                 /*
                  * Read the data block in.
                  */
-               if (unlikely(
-                   error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
-                               -1, &dbp, XFS_DATA_FORK))) {
-                       if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-                               xfs_da_buf_done(fbp);
+               error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno),
+                                          -1, &dbp);
+               if (error)
                         return error;
-               }
-               data = dbp->data;
+               hdr = dbp->b_addr;
                 logfree = 0;
         }
-       ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length);
+       ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
         /*
          * Point to the existing unused space.
          */
         dup = (xfs_dir2_data_unused_t *)
-             ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
+             ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
         needscan = needlog = 0;
         /*
          * Mark the first part of the unused space, inuse for us.
          */
         xfs_dir2_data_use_free(tp, dbp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
                 &needlog, &needscan);
         /*
          * Fill in the new entry and log it.
@@ -1752,13 +1728,13 @@ xfs_dir2_node_addname_int(
         dep->namelen = args->namelen;
         memcpy(dep->name, args->name, dep->namelen);
         tagp = xfs_dir2_data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)data);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
         xfs_dir2_data_log_entry(tp, dbp, dep);
         /*
          * Rescan the block for bestfree if needed.
          */
         if (needscan)
-               xfs_dir2_data_freescan(mp, data, &needlog);
+               xfs_dir2_data_freescan(mp, hdr, &needlog);
         /*
          * Log the data block header if needed.
          */
@@ -1767,8 +1743,8 @@ xfs_dir2_node_addname_int(
         /*
          * If the freespace entry is now wrong, update it.
          */
-       if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
-               free->bests[findex] = data->hdr.bestfree[0].length;
+       if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) {
+               free->bests[findex] = hdr->bestfree[0].length;
                 logfree = 1;
         }
         /*
@@ -1776,17 +1752,11 @@ xfs_dir2_node_addname_int(
          */
         if (logfree)
                 xfs_dir2_free_log_bests(tp, fbp, findex, findex);
-       /*
-        * If the caller didn't hand us the freespace block, drop it.
-        */
-       if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
-               xfs_da_buf_done(fbp);
         /*
          * Return the data block and offset in args, then drop the data block.
          */
         args->blkno = (xfs_dablk_t)dbno;
         args->index = be16_to_cpu(*tagp);
-       xfs_da_buf_done(dbp);
         return 0;
  }
  
@@ -1824,22 +1794,23 @@ xfs_dir2_node_lookup(
                 /* If a CI match, dup the actual name and return EEXIST */
                 xfs_dir2_data_entry_t   *dep;
  
-               dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
-                                               data + state->extrablk.index);
+               dep = (xfs_dir2_data_entry_t *)
+                       ((char *)state->extrablk.bp->b_addr +
+                                                state->extrablk.index);
                 rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
         }
         /*
          * Release the btree blocks and leaf block.
          */
         for (i = 0; i < state->path.active; i++) {
-               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
                 state->path.blk[i].bp = NULL;
         }
         /*
          * Release the data block if we have it.
          */
         if (state->extravalid && state->extrablk.bp) {
-               xfs_da_brelse(args->trans, state->extrablk.bp);
+               xfs_trans_brelse(args->trans, state->extrablk.bp);
                 state->extrablk.bp = NULL;
         }
         xfs_da_state_free(state);
@@ -1918,7 +1889,7 @@ xfs_dir2_node_replace(
         xfs_da_args_t           *args)          /* operation arguments */
  {
         xfs_da_state_blk_t      *blk;           /* leaf block */
-       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
         xfs_dir2_data_entry_t   *dep;           /* data entry changed */
         int                     error;          /* error return value */
         int                     i;              /* btree level */
@@ -1956,16 +1927,16 @@ xfs_dir2_node_replace(
                  */
                 blk = &state->path.blk[state->path.active - 1];
                 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
-               leaf = blk->bp->data;
+               leaf = blk->bp->b_addr;
                 lep = &leaf->ents[blk->index];
                 ASSERT(state->extravalid);
                 /*
                  * Point to the data entry.
                  */
-               data = state->extrablk.bp->data;
-               ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+               hdr = state->extrablk.bp->b_addr;
+               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
                 dep = (xfs_dir2_data_entry_t *)
-                     ((char *)data +
+                     ((char *)hdr +
                        xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
                 ASSERT(inum != be64_to_cpu(dep->inumber));
                 /*
@@ -1979,14 +1950,14 @@ xfs_dir2_node_replace(
          * Didn't find it, and we're holding a data block.  Drop it.
          */
         else if (state->extravalid) {
-               xfs_da_brelse(args->trans, state->extrablk.bp);
+               xfs_trans_brelse(args->trans, state->extrablk.bp);
                 state->extrablk.bp = NULL;
         }
         /*
          * Release all the buffers in the cursor.
          */
         for (i = 0; i < state->path.active; i++) {
-               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
                 state->path.blk[i].bp = NULL;
         }
         xfs_da_state_free(state);
@@ -2003,7 +1974,7 @@ xfs_dir2_node_trim_free(
         xfs_fileoff_t           fo,             /* free block number */
         int                     *rvalp)         /* out: did something */
  {
-       xfs_dabuf_t             *bp;            /* freespace buffer */
+       struct xfs_buf          *bp;            /* freespace buffer */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     error;          /* error return code */
         xfs_dir2_free_t         *free;          /* freespace structure */
@@ -2016,25 +1987,22 @@ xfs_dir2_node_trim_free(
         /*
          * Read the freespace block.
          */
-       if (unlikely(error = xfs_da_read_buf(tp, dp, (xfs_dablk_t)fo, -2, &bp,
-                       XFS_DATA_FORK))) {
+       error = xfs_dir2_free_try_read(tp, dp, fo, &bp);
+       if (error)
                 return error;
-       }
-
         /*
          * There can be holes in freespace.  If fo is a hole, there's
          * nothing to do.
          */
-       if (bp == NULL) {
+       if (!bp)
                 return 0;
-       }
-       free = bp->data;
-       ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+       free = bp->b_addr;
+       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
         /*
          * If there are used entries, there's nothing to do.
          */
         if (be32_to_cpu(free->hdr.nused) > 0) {
-               xfs_da_brelse(tp, bp);
+               xfs_trans_brelse(tp, bp);
                 *rvalp = 0;
                 return 0;
         }
@@ -2050,7 +2018,7 @@ xfs_dir2_node_trim_free(
                  * pieces.  This is the last block of an extent.
                  */
                 ASSERT(error != ENOSPC);
-               xfs_da_brelse(tp, bp);
+               xfs_trans_brelse(tp, bp);
                 return error;
         }
         /*
diff --git a/libxfs/xfs_dir2_priv.h b/libxfs/xfs_dir2_priv.h

new file mode 100644 (file)

index 0000000..7da79f6
--- /dev/null
+++ b/libxfs/xfs_dir2_priv.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_PRIV_H__
+#define __XFS_DIR2_PRIV_H__
+
+/* xfs_dir2.c */
+extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+                               xfs_dir2_db_t *dbp);
+extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+                               struct xfs_buf *bp);
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
+                               const unsigned char *name, int len);
+
+/* xfs_dir2_block.c */
+extern const struct xfs_buf_ops xfs_dir2_block_buf_ops;
+
+extern int xfs_dir2_block_addname(struct xfs_da_args *args);
+extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
+               xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_block_removename(struct xfs_da_args *args);
+extern int xfs_dir2_block_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
+               struct xfs_buf *lbp, struct xfs_buf *dbp);
+
+/* xfs_dir2_data.c */
+#ifdef DEBUG   /* expression-like macro: the caller supplies the ';' */
+#define        xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp)
+#else  /* !DEBUG: the check expands to nothing */
+#define        xfs_dir2_data_check(dp,bp)
+#endif /* DEBUG */
+
+extern const struct xfs_buf_ops xfs_dir2_data_buf_ops;
+
+extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
+extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
+extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t bno, xfs_daddr_t mapped_bno);
+
+extern struct xfs_dir2_data_free *
+xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
+               struct xfs_dir2_data_unused *dup, int *loghead);
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
+               struct xfs_dir2_data_hdr *hdr, int *loghead);
+extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+               struct xfs_buf **bpp);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
+               struct xfs_dir2_data_entry *dep);
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
+               struct xfs_buf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
+               struct xfs_dir2_data_unused *dup);
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
+               xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
+               int *needlogp, int *needscanp);
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
+               struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
+               xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
+
+/* xfs_dir2_leaf.c */
+extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops;
+
+extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
+extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
+               struct xfs_buf *dbp);
+extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
+extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
+               struct xfs_buf *bp);
+extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp,
+               int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
+extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
+               size_t bufsize, xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
+               struct xfs_buf **bpp, int magic);
+extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
+               int first, int last);
+extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
+               struct xfs_buf *bp);
+extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+               struct xfs_buf *lbp);
+extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
+               struct xfs_buf *lbp, xfs_dir2_db_t db);
+extern struct xfs_dir2_leaf_entry *
+xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
+               int lowstale, int highstale,
+               int *lfloglow, int *lfloghigh);
+extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+
+/* xfs_dir2_node.c */
+extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
+               struct xfs_buf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
+               struct xfs_da_args *args, int *indexp,
+               struct xfs_da_state *state);
+extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp,
+               struct xfs_buf *leaf2_bp);
+extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
+       struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
+extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+               struct xfs_da_state_blk *drop_blk,
+               struct xfs_da_state_blk *save_blk);
+extern int xfs_dir2_node_addname(struct xfs_da_args *args);
+extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_node_removename(struct xfs_da_args *args);
+extern int xfs_dir2_node_replace(struct xfs_da_args *args);
+extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+               int *rvalp);
+extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t fbno, struct xfs_buf **bpp);
+
+/* xfs_dir2_sf.c */
+extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
+extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
+               struct xfs_dir2_sf_entry *sfep);
+extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
+               struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
+               int size, xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
+extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
+               xfs_off_t *offset, filldir_t filldir);
+extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+
+#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/libxfs/xfs_dir2_sf.c b/libxfs/xfs_dir2_sf.c

index 6b5e6d455bd4d6e7cc75cdba82fc541335e03ff8..a96be7695f09229d89814954917c7d0aa6c1df03 100644 (file)
--- a/libxfs/xfs_dir2_sf.c
+++ b/libxfs/xfs_dir2_sf.c
@@ -40,6 +40,82 @@ static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
  static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
  #endif /* XFS_BIG_INUMS */
  
+/*
+ * Inode numbers in short-form directories can come in two versions,
+ * either 4 bytes or 8 bytes wide.  These helpers deal with the
+ * two forms transparently by looking at the header's i8count field.
+ *
+ * For 64-bit inode numbers the most significant byte must be zero.
+ */
+static xfs_ino_t
+xfs_dir2_sf_get_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       xfs_dir2_inou_t         *from)
+{
+       /* An i8count of zero selects the 4-byte inode number form. */
+       if (!hdr->i8count)
+               return get_unaligned_be32(&from->i4.i);
+       return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
+}
+
+static void
+xfs_dir2_sf_put_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       xfs_dir2_inou_t         *to,
+       xfs_ino_t               ino)
+{
+       /* The most significant byte of the inode number must be clear. */
+       ASSERT((ino & 0xff00000000000000ULL) == 0);
+       if (!hdr->i8count)
+               put_unaligned_be32(ino, &to->i4.i);
+       else
+               put_unaligned_be64(ino, &to->i8.i);
+}
+
+xfs_ino_t                                      /* parent inode number */
+xfs_dir2_sf_get_parent_ino(
+       struct xfs_dir2_sf_hdr  *hdr)           /* shortform directory header */
+{
+       return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
+}
+
+void
+xfs_dir2_sf_put_parent_ino(
+       struct xfs_dir2_sf_hdr  *hdr,           /* shortform directory header */
+       xfs_ino_t               ino)            /* inode number to store as parent */
+{
+       xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
+}
+
+/*
+ * In short-form directory entries the inode numbers are stored at a variable
+ * offset after the entry name.  The inode numbers may only be accessed
+ * through the helpers below.
+ */
+static xfs_dir2_inou_t *                       /* inode number location */
+xfs_dir2_sfe_inop(
+       struct xfs_dir2_sf_entry *sfep)         /* shortform entry */
+{
+       return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];   /* just past the name */
+}
+
+xfs_ino_t                                      /* entry's inode number */
+xfs_dir2_sfe_get_ino(
+       struct xfs_dir2_sf_hdr  *hdr,           /* shortform directory header */
+       struct xfs_dir2_sf_entry *sfep)         /* shortform entry to read */
+{
+       return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
+}
+
+void
+xfs_dir2_sfe_put_ino(
+       struct xfs_dir2_sf_hdr  *hdr,           /* shortform directory header */
+       struct xfs_dir2_sf_entry *sfep,         /* shortform entry to update */
+       xfs_ino_t               ino)            /* inode number to store */
+{
+       xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
+}
+
  /*
   * Given a block directory (dp/block), calculate its size as a shortform (sf)
   * directory and a header for the sf directory, if it will fit it the
@@ -49,7 +125,7 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
  int                                            /* size for sf form */
  xfs_dir2_block_sfsize(
         xfs_inode_t             *dp,            /* incore inode pointer */
-       xfs_dir2_block_t        *block,         /* block directory data */
+       xfs_dir2_data_hdr_t     *hdr,           /* block directory data */
         xfs_dir2_sf_hdr_t       *sfhp)          /* output: header for sf form */
  {
         xfs_dir2_dataptr_t      addr;           /* data entry address */
@@ -69,7 +145,7 @@ xfs_dir2_block_sfsize(
         mp = dp->i_mount;
  
         count = i8count = namelen = 0;
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, hdr);
         blp = xfs_dir2_block_leaf_p(btp);
  
         /*
@@ -82,7 +158,7 @@ xfs_dir2_block_sfsize(
                  * Calculate the pointer to the entry at hand.
                  */
                 dep = (xfs_dir2_data_entry_t *)
-                     ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
+                     ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
                 /*
                  * Detect . and .., so we can special-case them.
                  * . is not included in sf directories.
@@ -119,7 +195,7 @@ xfs_dir2_block_sfsize(
          */
         sfhp->count = count;
         sfhp->i8count = i8count;
-       xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
+       xfs_dir2_sf_put_parent_ino(sfhp, parent);
         return size;
  }
  
@@ -130,11 +206,11 @@ xfs_dir2_block_sfsize(
  int                                            /* error */
  xfs_dir2_block_to_sf(
         xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dabuf_t             *bp,            /* block buffer */
+       struct xfs_buf          *bp,
         int                     size,           /* shortform directory size */
         xfs_dir2_sf_hdr_t       *sfhp)          /* shortform directory hdr */
  {
-       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
         xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
         xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
         xfs_inode_t             *dp;            /* incore directory inode */
@@ -145,8 +221,7 @@ xfs_dir2_block_to_sf(
         xfs_mount_t             *mp;            /* filesystem mount point */
         char                    *ptr;           /* current data pointer */
         xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
-       xfs_ino_t               temp;
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform directory header */
  
         trace_xfs_dir2_block_to_sf(args);
  
@@ -157,13 +232,14 @@ xfs_dir2_block_to_sf(
          * Make a copy of the block data, so we can shrink the inode
          * and add local data.
          */
-       block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
-       memcpy(block, bp->data, mp->m_dirblksize);
+       hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
+       memcpy(hdr, bp->b_addr, mp->m_dirblksize);
         logflags = XFS_ILOG_CORE;
         if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
                 ASSERT(error != ENOSPC);
                 goto out;
         }
+
         /*
          * The buffer is now unconditionally gone, whether
          * xfs_dir2_shrink_inode worked or not.
@@ -179,14 +255,14 @@ xfs_dir2_block_to_sf(
         /*
          * Copy the header into the newly allocate local space.
          */
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
         dp->i_d.di_size = size;
         /*
          * Set up to loop over the block's entries.
          */
-       btp = xfs_dir2_block_tail_p(mp, block);
-       ptr = (char *)block->u;
+       btp = xfs_dir2_block_tail_p(mp, hdr);
+       ptr = (char *)(hdr + 1);
         endptr = (char *)xfs_dir2_block_leaf_p(btp);
         sfep = xfs_dir2_sf_firstentry(sfp);
         /*
@@ -214,7 +290,7 @@ xfs_dir2_block_to_sf(
                 else if (dep->namelen == 2 &&
                          dep->name[0] == '.' && dep->name[1] == '.')
                         ASSERT(be64_to_cpu(dep->inumber) ==
-                              xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
+                              xfs_dir2_sf_get_parent_ino(sfp));
                 /*
                  * Normal entry, copy it into shortform.
                  */
@@ -222,11 +298,11 @@ xfs_dir2_block_to_sf(
                         sfep->namelen = dep->namelen;
                         xfs_dir2_sf_put_offset(sfep,
                                 (xfs_dir2_data_aoff_t)
-                               ((char *)dep - (char *)block));
+                               ((char *)dep - (char *)hdr));
                         memcpy(sfep->name, dep->name, dep->namelen);
-                       temp = be64_to_cpu(dep->inumber);
-                       xfs_dir2_sf_put_inumber(sfp, &temp,
-                               xfs_dir2_sf_inumberp(sfep));
+                       xfs_dir2_sfe_put_ino(sfp, sfep,
+                                            be64_to_cpu(dep->inumber));
+
                         sfep = xfs_dir2_sf_nextentry(sfp, sfep);
                 }
                 ptr += xfs_dir2_data_entsize(dep->namelen);
@@ -235,7 +311,7 @@ xfs_dir2_block_to_sf(
         xfs_dir2_sf_check(args);
  out:
         xfs_trans_log_inode(args->trans, dp, logflags);
-       kmem_free(block);
+       kmem_free(hdr);
         return error;
  }
  
@@ -258,7 +334,7 @@ xfs_dir2_sf_addname(
         xfs_dir2_data_aoff_t    offset = 0;     /* offset for new entry */
         int                     old_isize;      /* di_size before adding name */
         int                     pick;           /* which algorithm to use */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
         xfs_dir2_sf_entry_t     *sfep = NULL;   /* shortform entry */
  
         trace_xfs_dir2_sf_addname(args);
@@ -275,19 +351,19 @@ xfs_dir2_sf_addname(
         }
         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
         ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
         /*
          * Compute entry (and change in) size.
          */
-       add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
+       add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
         incr_isize = add_entsize;
         objchange = 0;
  #if XFS_BIG_INUMS
         /*
          * Do we have to change to 8 byte inodes?
          */
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
                 /*
                  * Yes, adjust the entry size and the total size.
                  */
@@ -295,7 +371,7 @@ xfs_dir2_sf_addname(
                         (uint)sizeof(xfs_dir2_ino8_t) -
                         (uint)sizeof(xfs_dir2_ino4_t);
                 incr_isize +=
-                       (sfp->hdr.count + 2) *
+                       (sfp->count + 2) *
                         ((uint)sizeof(xfs_dir2_ino8_t) -
                          (uint)sizeof(xfs_dir2_ino4_t));
                 objchange = 1;
@@ -365,21 +441,21 @@ xfs_dir2_sf_addname_easy(
  {
         int                     byteoff;        /* byte offset in sf dir */
         xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
  
         dp = args->dp;
  
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         byteoff = (int)((char *)sfep - (char *)sfp);
         /*
          * Grow the in-inode space.
          */
-       xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
+       xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
                 XFS_DATA_FORK);
         /*
          * Need to set up again due to realloc of the inode data.
          */
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
         /*
          * Fill in the new entry.
@@ -387,15 +463,14 @@ xfs_dir2_sf_addname_easy(
         sfep->namelen = args->namelen;
         xfs_dir2_sf_put_offset(sfep, offset);
         memcpy(sfep->name, args->name, sfep->namelen);
-       xfs_dir2_sf_put_inumber(sfp, &args->inumber,
-               xfs_dir2_sf_inumberp(sfep));
+       xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
         /*
          * Update the header and inode.
          */
-       sfp->hdr.count++;
+       sfp->count++;
  #if XFS_BIG_INUMS
         if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
-               sfp->hdr.i8count++;
+               sfp->i8count++;
  #endif
         dp->i_d.di_size = new_isize;
         xfs_dir2_sf_check(args);
@@ -425,19 +500,19 @@ xfs_dir2_sf_addname_hard(
         xfs_dir2_data_aoff_t    offset;         /* current offset value */
         int                     old_isize;      /* previous di_size */
         xfs_dir2_sf_entry_t     *oldsfep;       /* entry in original dir */
-       xfs_dir2_sf_t           *oldsfp;        /* original shortform dir */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* original shortform dir */
         xfs_dir2_sf_entry_t     *sfep;          /* entry in new dir */
-       xfs_dir2_sf_t           *sfp;           /* new shortform dir */
+       xfs_dir2_sf_hdr_t       *sfp;           /* new shortform dir */
  
         /*
          * Copy the old directory to the stack buffer.
          */
         dp = args->dp;
  
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         old_isize = (int)dp->i_d.di_size;
         buf = kmem_alloc(old_isize, KM_SLEEP);
-       oldsfp = (xfs_dir2_sf_t *)buf;
+       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
         memcpy(oldsfp, sfp, old_isize);
         /*
          * Loop over the old directory finding the place we're going
@@ -466,7 +541,7 @@ xfs_dir2_sf_addname_hard(
         /*
          * Reset the pointer since the buffer was reallocated.
          */
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         /*
          * Copy the first part of the directory, including the header.
          */
@@ -479,12 +554,11 @@ xfs_dir2_sf_addname_hard(
         sfep->namelen = args->namelen;
         xfs_dir2_sf_put_offset(sfep, offset);
         memcpy(sfep->name, args->name, sfep->namelen);
-       xfs_dir2_sf_put_inumber(sfp, &args->inumber,
-               xfs_dir2_sf_inumberp(sfep));
-       sfp->hdr.count++;
+       xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+       sfp->count++;
  #if XFS_BIG_INUMS
         if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
-               sfp->hdr.i8count++;
+               sfp->i8count++;
  #endif
         /*
          * If there's more left to copy, do that.
@@ -518,14 +592,14 @@ xfs_dir2_sf_addname_pick(
         xfs_mount_t             *mp;            /* filesystem mount point */
         xfs_dir2_data_aoff_t    offset;         /* data block offset */
         xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
         int                     size;           /* entry's data size */
         int                     used;           /* data bytes used */
  
         dp = args->dp;
         mp = dp->i_mount;
  
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         size = xfs_dir2_data_entsize(args->namelen);
         offset = XFS_DIR2_DATA_FIRST_OFFSET;
         sfep = xfs_dir2_sf_firstentry(sfp);
@@ -535,7 +609,7 @@ xfs_dir2_sf_addname_pick(
          * Keep track of data offset and whether we've seen a place
          * to insert the new entry.
          */
-       for (i = 0; i < sfp->hdr.count; i++) {
+       for (i = 0; i < sfp->count; i++) {
                 if (!holefit)
                         holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
                 offset = xfs_dir2_sf_get_offset(sfep) +
@@ -547,7 +621,7 @@ xfs_dir2_sf_addname_pick(
          * was a data block (block form directory).
          */
         used = offset +
-              (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
                (uint)sizeof(xfs_dir2_block_tail_t);
         /*
          * If it won't fit in a block form then we can't insert it,
@@ -593,30 +667,30 @@ xfs_dir2_sf_check(
         xfs_ino_t               ino;            /* entry inode number */
         int                     offset;         /* data offset */
         xfs_dir2_sf_entry_t     *sfep;          /* shortform dir entry */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
  
         dp = args->dp;
  
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         offset = XFS_DIR2_DATA_FIRST_OFFSET;
-       ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+       ino = xfs_dir2_sf_get_parent_ino(sfp);
         i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
  
         for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-            i < sfp->hdr.count;
+            i < sfp->count;
              i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
-               ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+               ino = xfs_dir2_sfe_get_ino(sfp, sfep);
                 i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
                 offset =
                         xfs_dir2_sf_get_offset(sfep) +
                         xfs_dir2_data_entsize(sfep->namelen);
         }
-       ASSERT(i8count == sfp->hdr.i8count);
+       ASSERT(i8count == sfp->i8count);
         ASSERT(XFS_BIG_INUMS || i8count == 0);
         ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
         ASSERT(offset +
-              (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
                (uint)sizeof(xfs_dir2_block_tail_t) <=
                dp->i_mount->m_dirblksize);
  }
@@ -632,7 +706,7 @@ xfs_dir2_sf_create(
  {
         xfs_inode_t     *dp;            /* incore directory inode */
         int             i8count;        /* parent inode is an 8-byte number */
-       xfs_dir2_sf_t   *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t *sfp;         /* shortform structure */
         int             size;           /* directory size */
  
         trace_xfs_dir2_sf_create(args);
@@ -662,13 +736,13 @@ xfs_dir2_sf_create(
         /*
          * Fill in the header,
          */
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       sfp->hdr.i8count = i8count;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       sfp->i8count = i8count;
         /*
          * Now can put in the inode number, since i8count is set.
          */
-       xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
-       sfp->hdr.count = 0;
+       xfs_dir2_sf_put_parent_ino(sfp, pino);
+       sfp->count = 0;
         dp->i_d.di_size = size;
         xfs_dir2_sf_check(args);
         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -687,7 +761,7 @@ xfs_dir2_sf_lookup(
         int                     i;              /* entry index */
         int                     error;
         xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
         enum xfs_dacmp          cmp;            /* comparison result */
         xfs_dir2_sf_entry_t     *ci_sfep;       /* case-insens. entry */
  
@@ -706,8 +780,8 @@ xfs_dir2_sf_lookup(
         }
         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
         ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
         /*
          * Special case for .
          */
@@ -721,7 +795,7 @@ xfs_dir2_sf_lookup(
          */
         if (args->namelen == 2 &&
             args->name[0] == '.' && args->name[1] == '.') {
-               args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+               args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
                 args->cmpresult = XFS_CMP_EXACT;
                 return XFS_ERROR(EEXIST);
         }
@@ -729,7 +803,7 @@ xfs_dir2_sf_lookup(
          * Loop over all the entries trying to match ours.
          */
         ci_sfep = NULL;
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
                                 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                 /*
                  * Compare name and if it's an exact match, return the inode
@@ -740,8 +814,7 @@ xfs_dir2_sf_lookup(
                                                                 sfep->namelen);
                 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
                         args->cmpresult = cmp;
-                       args->inumber = xfs_dir2_sf_get_inumber(sfp,
-                                               xfs_dir2_sf_inumberp(sfep));
+                       args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
                         if (cmp == XFS_CMP_EXACT)
                                 return XFS_ERROR(EEXIST);
                         ci_sfep = sfep;
@@ -773,7 +846,7 @@ xfs_dir2_sf_removename(
         int                     newsize;        /* new inode size */
         int                     oldsize;        /* old inode size */
         xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
  
         trace_xfs_dir2_sf_removename(args);
  
@@ -790,32 +863,31 @@ xfs_dir2_sf_removename(
         }
         ASSERT(dp->i_df.if_bytes == oldsize);
         ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
         /*
          * Loop over the old directory entries.
          * Find the one we're deleting.
          */
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
                                 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
                                                                 XFS_CMP_EXACT) {
-                       ASSERT(xfs_dir2_sf_get_inumber(sfp,
-                                               xfs_dir2_sf_inumberp(sfep)) ==
-                                                               args->inumber);
+                       ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
+                              args->inumber);
                         break;
                 }
         }
         /*
          * Didn't find it.
          */
-       if (i == sfp->hdr.count)
+       if (i == sfp->count)
                 return XFS_ERROR(ENOENT);
         /*
          * Calculate sizes.
          */
         byteoff = (int)((char *)sfep - (char *)sfp);
-       entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
+       entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
         newsize = oldsize - entsize;
         /*
          * Copy the part if any after the removed entry, sliding it down.
@@ -826,22 +898,22 @@ xfs_dir2_sf_removename(
         /*
          * Fix up the header and file size.
          */
-       sfp->hdr.count--;
+       sfp->count--;
         dp->i_d.di_size = newsize;
         /*
          * Reallocate, making it smaller.
          */
         xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
  #if XFS_BIG_INUMS
         /*
          * Are we changing inode number size?
          */
         if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
-               if (sfp->hdr.i8count == 1)
+               if (sfp->i8count == 1)
                         xfs_dir2_sf_toino4(args);
                 else
-                       sfp->hdr.i8count--;
+                       sfp->i8count--;
         }
  #endif
         xfs_dir2_sf_check(args);
@@ -865,7 +937,7 @@ xfs_dir2_sf_replace(
         int                     i8elevated;     /* sf_toino8 set i8count=1 */
  #endif
         xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
-       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
  
         trace_xfs_dir2_sf_replace(args);
  
@@ -881,19 +953,19 @@ xfs_dir2_sf_replace(
         }
         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
         ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
  #if XFS_BIG_INUMS
         /*
          * New inode number is large, and need to convert to 8-byte inodes.
          */
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
                 int     error;                  /* error return value */
                 int     newsize;                /* new inode size */
  
                 newsize =
                         dp->i_df.if_bytes +
-                       (sfp->hdr.count + 1) *
+                       (sfp->count + 1) *
                         ((uint)sizeof(xfs_dir2_ino8_t) -
                          (uint)sizeof(xfs_dir2_ino4_t));
                 /*
@@ -911,7 +983,7 @@ xfs_dir2_sf_replace(
                  */
                 xfs_dir2_sf_toino8(args);
                 i8elevated = 1;
-               sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+               sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         } else
                 i8elevated = 0;
  #endif
@@ -922,34 +994,32 @@ xfs_dir2_sf_replace(
         if (args->namelen == 2 &&
             args->name[0] == '.' && args->name[1] == '.') {
  #if XFS_BIG_INUMS || defined(DEBUG)
-               ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+               ino = xfs_dir2_sf_get_parent_ino(sfp);
                 ASSERT(args->inumber != ino);
  #endif
-               xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
+               xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
         }
         /*
          * Normal entry, look for the name.
          */
         else {
                 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-                               i < sfp->hdr.count;
+                               i < sfp->count;
                                 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
                         if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
                                                                 XFS_CMP_EXACT) {
  #if XFS_BIG_INUMS || defined(DEBUG)
-                               ino = xfs_dir2_sf_get_inumber(sfp,
-                                       xfs_dir2_sf_inumberp(sfep));
+                               ino = xfs_dir2_sfe_get_ino(sfp, sfep);
                                 ASSERT(args->inumber != ino);
  #endif
-                               xfs_dir2_sf_put_inumber(sfp, &args->inumber,
-                                       xfs_dir2_sf_inumberp(sfep));
+                               xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
                                 break;
                         }
                 }
                 /*
                  * Didn't find it.
                  */
-               if (i == sfp->hdr.count) {
+               if (i == sfp->count) {
                         ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
  #if XFS_BIG_INUMS
                         if (i8elevated)
@@ -967,10 +1037,10 @@ xfs_dir2_sf_replace(
                 /*
                  * And the old count was one, so need to convert to small.
                  */
-               if (sfp->hdr.i8count == 1)
+               if (sfp->i8count == 1)
                         xfs_dir2_sf_toino4(args);
                 else
-                       sfp->hdr.i8count--;
+                       sfp->i8count--;
         }
         /*
          * See if the old number was small, the new number is large.
@@ -981,9 +1051,9 @@ xfs_dir2_sf_replace(
                  * add to the i8count unless we just converted to 8-byte
                  * inodes (which does an implied i8count = 1)
                  */
-               ASSERT(sfp->hdr.i8count != 0);
+               ASSERT(sfp->i8count != 0);
                 if (!i8elevated)
-                       sfp->hdr.i8count++;
+                       sfp->i8count++;
         }
  #endif
         xfs_dir2_sf_check(args);
@@ -1003,13 +1073,12 @@ xfs_dir2_sf_toino4(
         char                    *buf;           /* old dir's buffer */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     i;              /* entry index */
-       xfs_ino_t               ino;            /* entry inode number */
         int                     newsize;        /* new inode size */
         xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
-       xfs_dir2_sf_t           *oldsfp;        /* old sf directory */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* old sf directory */
         int                     oldsize;        /* old inode size */
         xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
-       xfs_dir2_sf_t           *sfp;           /* new sf directory */
+       xfs_dir2_sf_hdr_t       *sfp;           /* new sf directory */
  
         trace_xfs_dir2_sf_toino4(args);
  
@@ -1022,44 +1091,42 @@ xfs_dir2_sf_toino4(
          */
         oldsize = dp->i_df.if_bytes;
         buf = kmem_alloc(oldsize, KM_SLEEP);
-       oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(oldsfp->hdr.i8count == 1);
+       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsfp->i8count == 1);
         memcpy(buf, oldsfp, oldsize);
         /*
          * Compute the new inode size.
          */
         newsize =
                 oldsize -
-               (oldsfp->hdr.count + 1) *
+               (oldsfp->count + 1) *
                 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
         xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
         xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
         /*
          * Reset our pointers, the data has moved.
          */
-       oldsfp = (xfs_dir2_sf_t *)buf;
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         /*
          * Fill in the new header.
          */
-       sfp->hdr.count = oldsfp->hdr.count;
-       sfp->hdr.i8count = 0;
-       ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
-       xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
+       sfp->count = oldsfp->count;
+       sfp->i8count = 0;
+       xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
         /*
          * Copy the entries field by field.
          */
         for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
                     oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-            i < sfp->hdr.count;
+            i < sfp->count;
              i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
                   oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
                 sfep->namelen = oldsfep->namelen;
                 sfep->offset = oldsfep->offset;
                 memcpy(sfep->name, oldsfep->name, sfep->namelen);
-               ino = xfs_dir2_sf_get_inumber(oldsfp,
-                       xfs_dir2_sf_inumberp(oldsfep));
-               xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
+               xfs_dir2_sfe_put_ino(sfp, sfep,
+                       xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
         }
         /*
          * Clean up the inode.
@@ -1081,13 +1148,12 @@ xfs_dir2_sf_toino8(
         char                    *buf;           /* old dir's buffer */
         xfs_inode_t             *dp;            /* incore directory inode */
         int                     i;              /* entry index */
-       xfs_ino_t               ino;            /* entry inode number */
         int                     newsize;        /* new inode size */
         xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
-       xfs_dir2_sf_t           *oldsfp;        /* old sf directory */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* old sf directory */
         int                     oldsize;        /* old inode size */
         xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
-       xfs_dir2_sf_t           *sfp;           /* new sf directory */
+       xfs_dir2_sf_hdr_t       *sfp;           /* new sf directory */
  
         trace_xfs_dir2_sf_toino8(args);
  
@@ -1100,44 +1166,42 @@ xfs_dir2_sf_toino8(
          */
         oldsize = dp->i_df.if_bytes;
         buf = kmem_alloc(oldsize, KM_SLEEP);
-       oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-       ASSERT(oldsfp->hdr.i8count == 0);
+       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsfp->i8count == 0);
         memcpy(buf, oldsfp, oldsize);
         /*
          * Compute the new inode size.
          */
         newsize =
                 oldsize +
-               (oldsfp->hdr.count + 1) *
+               (oldsfp->count + 1) *
                 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
         xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
         xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
         /*
          * Reset our pointers, the data has moved.
          */
-       oldsfp = (xfs_dir2_sf_t *)buf;
-       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
         /*
          * Fill in the new header.
          */
-       sfp->hdr.count = oldsfp->hdr.count;
-       sfp->hdr.i8count = 1;
-       ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
-       xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
+       sfp->count = oldsfp->count;
+       sfp->i8count = 1;
+       xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
         /*
          * Copy the entries field by field.
          */
         for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
                     oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-            i < sfp->hdr.count;
+            i < sfp->count;
              i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
                   oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
                 sfep->namelen = oldsfep->namelen;
                 sfep->offset = oldsfep->offset;
                 memcpy(sfep->name, oldsfep->name, sfep->namelen);
-               ino = xfs_dir2_sf_get_inumber(oldsfp,
-                       xfs_dir2_sf_inumberp(oldsfep));
-               xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
+               xfs_dir2_sfe_put_ino(sfp, sfep,
+                       xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
         }
         /*
          * Clean up the inode.
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c

index 1fcafb6940327678c34bd3393c2c518c4e70b619..529d92d4760fd78f2d12de713bb9e733fb33eaa3 100644 (file)
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -131,7 +131,7 @@ xfs_check_agi_freecount(
  /*
   * Initialise a new set of inodes.
   */
-STATIC void
+STATIC int
  xfs_ialloc_inode_init(
         struct xfs_mount        *mp,
         struct xfs_trans        *tp,
@@ -182,10 +182,9 @@ xfs_ialloc_inode_init(
                 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
                 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
                                          mp->m_bsize * blks_per_cluster,
-                                        XBF_LOCK);
-               ASSERT(fbuf);
-               ASSERT(!XFS_BUF_GETERROR(fbuf));
-
+                                        XBF_UNMAPPED);
+               if (!fbuf)
+                       return ENOMEM;
                 /*
                  * Initialize all inodes in this buffer and then log them.
                  *
@@ -193,6 +192,7 @@ xfs_ialloc_inode_init(
                  *      to log a whole cluster of inodes instead of all the
                  *      individual transactions causing a lot of log traffic.
                  */
+               fbuf->b_ops = &xfs_inode_buf_ops;
                 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
                 for (i = 0; i < ninodes; i++) {
                         int     ioffset = i << mp->m_sb.sb_inodelog;
@@ -207,6 +207,7 @@ xfs_ialloc_inode_init(
                 }
                 xfs_trans_inode_alloc_buf(tp, fbuf);
         }
+       return 0;
  }
  
  /*
@@ -232,6 +233,7 @@ xfs_ialloc_ag_alloc(
                                         /* boundary */
         struct xfs_perag *pag;
  
+       memset(&args, 0, sizeof(args));
         args.tp = tp;
         args.mp = tp->t_mountp;
  
@@ -258,8 +260,6 @@ xfs_ialloc_ag_alloc(
                   (args.agbno < be32_to_cpu(agi->agi_length)))) {
                 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
                 args.type = XFS_ALLOCTYPE_THIS_BNO;
-               args.mod = args.total = args.wasdel = args.isfl =
-                       args.userdata = args.minalignslop = 0;
                 args.prod = 1;
  
                 /*
@@ -312,8 +312,6 @@ xfs_ialloc_ag_alloc(
                  * Allocate a fixed-size extent of inodes.
                  */
                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.mod = args.total = args.wasdel = args.isfl =
-                       args.userdata = args.minalignslop = 0;
                 args.prod = 1;
                 /*
                  * Allow space for the inode btree to split.
@@ -351,9 +349,11 @@ xfs_ialloc_ag_alloc(
          * rather than a linear progression to prevent the next generation
          * number from being easily guessable.
          */
-       xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
-                             random32());
+       error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
+                       args.len, random32());
  
+       if (error)
+               return error;
         /*
          * Convert the results.
          */
@@ -411,7 +411,7 @@ xfs_ialloc_next_ag(
  
         spin_lock(&mp->m_agirotor_lock);
         agno = mp->m_agirotor;
-       if (++mp->m_agirotor == mp->m_maxagi)
+       if (++mp->m_agirotor >= mp->m_maxagi)
                 mp->m_agirotor = 0;
         spin_unlock(&mp->m_agirotor_lock);
  
@@ -422,14 +422,13 @@ xfs_ialloc_next_ag(
   * Select an allocation group to look for a free inode in, based on the parent
   * inode and then mode.  Return the allocation group buffer.
   */
-STATIC xfs_buf_t *                     /* allocation group buffer */
+STATIC xfs_agnumber_t
  xfs_ialloc_ag_select(
         xfs_trans_t     *tp,            /* transaction pointer */
         xfs_ino_t       parent,         /* parent directory inode number */
-       mode_t          mode,           /* bits set to indicate file type */
+       umode_t         mode,           /* bits set to indicate file type */
         int             okalloc)        /* ok to allocate more space */
  {
-       xfs_buf_t       *agbp;          /* allocation group header buffer */
         xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
         xfs_agnumber_t  agno;           /* current ag number */
         int             flags;          /* alloc buffer locking flags */
@@ -439,6 +438,7 @@ xfs_ialloc_ag_select(
         int             needspace;      /* file mode implies space allocated */
         xfs_perag_t     *pag;           /* per allocation group data */
         xfs_agnumber_t  pagno;          /* parent (starting) ag number */
+       int             error;
  
         /*
          * Files of these types need at least one block if length > 0
@@ -454,7 +454,9 @@ xfs_ialloc_ag_select(
                 if (pagno >= agcount)
                         pagno = 0;
         }
+
         ASSERT(pagno < agcount);
+
         /*
          * Loop through allocation groups, looking for one with a little
          * free space in it.  Note we don't look for free inodes, exactly.
@@ -466,51 +468,45 @@ xfs_ialloc_ag_select(
         flags = XFS_ALLOC_FLAG_TRYLOCK;
         for (;;) {
                 pag = xfs_perag_get(mp, agno);
+               if (!pag->pagi_inodeok) {
+                       xfs_ialloc_next_ag(mp);
+                       goto nextag;
+               }
+
                 if (!pag->pagi_init) {
-                       if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
-                               agbp = NULL;
+                       error = xfs_ialloc_pagi_init(mp, tp, agno);
+                       if (error)
                                 goto nextag;
-                       }
-               } else
-                       agbp = NULL;
+               }
  
-               if (!pag->pagi_inodeok) {
-                       xfs_ialloc_next_ag(mp);
-                       goto unlock_nextag;
+               if (pag->pagi_freecount) {
+                       xfs_perag_put(pag);
+                       return agno;
                 }
  
-               /*
-                * Is there enough free space for the file plus a block
-                * of inodes (if we need to allocate some)?
-                */
-               ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
-               if (ineed && !pag->pagf_init) {
-                       if (agbp == NULL &&
-                           xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
-                               agbp = NULL;
+               if (!okalloc)
+                       goto nextag;
+
+               if (!pag->pagf_init) {
+                       error = xfs_alloc_pagf_init(mp, tp, agno, flags);
+                       if (error)
                                 goto nextag;
-                       }
-                       (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
                 }
-               if (!ineed || pag->pagf_init) {
-                       if (ineed && !(longest = pag->pagf_longest))
-                               longest = pag->pagf_flcount > 0;
-                       if (!ineed ||
-                           (pag->pagf_freeblks >= needspace + ineed &&
-                            longest >= ineed &&
-                            okalloc)) {
-                               if (agbp == NULL &&
-                                   xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
-                                       agbp = NULL;
-                                       goto nextag;
-                               }
-                               xfs_perag_put(pag);
-                               return agbp;
-                       }
+
+               /*
+                * Is there enough free space for the file plus a block of
+                * inodes (if we need to allocate some)?
+                */
+               ineed = XFS_IALLOC_BLOCKS(mp);
+               longest = pag->pagf_longest;
+               if (!longest)
+                       longest = pag->pagf_flcount > 0;
+
+               if (pag->pagf_freeblks >= needspace + ineed &&
+                   longest >= ineed) {
+                       xfs_perag_put(pag);
+                       return agno;
                 }
-unlock_nextag:
-               if (agbp)
-                       xfs_trans_brelse(tp, agbp);
  nextag:
                 xfs_perag_put(pag);
                 /*
@@ -518,13 +514,13 @@ nextag:
                  * down.
                  */
                 if (XFS_FORCED_SHUTDOWN(mp))
-                       return NULL;
+                       return NULLAGNUMBER;
                 agno++;
                 if (agno >= agcount)
                         agno = 0;
                 if (agno == pagno) {
                         if (flags == 0)
-                               return NULL;
+                               return NULLAGNUMBER;
                         flags = 0;
                 }
         }
@@ -587,188 +583,39 @@ xfs_ialloc_get_rec(
  }
  
  /*
- * Visible inode allocation functions.
- */
-
-/*
- * Allocate an inode on disk.
- * Mode is used to tell whether the new inode will need space, and whether
- * it is a directory.
+ * Allocate an inode.
   *
- * The arguments IO_agbp and alloc_done are defined to work within
- * the constraint of one allocation per transaction.
- * xfs_dialloc() is designed to be called twice if it has to do an
- * allocation to make more free inodes.  On the first call,
- * IO_agbp should be set to NULL. If an inode is available,
- * i.e., xfs_dialloc() did not need to do an allocation, an inode
- * number is returned.  In this case, IO_agbp would be set to the
- * current ag_buf and alloc_done set to false.
- * If an allocation needed to be done, xfs_dialloc would return
- * the current ag_buf in IO_agbp and set alloc_done to true.
- * The caller should then commit the current transaction, allocate a new
- * transaction, and call xfs_dialloc() again, passing in the previous
- * value of IO_agbp.  IO_agbp should be held across the transactions.
- * Since the agbp is locked across the two calls, the second call is
- * guaranteed to have a free inode available.
- *
- * Once we successfully pick an inode its number is returned and the
- * on-disk data structures are updated.  The inode itself is not read
- * in, since doing so would break ordering constraints with xfs_reclaim.
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
   */
-int
-xfs_dialloc(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_ino_t       parent,         /* parent inode (directory) */
-       mode_t          mode,           /* mode bits for new inode */
-       int             okalloc,        /* ok to allocate more space */
-       xfs_buf_t       **IO_agbp,      /* in/out ag header's buffer */
-       boolean_t       *alloc_done,    /* true if we needed to replenish
-                                          inode freelist */
-       xfs_ino_t       *inop)          /* inode number allocated */
+STATIC int
+xfs_dialloc_ag(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_ino_t               parent,
+       xfs_ino_t               *inop)
  {
-       xfs_agnumber_t  agcount;        /* number of allocation groups */
-       xfs_buf_t       *agbp;          /* allocation group header's buffer */
-       xfs_agnumber_t  agno;           /* allocation group number */
-       xfs_agi_t       *agi;           /* allocation group header structure */
-       xfs_btree_cur_t *cur;           /* inode allocation btree cursor */
-       int             error;          /* error return value */
-       int             i;              /* result code */
-       int             ialloced;       /* inode allocation status */
-       int             noroom = 0;     /* no space for inode blk allocation */
-       xfs_ino_t       ino;            /* fs-relative inode to be returned */
-       /* REFERENCED */
-       int             j;              /* result code */
-       xfs_mount_t     *mp;            /* file system mount structure */
-       int             offset;         /* index of inode in chunk */
-       xfs_agino_t     pagino;         /* parent's AG relative inode # */
-       xfs_agnumber_t  pagno;          /* parent's AG number */
-       xfs_inobt_rec_incore_t rec;     /* inode allocation record */
-       xfs_agnumber_t  tagno;          /* testing allocation group number */
-       xfs_btree_cur_t *tcur;          /* temp cursor */
-       xfs_inobt_rec_incore_t trec;    /* temp inode allocation record */
-       struct xfs_perag *pag;
-
-
-       if (*IO_agbp == NULL) {
-               /*
-                * We do not have an agbp, so select an initial allocation
-                * group for inode allocation.
-                */
-               agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
-               /*
-                * Couldn't find an allocation group satisfying the
-                * criteria, give up.
-                */
-               if (!agbp) {
-                       *inop = NULLFSINO;
-                       return 0;
-               }
-               agi = XFS_BUF_TO_AGI(agbp);
-               ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
-       } else {
-               /*
-                * Continue where we left off before.  In this case, we
-                * know that the allocation group has free inodes.
-                */
-               agbp = *IO_agbp;
-               agi = XFS_BUF_TO_AGI(agbp);
-               ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
-               ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
-       }
-       mp = tp->t_mountp;
-       agcount = mp->m_sb.sb_agcount;
-       agno = be32_to_cpu(agi->agi_seqno);
-       tagno = agno;
-       pagno = XFS_INO_TO_AGNO(mp, parent);
-       pagino = XFS_INO_TO_AGINO(mp, parent);
-
-       /*
-        * If we have already hit the ceiling of inode blocks then clear
-        * okalloc so we scan all available agi structures for a free
-        * inode.
-        */
-
-       if (mp->m_maxicount &&
-           mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
-               noroom = 1;
-               okalloc = 0;
-       }
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
+       xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
+       xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
+       struct xfs_perag        *pag;
+       struct xfs_btree_cur    *cur, *tcur;
+       struct xfs_inobt_rec_incore rec, trec;
+       xfs_ino_t               ino;
+       int                     error;
+       int                     offset;
+       int                     i, j;
  
-       /*
-        * Loop until we find an allocation group that either has free inodes
-        * or in which we can allocate some inodes.  Iterate through the
-        * allocation groups upward, wrapping at the end.
-        */
-       *alloc_done = B_FALSE;
-       while (!agi->agi_freecount) {
-               /*
-                * Don't do anything if we're not supposed to allocate
-                * any blocks, just go on to the next ag.
-                */
-               if (okalloc) {
-                       /*
-                        * Try to allocate some new inodes in the allocation
-                        * group.
-                        */
-                       if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
-                               xfs_trans_brelse(tp, agbp);
-                               if (error == ENOSPC) {
-                                       *inop = NULLFSINO;
-                                       return 0;
-                               } else
-                                       return error;
-                       }
-                       if (ialloced) {
-                               /*
-                                * We successfully allocated some inodes, return
-                                * the current context to the caller so that it
-                                * can commit the current transaction and call
-                                * us again where we left off.
-                                */
-                               ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
-                               *alloc_done = B_TRUE;
-                               *IO_agbp = agbp;
-                               *inop = NULLFSINO;
-                               return 0;
-                       }
-               }
-               /*
-                * If it failed, give up on this ag.
-                */
-               xfs_trans_brelse(tp, agbp);
-               /*
-                * Go on to the next ag: get its ag header.
-                */
-nextag:
-               if (++tagno == agcount)
-                       tagno = 0;
-               if (tagno == agno) {
-                       *inop = NULLFSINO;
-                       return noroom ? ENOSPC : 0;
-               }
-               pag = xfs_perag_get(mp, tagno);
-               if (pag->pagi_inodeok == 0) {
-                       xfs_perag_put(pag);
-                       goto nextag;
-               }
-               error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
-               xfs_perag_put(pag);
-               if (error)
-                       goto nextag;
-               agi = XFS_BUF_TO_AGI(agbp);
-               ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
-       }
-       /*
-        * Here with an allocation group that has a free inode.
-        * Reset agno since we may have chosen a new ag in the
-        * loop above.
-        */
-       agno = tagno;
-       *IO_agbp = NULL;
         pag = xfs_perag_get(mp, agno);
  
+       ASSERT(pag->pagi_init);
+       ASSERT(pag->pagi_inodeok);
+       ASSERT(pag->pagi_freecount > 0);
+
   restart_pagno:
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
         /*
          * If pagino is 0 (this is the root inode allocation) use newino.
          * This must work because we've just allocated some.
@@ -925,7 +772,7 @@ nextag:
          * See if the most recently allocated block has any free.
          */
  newino:
-       if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
+       if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
                 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
                                          XFS_LOOKUP_EQ, &i);
                 if (error)
@@ -968,7 +815,7 @@ newino:
         }
  
  alloc_inode:
-       offset = xfs_ialloc_find_free(&rec.ir_free);
+       offset = xfs_lowbit64(rec.ir_free);
         ASSERT(offset >= 0);
         ASSERT(offset < XFS_INODES_PER_CHUNK);
         ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1000,6 +847,165 @@ error0:
         return error;
  }
  
+/*
+ * Allocate an inode on disk.
+ *
+ * Mode is used to tell whether the new inode will need space, and whether it
+ * is a directory.
+ *
+ * This function is designed to be called twice if it has to do an allocation
+ * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
+ * If an inode is available without having to perform an allocation, an inode
+ * number is returned.  In this case, *IO_agbp is set to NULL.  If an allocation
+ * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
+ * The caller should then commit the current transaction, allocate a
+ * new transaction, and call xfs_dialloc() again, passing in the previous value
+ * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
+ * buffer is locked across the two calls, the second call is guaranteed to have
+ * a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the on-disk
+ * data structures are updated.  The inode itself is not read in, since doing so
+ * would break ordering constraints with xfs_reclaim.
+ */
+int
+xfs_dialloc(
+       struct xfs_trans        *tp,
+       xfs_ino_t               parent,
+       umode_t                 mode,
+       int                     okalloc,
+       struct xfs_buf          **IO_agbp,
+       xfs_ino_t               *inop)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_buf          *agbp;
+       xfs_agnumber_t          agno;
+       int                     error;
+       int                     ialloced;
+       int                     noroom = 0;
+       xfs_agnumber_t          start_agno;
+       struct xfs_perag        *pag;
+
+       if (*IO_agbp) {
+               /*
+                * If the caller passes in a pointer to the AGI buffer,
+                * continue where we left off before.  In this case, we
+                * know that the allocation group has free inodes.
+                */
+               agbp = *IO_agbp;
+               goto out_alloc;
+       }
+
+       /*
+        * We do not have an agbp, so select an initial allocation
+        * group for inode allocation.
+        */
+       start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+       if (start_agno == NULLAGNUMBER) {
+               *inop = NULLFSINO;
+               return 0;
+       }
+
+       /*
+        * If we have already hit the ceiling of inode blocks then clear
+        * okalloc so we scan all available agi structures for a free
+        * inode.
+        */
+       if (mp->m_maxicount &&
+           mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+               noroom = 1;
+               okalloc = 0;
+       }
+
+       /*
+        * Loop until we find an allocation group that either has free inodes
+        * or in which we can allocate some inodes.  Iterate through the
+        * allocation groups upward, wrapping at the end.
+        */
+       agno = start_agno;
+       for (;;) {
+               pag = xfs_perag_get(mp, agno);
+               if (!pag->pagi_inodeok) {
+                       xfs_ialloc_next_ag(mp);
+                       goto nextag;
+               }
+
+               if (!pag->pagi_init) {
+                       error = xfs_ialloc_pagi_init(mp, tp, agno);
+                       if (error)
+                               goto out_error;
+               }
+
+               /*
+                * Do a first racy fast path check if this AG is usable.
+                */
+               if (!pag->pagi_freecount && !okalloc)
+                       goto nextag;
+
+               /*
+                * Then read in the AGI buffer and recheck with the AGI buffer
+                * lock held.
+                */
+               error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+               if (error)
+                       goto out_error;
+
+               if (pag->pagi_freecount) {
+                       xfs_perag_put(pag);
+                       goto out_alloc;
+               }
+
+               if (!okalloc)
+                       goto nextag_relse_buffer;
+
+
+               error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
+               if (error) {
+                       xfs_trans_brelse(tp, agbp);
+
+                       if (error != ENOSPC)
+                               goto out_error;
+
+                       xfs_perag_put(pag);
+                       *inop = NULLFSINO;
+                       return 0;
+               }
+
+               if (ialloced) {
+                       /*
+                        * We successfully allocated some inodes, return
+                        * the current context to the caller so that it
+                        * can commit the current transaction and call
+                        * us again where we left off.
+                        */
+                       ASSERT(pag->pagi_freecount > 0);
+                       xfs_perag_put(pag);
+
+                       *IO_agbp = agbp;
+                       *inop = NULLFSINO;
+                       return 0;
+               }
+
+nextag_relse_buffer:
+               xfs_trans_brelse(tp, agbp);
+nextag:
+               xfs_perag_put(pag);
+               if (++agno == mp->m_sb.sb_agcount)
+                       agno = 0;
+               if (agno == start_agno) {
+                       *inop = NULLFSINO;
+                       return noroom ? ENOSPC : 0;
+               }
+       }
+
+out_alloc:
+       *IO_agbp = NULL;
+       return xfs_dialloc_ag(tp, agbp, parent, inop);
+out_error:
+       xfs_perag_put(pag);
+       return XFS_ERROR(error);
+}
+
  STATIC int
  xfs_imap_lookup(
         struct xfs_mount        *mp,
@@ -1019,10 +1025,9 @@ xfs_imap_lookup(
  
         error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
         if (error) {
-               xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-                               "xfs_ialloc_read_agi() returned "
-                               "error %d, agno %d",
-                               error, agno);
+               xfs_alert(mp,
+                       "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
+                       __func__, error, agno);
                 return error;
         }
  
@@ -1100,24 +1105,21 @@ xfs_imap(
                 if (flags & XFS_IGET_UNTRUSTED)
                         return XFS_ERROR(EINVAL);
                 if (agno >= mp->m_sb.sb_agcount) {
-                       xfs_fs_cmn_err(CE_ALERT, mp,
-                                       "xfs_imap: agno (%d) >= "
-                                       "mp->m_sb.sb_agcount (%d)",
-                                       agno,  mp->m_sb.sb_agcount);
+                       xfs_alert(mp,
+                               "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
+                               __func__, agno, mp->m_sb.sb_agcount);
                 }
                 if (agbno >= mp->m_sb.sb_agblocks) {
-                       xfs_fs_cmn_err(CE_ALERT, mp,
-                                       "xfs_imap: agbno (0x%llx) >= "
-                                       "mp->m_sb.sb_agblocks (0x%lx)",
-                                       (unsigned long long) agbno,
-                                       (unsigned long) mp->m_sb.sb_agblocks);
+                       xfs_alert(mp,
+               "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
+                               __func__, (unsigned long long)agbno,
+                               (unsigned long)mp->m_sb.sb_agblocks);
                 }
                 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
-                       xfs_fs_cmn_err(CE_ALERT, mp,
-                                       "xfs_imap: ino (0x%llx) != "
-                                       "XFS_AGINO_TO_INO(mp, agno, agino) "
-                                       "(0x%llx)",
-                                       ino, XFS_AGINO_TO_INO(mp, agno, agino));
+                       xfs_alert(mp,
+               "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
+                               __func__, ino,
+                               XFS_AGINO_TO_INO(mp, agno, agino));
                 }
                 xfs_stack_trace();
  #endif /* DEBUG */
@@ -1189,10 +1191,9 @@ out_map:
          */
         if ((imap->im_blkno + imap->im_len) >
             XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-               xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-                       "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
-                       " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
-                       (unsigned long long) imap->im_blkno,
+               xfs_alert(mp,
+       "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
+                       __func__, (unsigned long long) imap->im_blkno,
                         (unsigned long long) imap->im_len,
                         XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
                 return XFS_ERROR(EINVAL);
@@ -1253,7 +1254,7 @@ xfs_ialloc_log_agi(
         xfs_agi_t               *agi;   /* allocation group header */
  
         agi = XFS_BUF_TO_AGI(bp);
-       ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+       ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
  #endif
         /*
          * Compute byte offsets for the first and last fields.
@@ -1279,6 +1280,57 @@ xfs_check_agi_unlinked(
  #define xfs_check_agi_unlinked(agi)
  #endif
  
+static void
+xfs_agi_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
+       int             agi_ok;
+
+       /*
+        * Validate the magic number of the agi block.
+        */
+       agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
+               XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
+
+       /*
+        * During growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs ensures we can't
+        * use it by using uncached buffers that don't have the perag attached
+        * so we can detect and avoid this problem.
+        */
+       if (bp->b_pag)
+               agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) ==
+                                               bp->b_pag->pag_agno;
+
+       if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
+                       XFS_RANDOM_IALLOC_READ_AGI))) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+       xfs_check_agi_unlinked(agi);
+}
+
+static void
+xfs_agi_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_agi_verify(bp);
+}
+
+static void
+xfs_agi_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_agi_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_agi_buf_ops = {
+       .verify_read = xfs_agi_read_verify,
+       .verify_write = xfs_agi_write_verify,
+};
+
  /*
   * Read in the allocation group header (inode allocation section)
   */
@@ -1289,38 +1341,18 @@ xfs_read_agi(
         xfs_agnumber_t          agno,   /* allocation group number */
         struct xfs_buf          **bpp)  /* allocation group hdr buf */
  {
-       struct xfs_agi          *agi;   /* allocation group header */
-       int                     agi_ok; /* agi is consistent */
         int                     error;
  
         ASSERT(agno != NULLAGNUMBER);
  
         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                         XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), 0, bpp);
+                       XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
         if (error)
                 return error;
  
-       ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
-       agi = XFS_BUF_TO_AGI(*bpp);
-
-       /*
-        * Validate the magic number of the agi block.
-        */
-       agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
-               XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
-               be32_to_cpu(agi->agi_seqno) == agno;
-       if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
-                       XFS_RANDOM_IALLOC_READ_AGI))) {
-               XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
-                                    mp, agi);
-               xfs_trans_brelse(tp, *bpp);
-               return XFS_ERROR(EFSCORRUPTED);
-       }
-
-       XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
-
-       xfs_check_agi_unlinked(agi);
+       ASSERT(!xfs_buf_geterror(*bpp));
+       xfs_buf_set_ref(*bpp, XFS_AGI_REF);
         return 0;
  }
  
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c

index 35dd96f3c8f2786458c3f2b10037618a2bfa6876..0bc24cc87ca4abbc44f01c374fca39b32df1eadd 100644 (file)
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -163,6 +163,59 @@ xfs_inobt_key_diff(
                           cur->bc_rec.i.ir_startino;
  }
  
+void
+xfs_inobt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       unsigned int            level;
+       int                     sblock_ok; /* block passes checks */
+
+       /* magic number and level verification */
+       level = be16_to_cpu(block->bb_level);
+       sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) &&
+                   level < mp->m_in_maxlevels;
+
+       /* numrecs verification */
+       sblock_ok = sblock_ok &&
+               be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0];
+
+       /* sibling pointer verification */
+       sblock_ok = sblock_ok &&
+               (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
+                be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
+               block->bb_u.s.bb_leftsib &&
+               (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
+                be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
+               block->bb_u.s.bb_rightsib;
+
+       if (!sblock_ok) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+       }
+}
+
+static void
+xfs_inobt_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_inobt_verify(bp);
+}
+
+static void
+xfs_inobt_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_inobt_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_inobt_buf_ops = {
+       .verify_read = xfs_inobt_read_verify,
+       .verify_write = xfs_inobt_write_verify,
+};
+
  #ifdef DEBUG
  STATIC int
  xfs_inobt_keys_inorder(
@@ -266,7 +319,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
         .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_inobt_init_ptr_from_cur,
         .key_diff               = xfs_inobt_key_diff,
-
+       .buf_ops                = &xfs_inobt_buf_ops,
  #ifdef DEBUG
         .keys_inorder           = xfs_inobt_keys_inorder,
         .recs_inorder           = xfs_inobt_recs_inorder,
diff --git a/libxfs/xfs_inode.c b/libxfs/xfs_inode.c

index e4474fda3c45cd7c48ab387b893e41ecc9cd4d97..2970f46d0db391a8e012076afd1b0f49e05c4c41 100644 (file)
--- a/libxfs/xfs_inode.c
+++ b/libxfs/xfs_inode.c
@@ -21,6 +21,12 @@
  kmem_zone_t *xfs_ifork_zone;
  kmem_zone_t *xfs_inode_zone;
  
+/*
+ * Used in xfs_itruncate_extents().  This is the maximum number of extents
+ * freed from a file in a single transaction.
+ */
+#define        XFS_ITRUNC_MAX_EXTENTS  2
+
  STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
  STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
  STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
@@ -73,8 +79,8 @@ xfs_inobp_check(
                 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
                                         i * mp->m_sb.sb_inodesize);
                 if (!dip->di_next_unlinked)  {
-                       xfs_fs_cmn_err(CE_ALERT, mp,
-                               "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p.  About to pop an ASSERT.",
+                       xfs_alert(mp,
+       "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
                                 bp);
                         ASSERT(dip->di_next_unlinked);
                 }
@@ -82,176 +88,108 @@ xfs_inobp_check(
  }
  #endif
  
-/*
- * Find the buffer associated with the given inode map
- * We do basic validation checks on the buffer once it has been
- * retrieved from disk.
- */
-int
-xfs_imap_to_bp(
-       xfs_mount_t     *mp,
-       xfs_trans_t     *tp,
-       struct xfs_imap *imap,
-       xfs_buf_t       **bpp,
-       uint            buf_flags,
-       uint            iget_flags)
+static void
+xfs_inode_buf_verify(
+       struct xfs_buf  *bp)
  {
-       int             error;
+       struct xfs_mount *mp = bp->b_target->bt_mount;
         int             i;
         int             ni;
-       xfs_buf_t       *bp;
-
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
-                                  (int)imap->im_len, buf_flags, &bp);
-       if (error) {
-               if (error != EAGAIN) {
-                       cmn_err(CE_WARN,
-                               "xfs_imap_to_bp: xfs_trans_read_buf()returned "
-                               "an error %d on %s.  Returning error.",
-                               error, mp->m_fsname);
-               } else {
-                       ASSERT(buf_flags & XBF_TRYLOCK);
-               }
-               return error;
-       }
  
         /*
          * Validate the magic number and version of every inode in the buffer
-        * (if DEBUG kernel) or the first inode in the buffer, otherwise.
          */
-#ifdef DEBUG
-       ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
-#else  /* usual case */
-       ni = 1;
-#endif
-
+       ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
         for (i = 0; i < ni; i++) {
                 int             di_ok;
                 xfs_dinode_t    *dip;
  
-               dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+               dip = (struct xfs_dinode *)xfs_buf_offset(bp,
                                         (i << mp->m_sb.sb_inodelog));
-               di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+               di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
                             XFS_DINODE_GOOD_VERSION(dip->di_version);
                 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
                                                 XFS_ERRTAG_ITOBP_INOTOBP,
                                                 XFS_RANDOM_ITOBP_INOTOBP))) {
-                       if (iget_flags & XFS_IGET_UNTRUSTED) {
-                               xfs_trans_brelse(tp, bp);
-                               return XFS_ERROR(EINVAL);
-                       }
-                       XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
-                                               XFS_ERRLEVEL_HIGH, mp, dip);
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
+                                            mp, dip);
  #ifdef DEBUG
-                       cmn_err(CE_PANIC,
-                                       "Device %s - bad inode magic/vsn "
-                                       "daddr %lld #%d (magic=%x)",
-                               XFS_BUFTARG_NAME(mp->m_ddev_targp),
-                               (unsigned long long)imap->im_blkno, i,
+                       xfs_emerg(mp,
+                               "bad inode magic/vsn daddr %lld #%d (magic=%x)",
+                               (unsigned long long)bp->b_bn, i,
                                 be16_to_cpu(dip->di_magic));
+                       ASSERT(0);
  #endif
-                       xfs_trans_brelse(tp, bp);
-                       return XFS_ERROR(EFSCORRUPTED);
                 }
         }
-
         xfs_inobp_check(mp, bp);
+}
  
-       /*
-        * Mark the buffer as an inode buffer now that it looks good
-        */
-       XFS_BUF_SET_VTYPE(bp, B_FS_INO);
  
-       *bpp = bp;
-       return 0;
+static void
+xfs_inode_buf_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_inode_buf_verify(bp);
  }
  
-/*
- * This routine is called to map an inode number within a file
- * system to the buffer containing the on-disk version of the
- * inode.  It returns a pointer to the buffer containing the
- * on-disk inode in the bpp parameter, and in the dip parameter
- * it returns a pointer to the on-disk inode within that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * Use xfs_imap() to determine the size and location of the
- * buffer to read from disk.
- */
-int
-xfs_inotobp(
-       xfs_mount_t     *mp,
-       xfs_trans_t     *tp,
-       xfs_ino_t       ino,
-       xfs_dinode_t    **dipp,
-       xfs_buf_t       **bpp,
-       int             *offset,
-       uint            imap_flags)
+static void
+xfs_inode_buf_write_verify(
+       struct xfs_buf  *bp)
  {
-       struct xfs_imap imap;
-       xfs_buf_t       *bp;
-       int             error;
-
-       imap.im_blkno = 0;
-       error = xfs_imap(mp, tp, ino, &imap, imap_flags);
-       if (error)
-               return error;
-
-       error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
-       if (error)
-               return error;
-
-       *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
-       *bpp = bp;
-       *offset = imap.im_boffset;
-       return 0;
+       xfs_inode_buf_verify(bp);
  }
  
+const struct xfs_buf_ops xfs_inode_buf_ops = {
+       .verify_read = xfs_inode_buf_read_verify,
+       .verify_write = xfs_inode_buf_write_verify,
+};
+
  
  /*
- * This routine is called to map an inode to the buffer containing
- * the on-disk version of the inode.  It returns a pointer to the
- * buffer containing the on-disk inode in the bpp parameter, and in
- * the dip parameter it returns a pointer to the on-disk inode within
- * that buffer.
+ * This routine is called to map an inode to the buffer containing the on-disk
+ * version of the inode.  It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
+ * pointer to the on-disk inode within that buffer.
   *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * The inode is expected to already been mapped to its buffer and read
- * in once, thus we can use the mapping information stored in the inode
- * rather than calling xfs_imap().  This allows us to avoid the overhead
- * of looking at the inode btree for small block file systems
- * (see xfs_imap()).
+ * If a non-zero error is returned, then the contents of bpp and dipp are
+ * undefined.
   */
  int
-xfs_itobp(
-       xfs_mount_t     *mp,
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dinode_t    **dipp,
-       xfs_buf_t       **bpp,
-       uint            buf_flags)
+xfs_imap_to_bp(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_imap         *imap,
+       struct xfs_dinode       **dipp,
+       struct xfs_buf          **bpp,
+       uint                    buf_flags,
+       uint                    iget_flags)
  {
-       xfs_buf_t       *bp;
-       int             error;
+       struct xfs_buf          *bp;
+       int                     error;
  
-       ASSERT(ip->i_imap.im_blkno != 0);
+       buf_flags |= XBF_UNMAPPED;
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+                                  (int)imap->im_len, buf_flags, &bp,
+                                  &xfs_inode_buf_ops);
+       if (error) {
+               if (error == EAGAIN) {
+                       ASSERT(buf_flags & XBF_TRYLOCK);
+                       return error;
+               }
  
-       error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
-       if (error)
-               return error;
+               if (error == EFSCORRUPTED &&
+                   (iget_flags & XFS_IGET_UNTRUSTED))
+                       return XFS_ERROR(EINVAL);
  
-       if (!bp) {
-               ASSERT(buf_flags & XBF_TRYLOCK);
-               ASSERT(tp == NULL);
-               *bpp = NULL;
-               return EAGAIN;
+               xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
+                       __func__, error);
+               return error;
         }
  
-       *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
         *bpp = bp;
+       *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
         return 0;
  }
  
@@ -264,23 +202,20 @@ xfs_itobp(
   * brought in-core.  The rest will be in-lined in if_extents when it
   * is first referenced (see xfs_iread_extents()).
   */
-int
+STATIC int
  xfs_iformat(
         xfs_inode_t             *ip,
         xfs_dinode_t            *dip)
  {
         xfs_attr_shortform_t    *atp;
         int                     size;
-       int                     error;
+       int                     error = 0;
         xfs_fsize_t             di_size;
-       ip->i_df.if_ext_max =
-               XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
-       error = 0;
  
         if (unlikely(be32_to_cpu(dip->di_nextents) +
                      be16_to_cpu(dip->di_anextents) >
                      be64_to_cpu(dip->di_nblocks))) {
-               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+               xfs_warn(ip->i_mount,
                         "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
                         (unsigned long long)ip->i_ino,
                         (int)(be32_to_cpu(dip->di_nextents) +
@@ -293,8 +228,7 @@ xfs_iformat(
         }
  
         if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
-               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                       "corrupt dinode %Lu, forkoff = 0x%x.",
+               xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
                         (unsigned long long)ip->i_ino,
                         dip->di_forkoff);
                 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -304,7 +238,7 @@ xfs_iformat(
  
         if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
                      !ip->i_mount->m_rtdev)) {
-               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+               xfs_warn(ip->i_mount,
                         "corrupt dinode %Lu, has realtime flag set.",
                         ip->i_ino);
                 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
@@ -323,7 +257,6 @@ xfs_iformat(
                         return XFS_ERROR(EFSCORRUPTED);
                 }
                 ip->i_d.di_size = 0;
-               ip->i_size = 0;
                 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
                 break;
  
@@ -335,10 +268,9 @@ xfs_iformat(
                         /*
                          * no local regular files yet
                          */
-                       if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
-                               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                       "corrupt inode %Lu "
-                                       "(local format for regular file).",
+                       if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
+                               xfs_warn(ip->i_mount,
+                       "corrupt inode %Lu (local format for regular file).",
                                         (unsigned long long) ip->i_ino);
                                 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
                                                      XFS_ERRLEVEL_LOW,
@@ -348,9 +280,8 @@ xfs_iformat(
  
                         di_size = be64_to_cpu(dip->di_size);
                         if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-                               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                                       "corrupt inode %Lu "
-                                       "(bad size %Ld for local inode).",
+                               xfs_warn(ip->i_mount,
+                       "corrupt inode %Lu (bad size %Ld for local inode).",
                                         (unsigned long long) ip->i_ino,
                                         (long long) di_size);
                                 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -384,19 +315,18 @@ xfs_iformat(
         }
         if (!XFS_DFORK_Q(dip))
                 return 0;
+
         ASSERT(ip->i_afp == NULL);
         ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
-       ip->i_afp->if_ext_max =
-               XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+
         switch (dip->di_aformat) {
         case XFS_DINODE_FMT_LOCAL:
                 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
                 size = be16_to_cpu(atp->hdr.totsize);
  
                 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
-                       xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                               "corrupt inode %Lu "
-                               "(bad attr fork size %Ld).",
+                       xfs_warn(ip->i_mount,
+                               "corrupt inode %Lu (bad attr fork size %Ld).",
                                 (unsigned long long) ip->i_ino,
                                 (long long) size);
                         XFS_CORRUPTION_ERROR("xfs_iformat(8)",
@@ -451,9 +381,8 @@ xfs_iformat_local(
          * kmem_alloc() or memcpy() below.
          */
         if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                       "corrupt inode %Lu "
-                       "(bad size %d for local fork, size = %d).",
+               xfs_warn(ip->i_mount,
+       "corrupt inode %Lu (bad size %d for local fork, size = %d).",
                         (unsigned long long) ip->i_ino, size,
                         XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
                 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -510,8 +439,7 @@ xfs_iformat_extents(
          * kmem_alloc() or memcpy() below.
          */
         if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                       "corrupt inode %Lu ((a)extents = %d).",
+               xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
                         (unsigned long long) ip->i_ino, nex);
                 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
                                      ip->i_mount, dip);
@@ -582,15 +510,15 @@ xfs_iformat_btree(
          * or the number of extents is greater than the number of
          * blocks.
          */
-       if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
-           || XFS_BMDR_SPACE_CALC(nrecs) >
-                       XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
-           || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-               xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
-                       "corrupt inode %Lu (btree).",
+       if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
+                       XFS_IFORK_MAXEXT(ip, whichfork) ||
+                    XFS_BMDR_SPACE_CALC(nrecs) >
+                       XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
+                    XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+               xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
                         (unsigned long long) ip->i_ino);
-               XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
-                                ip->i_mount);
+               XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
+                                ip->i_mount, dip);
                 return XFS_ERROR(EFSCORRUPTED);
         }
  
@@ -682,6 +610,124 @@ xfs_dinode_to_disk(
         to->di_gen = cpu_to_be32(from->di_gen);
  }
  
+/*
+ * Read the disk inode attributes into the in-core inode structure.
+ */
+int
+xfs_iread(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       uint            iget_flags)
+{
+       xfs_buf_t       *bp;
+       xfs_dinode_t    *dip;
+       int             error;
+
+       /*
+        * Fill in the location information in the in-core inode.
+        */
+       error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
+       if (error)
+               return error;
+
+       /*
+        * Get pointers to the on-disk inode and the buffer containing it.
+        */
+       error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
+       if (error)
+               return error;
+
+       /*
+        * If we got something that isn't an inode it means someone
+        * (nfs or dmi) has a stale handle.
+        */
+       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
+#ifdef DEBUG
+               xfs_alert(mp,
+                       "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
+                       __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
+#endif /* DEBUG */
+               error = XFS_ERROR(EINVAL);
+               goto out_brelse;
+       }
+
+       /*
+        * If the on-disk inode is already linked to a directory
+        * entry, copy all of the inode into the in-core inode.
+        * xfs_iformat() handles copying in the inode format
+        * specific information.
+        * Otherwise, just get the truly permanent information.
+        */
+       if (dip->di_mode) {
+               xfs_dinode_from_disk(&ip->i_d, dip);
+               error = xfs_iformat(ip, dip);
+               if (error)  {
+#ifdef DEBUG
+                       xfs_alert(mp, "%s: xfs_iformat() returned error %d",
+                               __func__, error);
+#endif /* DEBUG */
+                       goto out_brelse;
+               }
+       } else {
+               ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
+               ip->i_d.di_version = dip->di_version;
+               ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
+               ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
+               /*
+                * Make sure to pull in the mode here as well in
+                * case the inode is released without being used.
+                * This ensures that xfs_inactive() will see that
+                * the inode is already free and not try to mess
+                * with the uninitialized part of it.
+                */
+               ip->i_d.di_mode = 0;
+       }
+
+       /*
+        * The inode format changed when we moved the link count and
+        * made it 32 bits long.  If this is an old format inode,
+        * convert it in memory to look like a new one.  If it gets
+        * flushed to disk we will convert back before flushing or
+        * logging it.  We zero out the new projid field and the old link
+        * count field.  We'll handle clearing the pad field (the remains
+        * of the old uuid field) when we actually convert the inode to
+        * the new format. We don't change the version number so that we
+        * can distinguish this from a real new format inode.
+        */
+       if (ip->i_d.di_version == 1) {
+               ip->i_d.di_nlink = ip->i_d.di_onlink;
+               ip->i_d.di_onlink = 0;
+               xfs_set_projid(&ip->i_d, 0);
+       }
+
+       ip->i_delayed_blks = 0;
+
+       /*
+        * Mark the buffer containing the inode as something to keep
+        * around for a while.  This helps to keep recently accessed
+        * meta-data in-core longer.
+        */
+       xfs_buf_set_ref(bp, XFS_INO_REF);
+
+       /*
+        * Use xfs_trans_brelse() to release the buffer containing the
+        * on-disk inode, because it was acquired with xfs_trans_read_buf()
+        * in xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
+        * brelse().  If we're within a transaction, then xfs_trans_brelse()
+        * will only release the buffer if it is not dirty within the
+        * transaction.  It will be OK to release the buffer in this case,
+        * because inodes on disk are never destroyed and we will be
+        * locking the new in-core inode before putting it in the hash
+        * table where other processes can find it.  Thus we don't have
+        * to worry about the inode being changed just because we released
+        * the buffer.
+        */
+ out_brelse:
+       xfs_trans_brelse(tp, bp);
+       return error;
+}
+
  /*
   * Read in extents from a btree-format inode.
   * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
@@ -707,7 +753,6 @@ xfs_iread_extents(
         /*
          * We know that the size is valid (it's checked in iformat_btree)
          */
-       ifp->if_lastex = NULLEXTNUM;
         ifp->if_bytes = ifp->if_real_bytes = 0;
         ifp->if_flags |= XFS_IFEXTENTS;
         xfs_iext_add(ifp, 0, nextents);
@@ -1067,9 +1112,6 @@ xfs_iflush_fork(
         char                    *cp;
         xfs_ifork_t             *ifp;
         xfs_mount_t             *mp;
-#ifdef XFS_TRANS_DEBUG
-       int                     first;
-#endif
         static const short      brootflag[2] =
                 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
         static const short      dataflag[2] =
@@ -1092,7 +1134,7 @@ xfs_iflush_fork(
         mp = ip->i_mount;
         switch (XFS_IFORK_FORMAT(ip, whichfork)) {
         case XFS_DINODE_FMT_LOCAL:
-               if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
+               if ((iip->ili_fields & dataflag[whichfork]) &&
                     (ifp->if_bytes > 0)) {
                         ASSERT(ifp->if_u1.if_data != NULL);
                         ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -1102,13 +1144,10 @@ xfs_iflush_fork(
  
         case XFS_DINODE_FMT_EXTENTS:
                 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
-                      !(iip->ili_format.ilf_fields & extflag[whichfork]));
-               ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
-                       (ifp->if_bytes == 0));
-               ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
-                       (ifp->if_bytes > 0));
-               if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
+                      !(iip->ili_fields & extflag[whichfork]));
+               if ((iip->ili_fields & extflag[whichfork]) &&
                     (ifp->if_bytes > 0)) {
+                       ASSERT(xfs_iext_get_ext(ifp, 0));
                         ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
                         (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
                                 whichfork);
@@ -1116,7 +1155,7 @@ xfs_iflush_fork(
                 break;
  
         case XFS_DINODE_FMT_BTREE:
-               if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
+               if ((iip->ili_fields & brootflag[whichfork]) &&
                     (ifp->if_broot_bytes > 0)) {
                         ASSERT(ifp->if_broot != NULL);
                         ASSERT(ifp->if_broot_bytes <=
@@ -1129,14 +1168,14 @@ xfs_iflush_fork(
                 break;
  
         case XFS_DINODE_FMT_DEV:
-               if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+               if (iip->ili_fields & XFS_ILOG_DEV) {
                         ASSERT(whichfork == XFS_DATA_FORK);
                         xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
                 }
                 break;
  
         case XFS_DINODE_FMT_UUID:
-               if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+               if (iip->ili_fields & XFS_ILOG_UUID) {
                         ASSERT(whichfork == XFS_DATA_FORK);
                         memcpy(XFS_DFORK_DPTR(dip),
                                &ip->i_df.if_u2.if_uuid,
@@ -1159,6 +1198,8 @@ xfs_iext_get_ext(
         xfs_extnum_t    idx)            /* index of target extent */
  {
         ASSERT(idx >= 0);
+       ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+
         if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
                 return ifp->if_u1.if_ext_irec->er_extbuf;
         } else if (ifp->if_flags & XFS_IFEXTIREC) {
@@ -1238,7 +1279,6 @@ xfs_iext_add(
                 }
                 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
                 ifp->if_real_bytes = 0;
-               ifp->if_lastex = nextents + ext_diff;
         }
         /*
          * Otherwise use a linear (direct) extent list.
@@ -1933,8 +1973,10 @@ xfs_iext_idx_to_irec(
         xfs_extnum_t    page_idx = *idxp; /* extent index in target list */
  
         ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       ASSERT(page_idx >= 0 && page_idx <=
-               ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
+       ASSERT(page_idx >= 0);
+       ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+       ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
+
         nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
         erp_idx = 0;
         low = 0;
diff --git a/libxfs/xfs_mount.c b/libxfs/xfs_mount.c

index 32d22553bdc125421fb6be82462bc1620fc52132..a9155b39b864408c065e4b15fb29a3cf5eca0268 100644 (file)
--- a/libxfs/xfs_mount.c
+++ b/libxfs/xfs_mount.c
@@ -90,8 +90,8 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
                 ASSERT(atomic_read(&pag->pag_ref) >= 0);
                 ref = atomic_inc_return(&pag->pag_ref);
         }
-       trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
         rcu_read_unlock();
+       trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
         return pag;
  }
  
@@ -105,6 +105,114 @@ xfs_perag_put(struct xfs_perag *pag)
         trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
  }
  
+/*
+ * Check the validity of the SB found.
+ */
+STATIC int
+xfs_mount_validate_sb(
+       xfs_mount_t     *mp,
+       xfs_sb_t        *sbp,
+       bool            check_inprogress)
+{
+
+       /*
+        * If the log device and data device have the
+        * same device number, the log is internal.
+        * Consequently, the sb_logstart should be non-zero.  If
+        * we have a zero sb_logstart in this case, we may be trying to mount
+        * a volume filesystem in a non-volume manner.
+        */
+       if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+               xfs_warn(mp, "bad magic number");
+               return XFS_ERROR(EWRONGFS);
+       }
+
+       if (!xfs_sb_good_version(sbp)) {
+               xfs_warn(mp, "bad version");
+               return XFS_ERROR(EWRONGFS);
+       }
+
+       if (unlikely(
+           sbp->sb_logstart == 0 && mp->m_logdev == mp->m_dev)) {
+               xfs_warn(mp,
+               "filesystem is marked as having an external log; "
+               "specify logdev on the mount command line.");
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (unlikely(
+           sbp->sb_logstart != 0 && mp->m_logdev != mp->m_dev)) {
+               xfs_warn(mp,
+               "filesystem is marked as having an internal log; "
+               "do not specify logdev on the mount command line.");
+               return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * More sanity checking.  Most of these were stolen directly from
+        * xfs_repair.
+        */
+       if (unlikely(
+           sbp->sb_agcount <= 0                                        ||
+           sbp->sb_sectsize < XFS_MIN_SECTORSIZE                       ||
+           sbp->sb_sectsize > XFS_MAX_SECTORSIZE                       ||
+           sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG                    ||
+           sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG                    ||
+           sbp->sb_sectsize != (1 << sbp->sb_sectlog)                  ||
+           sbp->sb_blocksize < XFS_MIN_BLOCKSIZE                       ||
+           sbp->sb_blocksize > XFS_MAX_BLOCKSIZE                       ||
+           sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG                    ||
+           sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG                    ||
+           sbp->sb_blocksize != (1 << sbp->sb_blocklog)                ||
+           sbp->sb_inodesize < XFS_DINODE_MIN_SIZE                     ||
+           sbp->sb_inodesize > XFS_DINODE_MAX_SIZE                     ||
+           sbp->sb_inodelog < XFS_DINODE_MIN_LOG                       ||
+           sbp->sb_inodelog > XFS_DINODE_MAX_LOG                       ||
+           sbp->sb_inodesize != (1 << sbp->sb_inodelog)                ||
+           (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)   ||
+           (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)  ||
+           (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)  ||
+           (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)    ||
+           sbp->sb_dblocks == 0                                        ||
+           sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)                      ||
+           sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
+               XFS_CORRUPTION_ERROR("SB sanity check failed",
+                               XFS_ERRLEVEL_LOW, mp, sbp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       /*
+        * Currently only very few inode sizes are supported.
+        */
+       switch (sbp->sb_inodesize) {
+       case 256:
+       case 512:
+       case 1024:
+       case 2048:
+               break;
+       default:
+               xfs_warn(mp, "inode size of %d bytes not supported",
+                               sbp->sb_inodesize);
+               return XFS_ERROR(ENOSYS);
+       }
+
+
+       if (check_inprogress && sbp->sb_inprogress) {
+               xfs_warn(mp, "Offline file system operation in progress!");
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       /*
+        * Version 1 directory format has never worked on Linux.
+        */
+       if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
+               xfs_warn(mp, "file system using version 1 directory format");
+               return XFS_ERROR(ENOSYS);
+       }
+
+       return 0;
+}
+
  void
  xfs_sb_from_disk(
         xfs_sb_t        *to,
@@ -211,6 +319,72 @@ xfs_sb_to_disk(
         }
  }
  
+static void
+xfs_sb_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_sb   sb;
+       int             error;
+
+       xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+
+       /*
+        * Only check the in progress field for the primary superblock as
+        * mkfs.xfs doesn't clear it from secondary superblocks.
+        */
+       error = xfs_mount_validate_sb(mp, &sb, bp->b_blkno == XFS_SB_DADDR);
+       if (error)
+               xfs_buf_ioerror(bp, error);
+}
+
+static void
+xfs_sb_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_sb_verify(bp);
+}
+
+/*
+ * We may be probed for a filesystem match, so we may not want to emit
+ * messages when the superblock buffer is not actually an XFS superblock.
+ * If we find an XFS superblock, then run a normal, noisy mount because we are
+ * really going to mount it and want to know about errors.
+ */
+static void
+xfs_sb_quiet_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_sb   sb;
+
+       xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+
+       if (sb.sb_magicnum == XFS_SB_MAGIC) {
+               /* XFS filesystem, verify noisily! */
+               xfs_sb_read_verify(bp);
+               return;
+       }
+       /* quietly fail */
+       xfs_buf_ioerror(bp, EFSCORRUPTED);
+}
+
+static void
+xfs_sb_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_sb_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_sb_buf_ops = {
+       .verify_read = xfs_sb_read_verify,
+       .verify_write = xfs_sb_write_verify,
+};
+
+static const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
+       .verify_read = xfs_sb_quiet_read_verify,
+       .verify_write = xfs_sb_write_verify,
+};
+
  /*
   * xfs_mount_common
   *
diff --git a/libxfs/xfs_rtalloc.c b/libxfs/xfs_rtalloc.c

index 4fbdaa9d2964ad0652aa52b548b2125c79439471..1de85fd2c530fb00a7126b8e33b9fbe8d67917cc 100644 (file)
--- a/libxfs/xfs_rtalloc.c
+++ b/libxfs/xfs_rtalloc.c
@@ -49,34 +49,24 @@ xfs_rtbuf_get(
         xfs_buf_t       **bpp)          /* output: buffer for the block */
  {
         xfs_buf_t       *bp;            /* block buffer, result */
-       xfs_daddr_t     d;              /* disk addr of block */
-       int             error;          /* error value */
-       xfs_fsblock_t   fsb;            /* fs block number for block */
         xfs_inode_t     *ip;            /* bitmap or summary inode */
+       xfs_bmbt_irec_t map;
+       int             nmap = 1;
+       int             error;          /* error value */
  
         ip = issum ? mp->m_rsumip : mp->m_rbmip;
-       /*
-        * Map from the file offset (block) and inode number to the
-        * file system block.
-        */
-       error = xfs_bmapi_single(tp, ip, XFS_DATA_FORK, &fsb, block);
-       if (error) {
+
+       error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
+       if (error)
                 return error;
-       }
-       ASSERT(fsb != NULLFSBLOCK);
-       /*
-        * Convert to disk address for buffer cache.
-        */
-       d = XFS_FSB_TO_DADDR(mp, fsb);
-       /*
-        * Read the buffer.
-        */
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                                  mp->m_bsize, 0, &bp);
-       if (error) {
+
+       ASSERT(map.br_startblock != NULLFSBLOCK);
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                  XFS_FSB_TO_DADDR(mp, map.br_startblock),
+                                  mp->m_bsize, 0, &bp, NULL);
+       if (error)
                 return error;
-       }
-       ASSERT(bp && !XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         *bpp = bp;
         return 0;
  }
@@ -115,7 +105,7 @@ xfs_rtfind_back(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Get the first word's index & point to it.
          */
@@ -167,7 +157,7 @@ xfs_rtfind_back(
                         if (error) {
                                 return error;
                         }
-                       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       bufp = bp->b_addr;
                         word = XFS_BLOCKWMASK(mp);
                         b = &bufp[word];
                 } else {
@@ -213,7 +203,7 @@ xfs_rtfind_back(
                         if (error) {
                                 return error;
                         }
-                       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       bufp = bp->b_addr;
                         word = XFS_BLOCKWMASK(mp);
                         b = &bufp[word];
                 } else {
@@ -290,7 +280,7 @@ xfs_rtfind_forw(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Get the first word's index & point to it.
          */
@@ -341,7 +331,7 @@ xfs_rtfind_forw(
                         if (error) {
                                 return error;
                         }
-                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -386,7 +376,7 @@ xfs_rtfind_forw(
                         if (error) {
                                 return error;
                         }
-                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -537,7 +527,7 @@ xfs_rtmodify_range(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Compute the starting word's address, and starting bit.
          */
@@ -582,7 +572,7 @@ xfs_rtmodify_range(
                         if (error) {
                                 return error;
                         }
-                       first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       first = b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -622,7 +612,7 @@ xfs_rtmodify_range(
                         if (error) {
                                 return error;
                         }
-                       first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       first = b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -720,8 +710,8 @@ xfs_rtmodify_summary(
          */
         sp = XFS_SUMPTR(mp, bp, so);
         *sp += delta;
-       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)),
-               (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1));
+       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+               (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
         return 0;
  }
  
@@ -736,18 +726,15 @@ xfs_rtfree_extent(
         xfs_extlen_t    len)            /* length of extent freed */
  {
         int             error;          /* error value */
-       xfs_inode_t     *ip;            /* bitmap file inode */
         xfs_mount_t     *mp;            /* file system mount structure */
         xfs_fsblock_t   sb;             /* summary file block number */
         xfs_buf_t       *sumbp;         /* summary file block buffer */
  
         mp = tp->t_mountp;
-       /*
-        * Synchronize by locking the bitmap inode.
-        */
-       if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
-                                       XFS_ILOCK_EXCL, &ip)))
-               return error;
+
+       ASSERT(mp->m_rbmip->i_itemp != NULL);
+       ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
+
  #if defined(__KERNEL__) && defined(DEBUG)
         /*
          * Check to see that this whole range is currently allocated.
@@ -780,10 +767,10 @@ xfs_rtfree_extent(
          */
         if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
             mp->m_sb.sb_rextents) {
-               if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
-                       ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
-               *(__uint64_t *)&ip->i_d.di_atime = 0;
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+               if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
+                       mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
+               *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
+               xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
         }
         return 0;
  }
diff --git a/libxfs/xfs_trans.c b/libxfs/xfs_trans.c

index 7249196eacee18d8450bd66e4fcc7ad38cc3f323..bdd0ebc2f3cbe80ca0f498fcdea3e93f372b5290 100644 (file)
--- a/libxfs/xfs_trans.c
+++ b/libxfs/xfs_trans.c
@@ -583,14 +583,13 @@ xfs_trans_add_item(
  {
         struct xfs_log_item_desc *lidp;
  
-       ASSERT(lip->li_mountp = tp->t_mountp);
-       ASSERT(lip->li_ailp = tp->t_mountp->m_ail);
+       ASSERT(lip->li_mountp == tp->t_mountp);
+       ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
  
         lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
  
         lidp->lid_item = lip;
         lidp->lid_flags = 0;
-       lidp->lid_size = 0;
         list_add_tail(&lidp->lid_trans, &tp->t_items);
  
         lip->li_desc = lidp;
@@ -673,8 +672,6 @@ xfs_trans_roll(
         if (error)
                 return error;
  
-       xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
-       xfs_trans_ihold(trans, dp);
+       xfs_trans_ijoin(trans, dp, 0);
         return 0;
  }
-
diff --git a/logprint/log_misc.c b/logprint/log_misc.c

index d94c0cc8ef310065799c554b4364fc2ab11e5515..567cdf22558b27ef999dbafc17b25ae8318b192f 100644 (file)
--- a/logprint/log_misc.c
+++ b/logprint/log_misc.c
@@ -558,7 +558,7 @@ xlog_print_trans_inode_core(xfs_icdinode_t *ip)
  }
  
  void
-xlog_print_dir2_sf(xfs_dir2_sf_t *sfp, int size)
+xlog_print_dir2_sf(xfs_dir2_sf_hdr_t *sfp, int size)
  {
         xfs_ino_t       ino;
         int             count;
@@ -566,9 +566,6 @@ xlog_print_dir2_sf(xfs_dir2_sf_t *sfp, int size)
         char            namebuf[257];
         xfs_dir2_sf_entry_t     *sfep;
  
-       /* XXX need to determine whether this is v1 or v2, then
-          print appropriate structure */
-
         printf(_("SHORTFORM DIRECTORY size %d\n"),
                 size);
         /* bail out for now */
@@ -576,14 +573,14 @@ xlog_print_dir2_sf(xfs_dir2_sf_t *sfp, int size)
         return;
  
         printf(_("SHORTFORM DIRECTORY size %d count %d\n"),
-              size, sfp->hdr.count);
-       memmove(&ino, &(sfp->hdr.parent), sizeof(ino));
-       printf(_(".. ino 0x%llx\n"), (unsigned long long) be64_to_cpu(ino));
+              size, sfp->count);
+       memmove(&ino, &(sfp->parent), sizeof(ino));
+       printf(_(".. ino 0x%llx\n"), (unsigned long long) be64_to_cpu(ino));
  
-       count = (uint)(sfp->hdr.count);
-       sfep = &(sfp->list[0]);
+       count = sfp->count;
+       sfep = xfs_dir2_sf_firstentry(sfp);
         for (i = 0; i < count; i++) {
-               memmove(&ino, &(sfep->inumber), sizeof(ino));
+               ino = xfs_dir2_sfe_get_ino(sfp, sfep);
                 memmove(namebuf, (sfep->name), sfep->namelen);
                 namebuf[sfep->namelen] = '\0';
                 printf(_("%s ino 0x%llx namelen %d\n"),
@@ -691,7 +688,7 @@ xlog_print_trans_inode(xfs_caddr_t *ptr,
             case XFS_ILOG_DDATA:
                 printf(_("LOCAL inode data\n"));
                 if (mode == S_IFDIR)
-                   xlog_print_dir2_sf((xfs_dir2_sf_t *)*ptr, size);
+                   xlog_print_dir2_sf((xfs_dir2_sf_hdr_t *)*ptr, size);
                 break;
             default:
                 ASSERT((f->ilf_fields & XFS_ILOG_DFORK) == 0);
@@ -718,7 +715,7 @@ xlog_print_trans_inode(xfs_caddr_t *ptr,
             case XFS_ILOG_ADATA:
                 printf(_("LOCAL attr data\n"));
                 if (mode == S_IFDIR)
-                   xlog_print_dir2_sf((xfs_dir2_sf_t *)*ptr, size);
+                   xlog_print_dir2_sf((xfs_dir2_sf_hdr_t *)*ptr, size);
                 break;
             default:
                 ASSERT((f->ilf_fields & XFS_ILOG_AFORK) == 0);
@@ -1039,7 +1036,7 @@ xlog_print_rec_head(xlog_rec_header_t *head, int *len)
      }
  
      /* check for cleared blocks written by xlog_clear_stale_blocks() */
-    if (!head->h_len && !head->h_chksum && !head->h_prev_block &&
+    if (!head->h_len && !head->h_crc && !head->h_prev_block &&
         !head->h_num_logops && !head->h_size)
         return CLEARED_BLKS;
  
diff --git a/logprint/log_print_all.c b/logprint/log_print_all.c

index 2c45ff29055ea6c8c6779443dd48bfa03543105a..8f1c8abe74ff7bc004b583562111a1477173fae6 100644 (file)
--- a/logprint/log_print_all.c
+++ b/logprint/log_print_all.c
@@ -262,7 +262,7 @@ xlog_recover_print_inode_core(
                (di->di_magic>>8) & 0xff, di->di_magic & 0xff,
                di->di_mode, di->di_version, di->di_format, di->di_onlink);
         printf(_("              uid:%d  gid:%d  nlink:%d projid:%u\n"),
-              di->di_uid, di->di_gid, di->di_nlink, xfs_get_projid(*di));
+              di->di_uid, di->di_gid, di->di_nlink, xfs_get_projid(di));
         printf(_("              atime:%d  mtime:%d  ctime:%d\n"),
                di->di_atime.t_sec, di->di_mtime.t_sec, di->di_ctime.t_sec);
         printf(_("              flushiter:%d\n"), di->di_flushiter);
diff --git a/mkfs/proto.c b/mkfs/proto.c

index 3021028682f558854a3f715a3ffbaa1da0cc08c7..56eed31d2174f9f4242d7a0e1e485393550596be 100644 (file)
--- a/mkfs/proto.c
+++ b/mkfs/proto.c
@@ -243,7 +243,7 @@ newfile(
         } else if (len > 0) {
                 nb = XFS_B_TO_FSB(mp, len);
                 nmap = 1;
-               error = libxfs_bmapi(tp, ip, 0, nb, XFS_BMAPI_WRITE, first, nb,
+               error = libxfs_bmapi_write(tp, ip, 0, nb, 0, first, nb,
                                 &map, &nmap, flist);
                 if (error) {
                         fail(_("error allocating space for a file"), error);
@@ -667,9 +667,9 @@ rtinit(
         xfs_bmap_init(&flist, &first);
         while (bno < mp->m_sb.sb_rbmblocks) {
                 nmap = XFS_BMAP_MAX_NMAP;
-               error = libxfs_bmapi(tp, rbmip, bno,
+               error = libxfs_bmapi_write(tp, rbmip, bno,
                                 (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
-                               XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+                               0, &first, mp->m_sb.sb_rbmblocks,
                                 map, &nmap, &flist);
                 if (error) {
                         fail(_("Allocation of the realtime bitmap failed"),
@@ -704,9 +704,9 @@ rtinit(
         xfs_bmap_init(&flist, &first);
         while (bno < nsumblocks) {
                 nmap = XFS_BMAP_MAX_NMAP;
-               error = libxfs_bmapi(tp, rsumip, bno,
+               error = libxfs_bmapi_write(tp, rsumip, bno,
                                 (xfs_extlen_t)(nsumblocks - bno),
-                               XFS_BMAPI_WRITE, &first, nsumblocks,
+                               0, &first, nsumblocks,
                                 map, &nmap, &flist);
                 if (error) {
                         fail(_("Allocation of the realtime summary failed"),
diff --git a/repair/dir2.c b/repair/dir2.c

index 932e99436e44a56839b82f416e0537a04c71d62b..c01e0bc769e388245e67d6b9a81eb6c1db96cfdd 100644 (file)
--- a/repair/dir2.c
+++ b/repair/dir2.c
@@ -99,182 +99,36 @@ namecheck(char *name, int length)
   * Multibuffer handling.
   * V2 directory blocks can be noncontiguous, needing multiple buffers.
   */
-static xfs_dabuf_t *
+static struct xfs_buf *
  da_read_buf(
         xfs_mount_t     *mp,
         int             nex,
         bmap_ext_t      *bmp)
  {
-       xfs_buf_t       *bp;
-       xfs_buf_t       *bparray[4];
-       xfs_buf_t       **bplist;
-       xfs_dabuf_t     *dabuf;
+#define MAP_ARRAY_SZ 4
+       struct xfs_buf_map map_array[MAP_ARRAY_SZ];
+       struct xfs_buf_map *map;
+       struct xfs_buf  *bp;
         int             i;
-       int             off;
  
-       if (nex > (sizeof(bparray)/sizeof(xfs_buf_t *))) {
-               bplist = calloc(nex, sizeof(*bplist));
-               if (bplist == NULL) {
+       if (nex > MAP_ARRAY_SZ) {
+               map = calloc(nex, sizeof(*map));
+               if (map == NULL) {
                         do_error(_("couldn't malloc dir2 buffer list\n"));
                         exit(1);
                 }
-       }
-       else {
+       } else {
                 /* common case avoids calloc/free */
-               bplist = bparray;
+               map = map_array;
         }
         for (i = 0; i < nex; i++) {
-               pftrace("about to read off %llu (len = %d)",
-                       (long long)XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
-                       XFS_FSB_TO_BB(mp, bmp[i].blockcount));
-
-               bplist[i] = libxfs_readbuf(mp->m_dev,
-                               XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
-                               XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0);
-               if (!bplist[i]) {
-                       nex = i;
-                       goto failed;
-               }
-
-               pftrace("readbuf %p (%llu, %d)", bplist[i],
-                       (long long)XFS_BUF_ADDR(bplist[i]),
-                       XFS_BUF_COUNT(bplist[i]));
-       }
-       dabuf = malloc(XFS_DA_BUF_SIZE(nex));
-       if (dabuf == NULL) {
-               do_error(_("couldn't malloc dir2 buffer header\n"));
-               exit(1);
-       }
-       dabuf->dirty = 0;
-       dabuf->nbuf = nex;
-       if (nex == 1) {
-               bp = bplist[0];
-               dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
-               dabuf->data = XFS_BUF_PTR(bp);
-               dabuf->bps[0] = bp;
-       } else {
-               for (i = 0, dabuf->bbcount = 0; i < nex; i++) {
-                       dabuf->bps[i] = bp = bplist[i];
-                       dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
-               }
-               dabuf->data = malloc(BBTOB(dabuf->bbcount));
-               if (dabuf->data == NULL) {
-                       do_error(_("couldn't malloc dir2 buffer data\n"));
-                       exit(1);
-               }
-               for (i = off = 0; i < nex; i++, off += XFS_BUF_COUNT(bp)) {
-                       bp = bplist[i];
-                       memmove((char *)dabuf->data + off, XFS_BUF_PTR(bp),
-                               XFS_BUF_COUNT(bp));
-               }
-       }
-       if (bplist != bparray)
-               free(bplist);
-       return dabuf;
-failed:
-       for (i = 0; i < nex; i++)
-               libxfs_putbuf(bplist[i]);
-       if (bplist != bparray)
-               free(bplist);
-       return NULL;
-}
-
-static void
-da_buf_clean(
-       xfs_dabuf_t     *dabuf)
-{
-       xfs_buf_t       *bp;
-       int             i;
-       int             off;
-
-       if (dabuf->dirty) {
-               dabuf->dirty = 0;
-               for (i=off=0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
-                       bp = dabuf->bps[i];
-                       memmove(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
-                               XFS_BUF_COUNT(bp));
-               }
-       }
-}
-
-static void
-da_buf_done(
-       xfs_dabuf_t     *dabuf)
-{
-       da_buf_clean(dabuf);
-       if (dabuf->nbuf > 1)
-               free(dabuf->data);
-       free(dabuf);
-}
-
-static int
-da_bwrite(
-       xfs_mount_t     *mp,
-       xfs_dabuf_t     *dabuf)
-{
-       xfs_buf_t       *bp;
-       xfs_buf_t       **bplist;
-       int             e;
-       int             error;
-       int             i;
-       int             nbuf;
-       int             off;
-
-       if ((nbuf = dabuf->nbuf) == 1) {
-               bplist = &bp;
-               bp = dabuf->bps[0];
-       } else {
-               bplist = malloc(nbuf * sizeof(*bplist));
-               if (bplist == NULL) {
-                       do_error(_("couldn't malloc dir2 buffer list\n"));
-                       exit(1);
-               }
-               memmove(bplist, dabuf->bps, nbuf * sizeof(*bplist));
-               for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
-                       bp = bplist[i];
-                       memmove(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
-                               XFS_BUF_COUNT(bp));
-               }
-       }
-       da_buf_done(dabuf);
-       for (i = error = 0; i < nbuf; i++) {
-               e = libxfs_writebuf(bplist[i], 0);
-               if (e)
-                       error = e;
-       }
-       if (bplist != &bp)
-               free(bplist);
-       return error;
-}
-
-static void
-da_brelse(
-       xfs_dabuf_t     *dabuf)
-{
-       xfs_buf_t       *bp;
-       xfs_buf_t       **bplist;
-       int             i;
-       int             nbuf;
-
-       if ((nbuf = dabuf->nbuf) == 1) {
-               bplist = &bp;
-               bp = dabuf->bps[0];
-       } else {
-               bplist = malloc(nbuf * sizeof(*bplist));
-               if (bplist == NULL) {
-                       do_error(_("couldn't malloc dir2 buffer list\n"));
-                       exit(1);
-               }
-               memmove(bplist, dabuf->bps, nbuf * sizeof(*bplist));
-       }
-       da_buf_done(dabuf);
-       for (i = 0; i < nbuf; i++) {
-               pftrace("putbuf %p (%llu)", bplist[i],
-                                       (long long)XFS_BUF_ADDR(bplist[i]));
-               libxfs_putbuf(bplist[i]);
+               map[i].bm_bn = XFS_FSB_TO_DADDR(mp, bmp[i].startblock);
+               map[i].bm_len = XFS_FSB_TO_BB(mp, bmp[i].blockcount);
         }
-       if (bplist != &bp)
-               free(bplist);
+       bp = libxfs_readbuf_map(mp->m_dev, map, nex, 0);
+       if (map != map_array)
+               free(map);
+       return bp;
  }
  
  /*
@@ -290,7 +144,7 @@ traverse_int_dir2block(xfs_mount_t  *mp,
  {
         bmap_ext_t              *bmp;
         xfs_dablk_t             bno;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         int                     i;
         int                     nex;
         xfs_da_blkinfo_t        *info;
@@ -327,7 +181,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"),
                         goto error_out;
                 }
  
-               info = bp->data;
+               info = bp->b_addr;
  
                 if (be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC)  {
                         if ( i != -1 ) {
@@ -336,10 +190,10 @@ _("found non-root LEAFN node in inode %" PRIu64 " bno = %u\n"),
                                         da_cursor->ino, bno);
                         }
                         *rbno = 0;
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         return(1);
                 } else if (be16_to_cpu(info->magic) != XFS_DA_NODE_MAGIC)  {
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         do_warn(
  _("bad dir magic number 0x%x in inode %" PRIu64 " bno = %u\n"),
                                 be16_to_cpu(info->magic),
@@ -348,7 +202,7 @@ _("bad dir magic number 0x%x in inode %" PRIu64 " bno = %u\n"),
                 }
                 node = (xfs_da_intnode_t*)info;
                 if (be16_to_cpu(node->hdr.count) > mp->m_dir_node_ents)  {
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         do_warn(
  _("bad record count in inode %" PRIu64 ", count = %d, max = %d\n"), da_cursor->ino,
                                 be16_to_cpu(node->hdr.count),
@@ -364,7 +218,7 @@ _("bad record count in inode %" PRIu64 ", count = %d, max = %d\n"), da_cursor->i
                                 do_warn(
  _("bad header depth for directory inode %" PRIu64 "\n"),
                                         da_cursor->ino);
-                               da_brelse(bp);
+                               libxfs_putbuf(bp);
                                 i = -1;
                                 goto error_out;
                         }
@@ -375,7 +229,7 @@ _("bad header depth for directory inode %" PRIu64 "\n"),
                                 do_warn(
  _("bad directory btree for directory inode %" PRIu64 "\n"),
                                         da_cursor->ino);
-                               da_brelse(bp);
+                               libxfs_putbuf(bp);
                                 goto error_out;
                         }
                 }
@@ -400,7 +254,7 @@ _("bad directory btree for directory inode %" PRIu64 "\n"),
  
  error_out:
         while (i > 1 && i <= da_cursor->active)  {
-               da_brelse(da_cursor->level[i].bp);
+               libxfs_putbuf(da_cursor->level[i].bp);
                 i++;
         }
  
@@ -429,7 +283,7 @@ release_dir2_cursor_int(xfs_mount_t         *mp,
                 }
                 ASSERT(error != 0);
  
-               da_brelse(cursor->level[level].bp);
+               libxfs_putbuf(cursor->level[level].bp);
                 cursor->level[level].bp = NULL;
         }
  
@@ -478,7 +332,7 @@ verify_final_dir2_path(xfs_mount_t  *mp,
          * in the block which should be the final (rightmost) entry
          */
         entry = cursor->level[this_level].index;
-       node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->data);
+       node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->b_addr);
         /*
          * check internal block consistency on this level -- ensure
          * that all entries are used, encountered and expected hashvals
@@ -550,9 +404,9 @@ _("would correct bad hashval in non-leaf dir block\n"
                 (cursor->level[this_level].dirty && !no_modify));
  
         if (cursor->level[this_level].dirty && !no_modify)
-               da_bwrite(mp, cursor->level[this_level].bp);
+               libxfs_writebuf(cursor->level[this_level].bp, 0);
         else
-               da_brelse(cursor->level[this_level].bp);
+               libxfs_putbuf(cursor->level[this_level].bp);
  
         cursor->level[this_level].bp = NULL;
  
@@ -618,7 +472,7 @@ verify_dir2_path(xfs_mount_t        *mp,
         xfs_da_intnode_t        *node;
         xfs_da_intnode_t        *newnode;
         xfs_dablk_t             dabno;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         int                     bad;
         int                     entry;
         int                     this_level = p_level + 1;
@@ -631,7 +485,7 @@ verify_dir2_path(xfs_mount_t        *mp,
          * should be processed now in this level.
          */
         entry = cursor->level[this_level].index;
-       node = cursor->level[this_level].bp->data;
+       node = cursor->level[this_level].bp->b_addr;
  
         /*
          * if this block is out of entries, validate this
@@ -685,7 +539,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"),
                         return(1);
                 }
  
-               newnode = bp->data;
+               newnode = bp->b_addr;
                 /*
                  * verify magic number and back pointer, sanity-check
                  * entry count, verify level
@@ -720,7 +574,7 @@ _("bad level %d in block %u for directory inode %" PRIu64 "\n"),
                         bad++;
                 }
                 if (bad)  {
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         return(1);
                 }
                 /*
@@ -731,9 +585,9 @@ _("bad level %d in block %u for directory inode %" PRIu64 "\n"),
                         (cursor->level[this_level].dirty && !no_modify));
  
                 if (cursor->level[this_level].dirty && !no_modify)
-                       da_bwrite(mp, cursor->level[this_level].bp);
+                       libxfs_writebuf(cursor->level[this_level].bp, 0);
                 else
-                       da_brelse(cursor->level[this_level].bp);
+                       libxfs_putbuf(cursor->level[this_level].bp);
                 cursor->level[this_level].bp = bp;
                 cursor->level[this_level].dirty = 0;
                 cursor->level[this_level].bno = dabno;
@@ -805,21 +659,19 @@ process_sf_dir2_fixi8(
         memmove(oldsfp, newsfp, oldsize);
         newsfp->hdr.count = oldsfp->hdr.count;
         newsfp->hdr.i8count = 0;
-       ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
-       xfs_dir2_sf_put_inumber(newsfp, &ino, &newsfp->hdr.parent);
-       oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-       newsfep = xfs_dir2_sf_firstentry(newsfp);
+       ino = xfs_dir2_sf_get_parent_ino(&sfp->hdr);
+       xfs_dir2_sf_put_parent_ino(&newsfp->hdr, ino);
+       oldsfep = xfs_dir2_sf_firstentry(&oldsfp->hdr);
+       newsfep = xfs_dir2_sf_firstentry(&newsfp->hdr);
         while ((int)((char *)oldsfep - (char *)oldsfp) < oldsize) {
                 newsfep->namelen = oldsfep->namelen;
                 xfs_dir2_sf_put_offset(newsfep,
                         xfs_dir2_sf_get_offset(oldsfep));
                 memmove(newsfep->name, oldsfep->name, newsfep->namelen);
-               ino = xfs_dir2_sf_get_inumber(oldsfp,
-                       xfs_dir2_sf_inumberp(oldsfep));
-               xfs_dir2_sf_put_inumber(newsfp, &ino,
-                       xfs_dir2_sf_inumberp(newsfep));
-               oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep);
-               newsfep = xfs_dir2_sf_nextentry(newsfp, newsfep);
+               ino = xfs_dir2_sfe_get_ino(&oldsfp->hdr, oldsfep);
+               xfs_dir2_sfe_put_ino(&newsfp->hdr, newsfep, ino);
+               oldsfep = xfs_dir2_sf_nextentry(&oldsfp->hdr, oldsfep);
+               newsfep = xfs_dir2_sf_nextentry(&newsfp->hdr, newsfep);
         }
         *next_sfep = newsfep;
         free(oldsfp);
@@ -838,13 +690,13 @@ process_sf_dir2_fixoff(
         xfs_dir2_sf_t           *sfp;
  
         sfp = (xfs_dir2_sf_t *)XFS_DFORK_DPTR(dip);
-       sfep = xfs_dir2_sf_firstentry(sfp);
+       sfep = xfs_dir2_sf_firstentry(&sfp->hdr);
         offset = XFS_DIR2_DATA_FIRST_OFFSET;
  
         for (i = 0; i < sfp->hdr.count; i++) {
                 xfs_dir2_sf_put_offset(sfep, offset);
                 offset += xfs_dir2_data_entsize(sfep->namelen);
-               sfep = xfs_dir2_sf_nextentry(sfp, sfep);
+               sfep = xfs_dir2_sf_nextentry(&sfp->hdr, sfep);
         }
  }
  
@@ -901,13 +753,12 @@ process_sf_dir2(
         /*
          * Initialize i8 based on size of parent inode number.
          */
-       i8 = (xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)
-               > XFS_DIR2_MAX_SHORT_INUM);
+       i8 = (xfs_dir2_sf_get_parent_ino(&sfp->hdr) > XFS_DIR2_MAX_SHORT_INUM);
  
         /*
          * check for bad entry count
          */
-       if (num_entries * xfs_dir2_sf_entsize_byname(sfp, 1) +
+       if (num_entries * xfs_dir2_sf_entsize(&sfp->hdr, 1) +
                     xfs_dir2_sf_hdr_size(0) > max_size || num_entries == 0)
                 num_entries = 0xFF;
  
@@ -915,7 +766,7 @@ process_sf_dir2(
          * run through entries, stop at first bad entry, don't need
          * to check for .. since that's encoded in its own field
          */
-       sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
+       sfep = next_sfep = xfs_dir2_sf_firstentry(&sfp->hdr);
         for (i = 0;
              i < num_entries && ino_dir_size > (char *)next_sfep - (char *)sfp;
              i++) {
@@ -923,7 +774,7 @@ process_sf_dir2(
                 sfep = next_sfep;
                 junkit = 0;
                 bad_sfnamelen = 0;
-               lino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+               lino = xfs_dir2_sfe_get_ino(&sfp->hdr, sfep);
                 /*
                  * if entry points to self, junk it since only '.' or '..'
                  * should do that and shortform dirs don't contain either
@@ -1037,7 +888,7 @@ _("zero length entry in shortform dir %" PRIu64 ""),
                                 break;
                         }
                 } else if ((__psint_t) sfep - (__psint_t) sfp +
-                               xfs_dir2_sf_entsize_byentry(sfp, sfep)
+                               xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen)
                                                         > ino_dir_size)  {
                         bad_sfnamelen = 1;
  
@@ -1125,8 +976,8 @@ _("entry contains offset out of order in shortform dir %" PRIu64 "\n"),
                         name[namelen] = '\0';
  
                         if (!no_modify)  {
-                               tmp_elen =
-                                       xfs_dir2_sf_entsize_byentry(sfp, sfep);
+                               tmp_elen = xfs_dir2_sf_entsize(&sfp->hdr,
+                                                               sfep->namelen);
                                 be64_add_cpu(&dip->di_size, -tmp_elen);
                                 ino_dir_size -= tmp_elen;
  
@@ -1178,11 +1029,9 @@ _("would have junked entry \"%s\" in directory inode %" PRIu64 "\n"),
                  */
                 next_sfep = (tmp_sfep == NULL)
                         ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
-                               + ((!bad_sfnamelen)
-                                       ? xfs_dir2_sf_entsize_byentry(sfp,
-                                               sfep)
-                                       : xfs_dir2_sf_entsize_byname(sfp,
-                                               namelen)))
+                                                       + ((!bad_sfnamelen)
+                               ? xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen)
+                               : xfs_dir2_sf_entsize(&sfp->hdr, namelen)))
                         : tmp_sfep;
         }
  
@@ -1262,7 +1111,7 @@ _("corrected entry offsets in directory %" PRIu64 "\n"),
         /*
          * check parent (..) entry
          */
-       *parent = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+       *parent = xfs_dir2_sf_get_parent_ino(&sfp->hdr);
  
         /*
          * if parent entry is bogus, null it out.  we'll fix it later .
@@ -1276,7 +1125,7 @@ _("bogus .. inode number (%" PRIu64 ") in directory inode %" PRIu64 ", "),
                 if (!no_modify)  {
                         do_warn(_("clearing inode number\n"));
  
-                       xfs_dir2_sf_put_inumber(sfp, &zero, &sfp->hdr.parent);
+                       xfs_dir2_sf_put_parent_ino(&sfp->hdr, zero);
                         *dino_dirty = 1;
                         *repair = 1;
                 } else  {
@@ -1291,7 +1140,7 @@ _("bogus .. inode number (%" PRIu64 ") in directory inode %" PRIu64 ", "),
  _("corrected root directory %" PRIu64 " .. entry, was %" PRIu64 ", now %" PRIu64 "\n"),
                                 ino, *parent, ino);
                         *parent = ino;
-                       xfs_dir2_sf_put_inumber(sfp, parent, &sfp->hdr.parent);
+                       xfs_dir2_sf_put_parent_ino(&sfp->hdr, ino);
                         *dino_dirty = 1;
                         *repair = 1;
                 } else  {
@@ -1311,7 +1160,7 @@ _("bad .. entry in directory inode %" PRIu64 ", points to self, "),
                 if (!no_modify)  {
                         do_warn(_("clearing inode number\n"));
  
-                       xfs_dir2_sf_put_inumber(sfp, &zero, &sfp->hdr.parent);
+                       xfs_dir2_sf_put_parent_ino(&sfp->hdr, zero);
                         *dino_dirty = 1;
                         *repair = 1;
                 } else  {
@@ -1334,11 +1183,12 @@ process_dir2_data(
         int             ino_discovery,
         char            *dirname,       /* directory pathname */
         xfs_ino_t       *parent,        /* out - NULLFSINO if entry not exist */
-       xfs_dabuf_t     *bp,
+       struct xfs_buf  *bp,
         int             *dot,           /* out - 1 if there is a dot, else 0 */
         int             *dotdot,        /* out - 1 if there's a dotdot, else 0 */
         xfs_dablk_t     da_bno,
-       char            *endptr)
+       char            *endptr,
+       int             *dirty)
  {
         int                     badbest;
         xfs_dir2_data_free_t    *bf;
@@ -1358,7 +1208,7 @@ process_dir2_data(
         char                    *ptr;
         xfs_ino_t               ent_ino;
  
-       d = bp->data;
+       d = bp->b_addr;
         bf = d->hdr.bestfree;
         ptr = (char *)d->u;
         badbest = lastfree = freeseen = 0;
@@ -1392,7 +1242,7 @@ process_dir2_data(
                                                         (char *)dup - (char *)d)
                                 break;
                         badbest |= lastfree != 0;
-                       dfp = xfs_dir2_data_freefind(d, dup);
+                       dfp = xfs_dir2_data_freefind(&d->hdr, dup);
                         if (dfp) {
                                 i = dfp - bf;
                                 badbest |= (freeseen & (1 << i)) != 0;
@@ -1535,7 +1385,7 @@ _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"),
                                         (intptr_t)ptr - (intptr_t)d);
                                 dep->inumber = cpu_to_be64(BADFSINO);
                                 ent_ino = BADFSINO;
-                               bp->dirty = 1;
+                               *dirty = 1;
                         } else {
                                 do_warn(
  _("\twould clear inode number in entry at offset %" PRIdPTR "...\n"),
@@ -1561,7 +1411,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 " has ille
                  */
                 if (!no_modify && ent_ino == BADFSINO) {
                         dep->name[0] = '/';
-                       bp->dirty = 1;
+                       *dirty = 1;
                         junkit = 0;
                 }
                 /*
@@ -1597,7 +1447,7 @@ _("bad .. entry in root directory inode %" PRIu64 ", was %" PRIu64 ": "),
                                         if (!no_modify) {
                                                 do_warn(_("correcting\n"));
                                                 dep->inumber = cpu_to_be64(ino);
-                                               bp->dirty = 1;
+                                               *dirty = 1;
                                         } else {
                                                 do_warn(_("would correct\n"));
                                         }
@@ -1629,7 +1479,7 @@ _("bad . entry in directory inode %" PRIu64 ", was %" PRIu64 ": "),
                                         if (!no_modify) {
                                                 do_warn(_("correcting\n"));
                                                 dep->inumber = cpu_to_be64(ino);
-                                               bp->dirty = 1;
+                                               *dirty = 1;
                                         } else {
                                                 do_warn(_("would correct\n"));
                                         }
@@ -1656,7 +1506,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "),
                 if (junkit) {
                         if (!no_modify) {
                                 dep->name[0] = '/';
-                               bp->dirty = 1;
+                               *dirty = 1;
                                 do_warn(_("clearing entry\n"));
                         } else {
                                 do_warn(_("would clear entry\n"));
@@ -1676,8 +1526,8 @@ _("bad bestfree table in block %u in directory inode %" PRIu64 ": "),
                         da_bno, ino);
                 if (!no_modify) {
                         do_warn(_("repairing table\n"));
-                       libxfs_dir2_data_freescan(mp, d, &i);
-                       bp->dirty = 1;
+                       libxfs_dir2_data_freescan(mp, &d->hdr, &i);
+                       *dirty = 1;
                 } else {
                         do_warn(_("would repair table\n"));
                 }
@@ -1706,11 +1556,12 @@ process_block_dir2(
         xfs_dir2_block_t        *block;
         xfs_dir2_leaf_entry_t   *blp;
         bmap_ext_t              *bmp;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         xfs_dir2_block_tail_t   *btp;
         int                     nex;
         int                     rval;
         bmap_ext_t              lbmp;
+       int                     dirty = 0;
  
         *repair = *dot = *dotdot = 0;
         *parent = NULLFSINO;
@@ -1733,7 +1584,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"),
         /*
          * Verify the block
          */
-       block = bp->data;
+       block = bp->b_addr;
         if (be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)
                 do_warn(
  _("bad directory block magic # %#x in block %u for directory inode %" PRIu64 "\n"),
@@ -1742,7 +1593,7 @@ _("bad directory block magic # %#x in block %u for directory inode %" PRIu64 "\n
          * process the data area
          * this also checks & fixes the bestfree
          */
-       btp = xfs_dir2_block_tail_p(mp, block);
+       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
         blp = xfs_dir2_block_leaf_p(btp);
         /*
          * Don't let this go past the end of the block.
@@ -1750,12 +1601,12 @@ _("bad directory block magic # %#x in block %u for directory inode %" PRIu64 "\n
         if ((char *)blp > (char *)btp)
                 blp = (xfs_dir2_leaf_entry_t *)btp;
         rval = process_dir2_data(mp, ino, dip, ino_discovery, dirname, parent,
-               bp, dot, dotdot, mp->m_dirdatablk, (char *)blp);
-       if (bp->dirty && !no_modify) {
+               bp, dot, dotdot, mp->m_dirdatablk, (char *)blp, &dirty);
+       if (dirty && !no_modify) {
                 *repair = 1;
-               da_bwrite(mp, bp);
+               libxfs_writebuf(bp, 0);
         } else
-               da_brelse(bp);
+               libxfs_putbuf(bp);
         return rval;
  }
  
@@ -1814,7 +1665,7 @@ process_leaf_level_dir2(
         int                     *repair)
  {
         bmap_ext_t              *bmp;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         int                     buf_dirty;
         xfs_dahash_t            current_hashval;
         xfs_dablk_t             da_bno;
@@ -1859,7 +1710,7 @@ _("can't read file block %u for directory inode %" PRIu64 "\n"),
                                 da_bno, ino);
                         goto error_out;
                 }
-               leaf = bp->data;
+               leaf = bp->b_addr;
                 /*
                  * Check magic number for leaf directory btree block.
                  */
@@ -1869,7 +1720,7 @@ _("can't read file block %u for directory inode %" PRIu64 "\n"),
  _("bad directory leaf magic # %#x for directory inode %" PRIu64 " block %u\n"),
                                 be16_to_cpu(leaf->hdr.info.magic),
                                 ino, da_bno);
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         goto error_out;
                 }
                 buf_dirty = 0;
@@ -1879,7 +1730,7 @@ _("bad directory leaf magic # %#x for directory inode %" PRIu64 " block %u\n"),
                  */
                 if (process_leaf_block_dir2(mp, leaf, da_bno, ino,
                                 current_hashval, &greatest_hashval)) {
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         goto error_out;
                 }
                 /*
@@ -1898,14 +1749,14 @@ _("bad directory leaf magic # %#x for directory inode %" PRIu64 " block %u\n"),
                         do_warn(
  _("bad sibling back pointer for block %u in directory inode %" PRIu64 "\n"),
                                 da_bno, ino);
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
                         goto error_out;
                 }
                 prev_bno = da_bno;
                 da_bno = be32_to_cpu(leaf->hdr.info.forw);
                 if (da_bno != 0) {
                         if (verify_dir2_path(mp, da_cursor, 0)) {
-                               da_brelse(bp);
+                               libxfs_putbuf(bp);
                                 goto error_out;
                         }
                 }
@@ -1913,9 +1764,9 @@ _("bad sibling back pointer for block %u in directory inode %" PRIu64 "\n"),
                 ASSERT(buf_dirty == 0 || (buf_dirty && !no_modify));
                 if (buf_dirty && !no_modify) {
                         *repair = 1;
-                       da_bwrite(mp, bp);
+                       libxfs_writebuf(bp, 0);
                 } else
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
         } while (da_bno != 0);
         if (verify_final_dir2_path(mp, da_cursor, 0)) {
                 /*
@@ -2008,7 +1859,7 @@ process_leaf_node_dir2(
         int             isnode)         /* node directory not leaf */
  {
         bmap_ext_t              *bmp;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         xfs_dir2_data_t         *data;
         xfs_dfiloff_t           dbno;
         int                     good;
@@ -2017,6 +1868,7 @@ process_leaf_node_dir2(
         int                     nex;
         int                     t;
         bmap_ext_t              lbmp;
+       int                     dirty = 0;
  
         *repair = *dot = *dotdot = good = 0;
         *parent = NULLFSINO;
@@ -2043,21 +1895,21 @@ _("can't read block %" PRIu64 " for directory inode %" PRIu64 "\n"),
                                 dbno, ino);
                         continue;
                 }
-               data = bp->data;
+               data = bp->b_addr;
                 if (be32_to_cpu(data->hdr.magic) != XFS_DIR2_DATA_MAGIC)
                         do_warn(
  _("bad directory block magic # %#x in block %" PRIu64 " for directory inode %" PRIu64 "\n"),
                                 be32_to_cpu(data->hdr.magic), dbno, ino);
                 i = process_dir2_data(mp, ino, dip, ino_discovery, dirname,
                         parent, bp, dot, dotdot, (xfs_dablk_t)dbno,
-                       (char *)data + mp->m_dirblksize);
+                       (char *)data + mp->m_dirblksize, &dirty);
                 if (i == 0)
                         good++;
-               if (bp->dirty && !no_modify) {
+               if (dirty && !no_modify) {
                         *repair = 1;
-                       da_bwrite(mp, bp);
+                       libxfs_writebuf(bp, 0);
                 } else
-                       da_brelse(bp);
+                       libxfs_putbuf(bp);
         }
         if (good == 0)
                 return 1;
diff --git a/repair/dir2.h b/repair/dir2.h

index 4d30b893cd5b37f3ac13764b1c9f519be4cd1d42..5162028fcc98f4a47735284be489221e591de7fc 100644 (file)
--- a/repair/dir2.h
+++ b/repair/dir2.h
@@ -22,6 +22,32 @@
  struct blkmap;
  struct bmap_ext;
  
+/*
+ * generic dir2 structures used by xfs_repair.
+ * XXX: shared with xfsdb
+ */
+typedef union {
+       xfs_dir2_data_entry_t   entry;
+       xfs_dir2_data_unused_t  unused;
+} xfs_dir2_data_union_t;
+
+typedef struct xfs_dir2_data {
+       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_DATA_MAGIC */
+       xfs_dir2_data_union_t   u[1];
+} xfs_dir2_data_t;
+
+typedef struct xfs_dir2_block {
+       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_BLOCK_MAGIC */
+       xfs_dir2_data_union_t   u[1];
+       xfs_dir2_leaf_entry_t   leaf[1];
+       xfs_dir2_block_tail_t   tail;
+} xfs_dir2_block_t;
+
+typedef struct xfs_dir2_sf {
+       xfs_dir2_sf_hdr_t       hdr;            /* shortform header */
+       xfs_dir2_sf_entry_t     list[1];        /* shortform entries */
+} xfs_dir2_sf_t;
+
  /*
   * the cursor gets passed up and down the da btree processing
   * routines.  The interior block processing routines use the
@@ -42,7 +68,7 @@ struct bmap_ext;
   * Currently, we just trash it.
   */
  typedef struct dir2_level_state  {
-       xfs_dabuf_t     *bp;            /* block bp */
+       xfs_buf_t       *bp;            /* block bp */
         xfs_dablk_t     bno;            /* file block number */
         xfs_dahash_t    hashval;        /* last verified hashval */
         int             index;          /* current index in block */
diff --git a/repair/phase6.c b/repair/phase6.c

index a44ba09014d81fc88a50d6047b9b5b5e1f76cedc..5c33797037bc2ed8b8532ad9e63b1ab43ffe4412 100644 (file)
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -483,9 +483,9 @@ mk_rbmino(xfs_mount_t *mp)
         xfs_bmap_init(&flist, &first);
         while (bno < mp->m_sb.sb_rbmblocks) {
                 nmap = XFS_BMAP_MAX_NMAP;
-               error = libxfs_bmapi(tp, ip, bno,
+               error = libxfs_bmapi_write(tp, ip, bno,
                           (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
-                         XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+                         0, &first, mp->m_sb.sb_rbmblocks,
                           map, &nmap, &flist);
                 if (error) {
                         do_error(
@@ -541,7 +541,7 @@ fill_rbmino(xfs_mount_t *mp)
                  * fill the file one block at a time
                  */
                 nmap = 1;
-               error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE,
+               error = libxfs_bmapi_write(tp, ip, bno, 1, 0,
                                         &first, 1, &map, &nmap, NULL);
                 if (error || nmap != 1) {
                         do_error(
@@ -554,7 +554,7 @@ fill_rbmino(xfs_mount_t *mp)
                 error = libxfs_trans_read_buf(
                                 mp, tp, mp->m_dev,
                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                               XFS_FSB_TO_BB(mp, 1), 1, &bp);
+                               XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
  
                 if (error) {
                         do_warn(
@@ -610,7 +610,7 @@ fill_rsumino(xfs_mount_t *mp)
                  * fill the file one block at a time
                  */
                 nmap = 1;
-               error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE,
+               error = libxfs_bmapi_write(tp, ip, bno, 1, 0,
                                         &first, 1, &map, &nmap, NULL);
                 if (error || nmap != 1) {
                         do_error(
@@ -623,7 +623,7 @@ fill_rsumino(xfs_mount_t *mp)
                 error = libxfs_trans_read_buf(
                                 mp, tp, mp->m_dev,
                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                               XFS_FSB_TO_BB(mp, 1), 1, &bp);
+                               XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
  
                 if (error) {
                         do_warn(
@@ -722,10 +722,9 @@ mk_rsumino(xfs_mount_t *mp)
         xfs_bmap_init(&flist, &first);
         while (bno < nsumblocks) {
                 nmap = XFS_BMAP_MAX_NMAP;
-               error = libxfs_bmapi(tp, ip, bno,
+               error = libxfs_bmapi_write(tp, ip, bno,
                           (xfs_extlen_t)(nsumblocks - bno),
-                         XFS_BMAPI_WRITE, &first, nsumblocks,
-                         map, &nmap, &flist);
+                         0, &first, nsumblocks, map, &nmap, &flist);
                 if (error) {
                         do_error(
                 _("couldn't allocate realtime summary inode, error = %d\n"),
@@ -1283,7 +1282,7 @@ dir2_kill_block(
         xfs_mount_t     *mp,
         xfs_inode_t     *ip,
         xfs_dablk_t     da_bno,
-       xfs_dabuf_t     *bp)
+       struct xfs_buf  *bp)
  {
         xfs_da_args_t   args;
         int             committed;
@@ -1301,7 +1300,7 @@ dir2_kill_block(
                 res_failed(error);
         libxfs_trans_ijoin(tp, ip, 0);
         libxfs_trans_ihold(tp, ip);
-       libxfs_da_bjoin(tp, bp);
+       libxfs_trans_bjoin(tp, bp);
         memset(&args, 0, sizeof(args));
         xfs_bmap_init(&flist, &firstblock);
         args.dp = ip;
@@ -1333,7 +1332,7 @@ longform_dir2_entry_check_data(
         int                     *need_dot,
         ino_tree_node_t         *current_irec,
         int                     current_ino_offset,
-       xfs_dabuf_t             **bpp,
+       struct xfs_buf          **bpp,
         dir_hash_tab_t          *hashtab,
         freetab_t               **freetabp,
         xfs_dablk_t             da_bno,
@@ -1341,7 +1340,7 @@ longform_dir2_entry_check_data(
  {
         xfs_dir2_dataptr_t      addr;
         xfs_dir2_leaf_entry_t   *blp;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         xfs_dir2_block_tail_t   *btp;
         int                     committed;
         xfs_dir2_data_t         *d;
@@ -1370,14 +1369,14 @@ longform_dir2_entry_check_data(
         int                     wantmagic;
  
         bp = *bpp;
-       d = bp->data;
+       d = bp->b_addr;
         ptr = (char *)d->u;
         nbad = 0;
         needscan = needlog = 0;
         junkit = 0;
         freetab = *freetabp;
         if (isblock) {
-               btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+               btp = xfs_dir2_block_tail_p(mp, (struct xfs_dir2_data_hdr *)d);
                 blp = xfs_dir2_block_leaf_p(btp);
                 endptr = (char *)blp;
                 if (endptr > (char *)btp)
@@ -1465,7 +1464,7 @@ longform_dir2_entry_check_data(
                         dir2_kill_block(mp, ip, da_bno, bp);
                 } else {
                         do_warn(_("would junk block\n"));
-                       libxfs_da_brelse(NULL, bp);
+                       libxfs_putbuf(bp);
                 }
                 freetab->ents[db].v = NULLDATAOFF;
                 *bpp = NULL;
@@ -1483,8 +1482,8 @@ longform_dir2_entry_check_data(
                 res_failed(error);
         libxfs_trans_ijoin(tp, ip, 0);
         libxfs_trans_ihold(tp, ip);
-       libxfs_da_bjoin(tp, bp);
-       libxfs_da_bhold(tp, bp);
+       libxfs_trans_bjoin(tp, bp);
+       libxfs_trans_bhold(tp, bp);
         xfs_bmap_init(&flist, &firstblock);
         if (be32_to_cpu(d->hdr.magic) != wantmagic) {
                 do_warn(
@@ -1749,7 +1748,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 "
         }
         *num_illegal += nbad;
         if (needscan)
-               libxfs_dir2_data_freescan(mp, d, &needlog);
+               libxfs_dir2_data_freescan(mp, &d->hdr, &needlog);
         if (needlog)
                 libxfs_dir2_data_log_header(tp, bp);
         libxfs_bmap_finish(&tp, &flist, &committed);
@@ -1770,7 +1769,7 @@ longform_dir2_check_leaf(
  {
         int                     badtail;
         __be16                  *bestsp;
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         xfs_dablk_t             da_bno;
         int                     i;
         xfs_dir2_leaf_t         *leaf;
@@ -1778,13 +1777,13 @@ longform_dir2_check_leaf(
         int                     seeval;
  
         da_bno = mp->m_dirleafblk;
-       if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK)) {
+       if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK, NULL)) {
                 do_error(
         _("can't read block %u for directory inode %" PRIu64 "\n"),
                         da_bno, ip->i_ino);
                 /* NOTREACHED */
         }
-       leaf = bp->data;
+       leaf = bp->b_addr;
         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
         bestsp = xfs_dir2_leaf_bests_p(ltp);
         if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR2_LEAF1_MAGIC ||
@@ -1792,21 +1791,21 @@ longform_dir2_check_leaf(
                                 be32_to_cpu(leaf->hdr.info.back) ||
                                 be16_to_cpu(leaf->hdr.count) <
                                         be16_to_cpu(leaf->hdr.stale) ||
-                               be16_to_cpu(leaf->hdr.count) >
+                               be16_to_cpu(leaf->hdr.count) >
                                         xfs_dir2_max_leaf_ents(mp) ||
-                               (char *)&leaf->ents[be16_to_cpu(
+                               (char *)&leaf->ents[be16_to_cpu(
                                         leaf->hdr.count)] > (char *)bestsp) {
                 do_warn(
         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
                         da_bno, ip->i_ino);
-               libxfs_da_brelse(NULL, bp);
+               libxfs_putbuf(bp);
                 return 1;
         }
         seeval = dir_hash_see_all(hashtab, leaf->ents,
                                 be16_to_cpu(leaf->hdr.count),
                                 be16_to_cpu(leaf->hdr.stale));
         if (dir_hash_check(hashtab, ip, seeval)) {
-               libxfs_da_brelse(NULL, bp);
+               libxfs_putbuf(bp);
                 return 1;
         }
         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
@@ -1818,10 +1817,10 @@ longform_dir2_check_leaf(
                 do_warn(
         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
                         da_bno, ip->i_ino);
-               libxfs_da_brelse(NULL, bp);
+               libxfs_putbuf(bp);
                 return 1;
         }
-       libxfs_da_brelse(NULL, bp);
+       libxfs_putbuf(bp);
         return 0;
  }
  
@@ -1836,7 +1835,7 @@ longform_dir2_check_node(
         dir_hash_tab_t          *hashtab,
         freetab_t               *freetab)
  {
-       xfs_dabuf_t             *bp;
+       struct xfs_buf          *bp;
         xfs_dablk_t             da_bno;
         xfs_dir2_db_t           fdb;
         xfs_dir2_free_t         *free;
@@ -1852,25 +1851,25 @@ longform_dir2_check_node(
                 next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
                         break;
-               if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
-                               XFS_DATA_FORK)) {
+               if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp,
+                               XFS_DATA_FORK, NULL)) {
                         do_warn(
         _("can't read leaf block %u for directory inode %" PRIu64 "\n"),
                                 da_bno, ip->i_ino);
                         return 1;
                 }
-               leaf = bp->data;
+               leaf = bp->b_addr;
                 if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR2_LEAFN_MAGIC) {
                         if (be16_to_cpu(leaf->hdr.info.magic) ==
                                                         XFS_DA_NODE_MAGIC) {
-                               libxfs_da_brelse(NULL, bp);
+                               libxfs_putbuf(bp);
                                 continue;
                         }
                         do_warn(
         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
                                 be16_to_cpu(leaf->hdr.info.magic),
                                 da_bno, ip->i_ino);
-                       libxfs_da_brelse(NULL, bp);
+                       libxfs_putbuf(bp);
                         return 1;
                 }
                 if (be16_to_cpu(leaf->hdr.count) > xfs_dir2_max_leaf_ents(mp) ||
@@ -1879,13 +1878,13 @@ longform_dir2_check_node(
                         do_warn(
         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
                                 da_bno, ip->i_ino);
-                       libxfs_da_brelse(NULL, bp);
+                       libxfs_putbuf(bp);
                         return 1;
                 }
                 seeval = dir_hash_see_all(hashtab, leaf->ents,
                                         be16_to_cpu(leaf->hdr.count),
                                         be16_to_cpu(leaf->hdr.stale));
-               libxfs_da_brelse(NULL, bp);
+               libxfs_putbuf(bp);
                 if (seeval != DIR_HASH_CK_OK)
                         return 1;
         }
@@ -1898,25 +1897,25 @@ longform_dir2_check_node(
                 next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
                         break;
-               if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
-                               XFS_DATA_FORK)) {
+               if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bp,
+                               XFS_DATA_FORK, NULL)) {
                         do_warn(
         _("can't read freespace block %u for directory inode %" PRIu64 "\n"),
                                 da_bno, ip->i_ino);
                         return 1;
                 }
-               free = bp->data;
+               free = bp->b_addr;
                 fdb = xfs_dir2_da_to_db(mp, da_bno);
                 if (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC ||
                                 be32_to_cpu(free->hdr.firstdb) !=
                                         (fdb - XFS_DIR2_FREE_FIRSTDB(mp)) *
-                                               XFS_DIR2_MAX_FREE_BESTS(mp) ||
+                                               xfs_dir2_free_max_bests(mp) ||
                                 be32_to_cpu(free->hdr.nvalid) <
                                         be32_to_cpu(free->hdr.nused)) {
                         do_warn(
         _("free block %u for directory inode %" PRIu64 " bad header\n"),
                                 da_bno, ip->i_ino);
-                       libxfs_da_brelse(NULL, bp);
+                       libxfs_putbuf(bp);
                         return 1;
                 }
                 for (i = used = 0; i < be32_to_cpu(free->hdr.nvalid); i++) {
@@ -1924,11 +1923,11 @@ longform_dir2_check_node(
                                                         freetab->nents ||
                                         freetab->ents[i + be32_to_cpu(
                                                 free->hdr.firstdb)].v !=
-                                               be16_to_cpu(free->bests[i])) {
+                                               be16_to_cpu(free->bests[i])) {
                                 do_warn(
         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
                                         da_bno, i, ip->i_ino);
-                               libxfs_da_brelse(NULL, bp);
+                               libxfs_putbuf(bp);
                                 return 1;
                         }
                         used += be16_to_cpu(free->bests[i]) != NULLDATAOFF;
@@ -1938,10 +1937,10 @@ longform_dir2_check_node(
                         do_warn(
         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
                                 da_bno, ip->i_ino);
-                       libxfs_da_brelse(NULL, bp);
+                       libxfs_putbuf(bp);
                         return 1;
                 }
-               libxfs_da_brelse(NULL, bp);
+               libxfs_putbuf(bp);
         }
         for (i = 0; i < freetab->nents; i++) {
                 if ((freetab->ents[i].s == 0) &&
@@ -1971,7 +1970,7 @@ longform_dir2_entry_check(xfs_mount_t     *mp,
                         dir_hash_tab_t  *hashtab)
  {
         xfs_dir2_block_t        *block;
-       xfs_dabuf_t             **bplist;
+       struct xfs_buf          **bplist;
         xfs_dablk_t             da_bno;
         freetab_t               *freetab;
         int                     num_bps;
@@ -1998,7 +1997,7 @@ longform_dir2_entry_check(xfs_mount_t     *mp,
                 freetab->ents[i].s = 0;
         }
         num_bps = freetab->naents;
-       bplist = calloc(num_bps, sizeof(xfs_dabuf_t*));
+       bplist = calloc(num_bps, sizeof(struct xfs_buf*));
         /* is this a block, leaf, or node directory? */
         libxfs_dir2_isblock(NULL, ip, &isblock);
         libxfs_dir2_isleaf(NULL, ip, &isleaf);
@@ -2014,14 +2013,14 @@ longform_dir2_entry_check(xfs_mount_t   *mp,
                 if (db >= num_bps) {
                         /* more data blocks than expected */
                         num_bps = db + 1;
-                       bplist = realloc(bplist, num_bps * sizeof(xfs_dabuf_t*));
+                       bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*));
                         if (!bplist)
                                 do_error(
                 _("realloc failed in longform_dir2_entry_check (%zu bytes)\n"),
-                                       num_bps * sizeof(xfs_dabuf_t*));
+                                       num_bps * sizeof(struct xfs_buf*));
                 }
-               if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bplist[db],
-                               XFS_DATA_FORK)) {
+               if (libxfs_da_read_buf(NULL, ip, da_bno, -1, &bplist[db],
+                               XFS_DATA_FORK, NULL)) {
                         do_warn(
         _("can't read data block %u for directory inode %" PRIu64 "\n"),
                                 da_bno, ino);
@@ -2040,8 +2039,8 @@ longform_dir2_entry_check(xfs_mount_t     *mp,
                         xfs_dir2_block_tail_t   *btp;
                         xfs_dir2_leaf_entry_t   *blp;
  
-                       block = bplist[0]->data;
-                       btp = xfs_dir2_block_tail_p(mp, block);
+                       block = bplist[0]->b_addr;
+                       btp = xfs_dir2_block_tail_p(mp, &block->hdr);
                         blp = xfs_dir2_block_leaf_p(btp);
                         seeval = dir_hash_see_all(hashtab, blp,
                                                 be32_to_cpu(btp->count),
@@ -2060,14 +2059,14 @@ longform_dir2_entry_check(xfs_mount_t   *mp,
                 dir_hash_dup_names(hashtab);
                 for (i = 0; i < freetab->naents; i++)
                         if (bplist[i])
-                               libxfs_da_brelse(NULL, bplist[i]);
+                               libxfs_putbuf(bplist[i]);
                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
                 *num_illegal = 0;
                 *need_dot = 0;
         } else {
                 for (i = 0; i < freetab->naents; i++)
                         if (bplist[i])
-                               libxfs_da_brelse(NULL, bplist[i]);
+                               libxfs_putbuf(bplist[i]);
         }
  
         free(bplist);
@@ -2126,7 +2125,7 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
                         do_warn(
         _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
                                 ino, parent);
-                       xfs_dir2_sf_put_inumber(sfp, &parent, &sfp->hdr.parent);
+                       xfs_dir2_sf_put_parent_ino(&sfp->hdr, parent);
                         *ino_dirty = 1;
                 }
                 return;
@@ -2143,15 +2142,14 @@ shortform_dir2_entry_check(xfs_mount_t  *mp,
         /*
          * Initialise i8 counter -- the parent inode number counts as well.
          */
-       i8 = (xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent) >
-                                               XFS_DIR2_MAX_SHORT_INUM);
+       i8 = xfs_dir2_sf_get_parent_ino(&sfp->hdr) > XFS_DIR2_MAX_SHORT_INUM;
  
         /*
          * now run through entries, stop at first bad entry, don't need
          * to skip over '..' since that's encoded in its own field and
          * no need to worry about '.' since it doesn't exist.
          */
-       sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
+       sfep = next_sfep = xfs_dir2_sf_firstentry(&sfp->hdr);
  
         for (i = 0; i < sfp->hdr.count && max_size >
                                         (__psint_t)next_sfep - (__psint_t)sfp;
@@ -2160,7 +2158,7 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
                 bad_sfnamelen = 0;
                 tmp_sfep = NULL;
  
-               lino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+               lino = xfs_dir2_sfe_get_ino(&sfp->hdr, sfep);
  
                 namelen = sfep->namelen;
  
@@ -2189,7 +2187,7 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
                                 break;
                         }
                 } else if (no_modify && (__psint_t) sfep - (__psint_t) sfp +
-                               + xfs_dir2_sf_entsize_byentry(sfp, sfep)
+                               + xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen)
                                 > ip->i_d.di_size)  {
                         bad_sfnamelen = 1;
  
@@ -2219,7 +2217,7 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
  
                 if (no_modify && verify_inum(mp, lino))  {
                         next_sfep = (xfs_dir2_sf_entry_t *)((__psint_t)sfep +
-                                       xfs_dir2_sf_entsize_byentry(sfp, sfep));
+                               xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen));
                         continue;
                 }
  
@@ -2270,8 +2268,8 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
                  * check for duplicate names in directory.
                  */
                 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
-                                       (sfep - xfs_dir2_sf_firstentry(sfp)),
-                                       lino, sfep->namelen, sfep->name)) {
+                               (sfep - xfs_dir2_sf_firstentry(&sfp->hdr)),
+                               lino, sfep->namelen, sfep->name)) {
                         do_warn(
  _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
                                 fname, lino, ino);
@@ -2327,7 +2325,8 @@ do_junkit:
                         if (lino == orphanage_ino)
                                 orphanage_ino = 0;
                         if (!no_modify)  {
-                               tmp_elen = xfs_dir2_sf_entsize_byentry(sfp, sfep);
+                               tmp_elen = xfs_dir2_sf_entsize(&sfp->hdr,
+                                                               sfep->namelen);
                                 tmp_sfep = (xfs_dir2_sf_entry_t *)
                                         ((__psint_t) sfep + tmp_elen);
                                 tmp_len = max_size - ((__psint_t) tmp_sfep
@@ -2378,9 +2377,9 @@ do_junkit:
  
                 next_sfep = (tmp_sfep == NULL)
                         ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
-                               + ((!bad_sfnamelen)
-                                       ? xfs_dir2_sf_entsize_byentry(sfp, sfep)
-                                       : xfs_dir2_sf_entsize_byname(sfp, namelen)))
+                                                       + ((!bad_sfnamelen)
+                               ? xfs_dir2_sf_entsize(&sfp->hdr, sfep->namelen)
+                               : xfs_dir2_sf_entsize(&sfp->hdr, namelen)))
                         : tmp_sfep;
         }
author	Dave Chinner <dchinner@redhat.com>
	Thu, 9 May 2013 12:23:15 +0000 (07:23 -0500)
committer	Rich Johnston <rjohnston@sgi.com>
	Thu, 9 May 2013 12:23:15 +0000 (07:23 -0500)
db/check.c		patch \| blob \| blame \| history
db/dir2.c		patch \| blob \| blame \| history
db/dir2.h		patch \| blob \| blame \| history
db/dir2sf.c		patch \| blob \| blame \| history
db/metadump.c		patch \| blob \| blame \| history
estimate/xfs_estimate.c		patch \| blob \| blame \| history
fsr/xfs_fsr.c		patch \| blob \| blame \| history
include/Makefile		patch \| blob \| blame \| history
include/libxfs.h		patch \| blob \| blame \| history
include/linux.h		patch \| blob \| blame \| history
include/platform_defs.h.in		patch \| blob \| blame \| history
include/project.h		patch \| blob \| blame \| history
include/swab.h		patch \| blob \| blame \| history
include/xfs.h		patch \| blob \| blame \| history
include/xfs_ag.h		patch \| blob \| blame \| history
include/xfs_alloc.h		patch \| blob \| blame \| history
include/xfs_alloc_btree.h		patch \| blob \| blame \| history
include/xfs_attr_leaf.h		patch \| blob \| blame \| history
include/xfs_bmap.h		patch \| blob \| blame \| history
include/xfs_bmap_btree.h		patch \| blob \| blame \| history
include/xfs_btree.h		patch \| blob \| blame \| history
include/xfs_buf_item.h		patch \| blob \| blame \| history
include/xfs_da_btree.h		patch \| blob \| blame \| history
include/xfs_dinode.h		patch \| blob \| blame \| history
include/xfs_dir2.h		patch \| blob \| blame \| history
include/xfs_dir2_block.h	[deleted file]	patch \| blob \| blame \| history
include/xfs_dir2_data.h	[deleted file]	patch \| blob \| blame \| history
include/xfs_dir2_format.h	[new file with mode: 0644]	patch \| blob
include/xfs_dir2_leaf.h	[deleted file]	patch \| blob \| blame \| history
include/xfs_dir2_node.h	[deleted file]	patch \| blob \| blame \| history
include/xfs_dir2_sf.h	[deleted file]	patch \| blob \| blame \| history
include/xfs_fs.h		patch \| blob \| blame \| history
include/xfs_ialloc.h		patch \| blob \| blame \| history
include/xfs_ialloc_btree.h		patch \| blob \| blame \| history
include/xfs_inode.h		patch \| blob \| blame \| history
include/xfs_inode_item.h		patch \| blob \| blame \| history
include/xfs_inum.h		patch \| blob \| blame \| history
include/xfs_log.h		patch \| blob \| blame \| history
include/xfs_log_priv.h		patch \| blob \| blame \| history
include/xfs_mount.h		patch \| blob \| blame \| history
include/xfs_quota.h		patch \| blob \| blame \| history
include/xfs_rtalloc.h		patch \| blob \| blame \| history
include/xfs_sb.h		patch \| blob \| blame \| history
include/xfs_trace.h		patch \| blob \| blame \| history
include/xfs_trans.h		patch \| blob \| blame \| history
include/xfs_types.h		patch \| blob \| blame \| history
libxfs/Makefile		patch \| blob \| blame \| history
libxfs/init.c		patch \| blob \| blame \| history
libxfs/logitem.c		patch \| blob \| blame \| history
libxfs/rdwr.c		patch \| blob \| blame \| history
libxfs/trans.c		patch \| blob \| blame \| history
libxfs/util.c		patch \| blob \| blame \| history
libxfs/xfs.h		patch \| blob \| blame \| history
libxfs/xfs_alloc.c		patch \| blob \| blame \| history
libxfs/xfs_alloc_btree.c		patch \| blob \| blame \| history
libxfs/xfs_attr.c		patch \| blob \| blame \| history
libxfs/xfs_attr_leaf.c		patch \| blob \| blame \| history
libxfs/xfs_bmap.c		patch \| blob \| blame \| history
libxfs/xfs_bmap_btree.c		patch \| blob \| blame \| history
libxfs/xfs_btree.c		patch \| blob \| blame \| history
libxfs/xfs_da_btree.c		patch \| blob \| blame \| history
libxfs/xfs_dir2.c		patch \| blob \| blame \| history
libxfs/xfs_dir2_block.c		patch \| blob \| blame \| history
libxfs/xfs_dir2_data.c		patch \| blob \| blame \| history
libxfs/xfs_dir2_leaf.c		patch \| blob \| blame \| history
libxfs/xfs_dir2_node.c		patch \| blob \| blame \| history
libxfs/xfs_dir2_priv.h	[new file with mode: 0644]	patch \| blob
libxfs/xfs_dir2_sf.c		patch \| blob \| blame \| history
libxfs/xfs_ialloc.c		patch \| blob \| blame \| history
libxfs/xfs_ialloc_btree.c		patch \| blob \| blame \| history
libxfs/xfs_inode.c		patch \| blob \| blame \| history
libxfs/xfs_mount.c		patch \| blob \| blame \| history
libxfs/xfs_rtalloc.c		patch \| blob \| blame \| history
libxfs/xfs_trans.c		patch \| blob \| blame \| history
logprint/log_misc.c		patch \| blob \| blame \| history
logprint/log_print_all.c		patch \| blob \| blame \| history
mkfs/proto.c		patch \| blob \| blame \| history
repair/dir2.c		patch \| blob \| blame \| history
repair/dir2.h		patch \| blob \| blame \| history
repair/phase6.c		patch \| blob \| blame \| history