From 2bd0ea187d570cb004585bd58c4ad4dfea6606ca Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Mon, 15 Jan 2001 05:36:03 +0000
Subject: [PATCH] cmd/xfs/bmap/Makefile 1.8 Renamed to
 cmd/xfsprogs/bmap/Makefile

---
 bmap/Makefile               |   45 +
 bmap/xfs_bmap.c             |  405 ++++
 build/Makefile              |   78 +
 build/rpm/Makefile          |   78 +
 build/rpm/macros.template   |   30 +
 build/rpm/rpm-2.rc.template |   25 +
 build/tar/Makefile          |   50 +
 db/Makefile                 |   58 +
 db/addr.c                   |  139 ++
 db/addr.h                   |   33 +
 db/agf.c                    |  143 ++
 db/agf.h                    |   39 +
 db/agfl.c                   |  122 +
 db/agfl.h                   |   39 +
 db/agi.c                    |  130 +
 db/agi.h                    |   39 +
 db/attr.c                   |  465 ++++
 db/attr.h                   |   44 +
 db/attrshort.c              |  182 ++
 db/attrshort.h              |   39 +
 db/bit.c                    |  202 ++
 db/bit.h                    |   43 +
 db/block.c                  |  303 +++
 db/block.h                  |   36 +
 db/bmap.c                   |  355 +++
 db/bmap.h                   |   48 +
 db/bmapbt.c                 |  331 +++
 db/bmapbt.h                 |   45 +
 db/bmroot.c                 |  274 +++
 db/bmroot.h                 |   41 +
 db/bnobt.c                  |  187 ++
 db/bnobt.h                  |   40 +
 db/check.c                  | 4468 ++++++++++++++++++++++++++++++++++
 db/check.h                  |   33 +
 db/cntbt.c                  |  193 ++
 db/cntbt.h                  |   40 +
 db/command.c                |  158 ++
 db/command.h                |   55 +
 db/convert.c                |  340 +++
 db/convert.h                |   33 +
 db/data.c                   |   41 +
 db/data.h                   |   39 +
 db/dbread.c                 |   84 +
 db/dbread.h                 |   34 +
 db/debug.c                  |   68 +
 db/debug.h                  |   36 +
 db/dir.c                    |  270 +++
 db/dir.h                    |   44 +
 db/dir2.c                   |  727 ++++++
 db/dir2.h                   |   45 +
 db/dir2sf.c                 |  235 ++
 db/dir2sf.h                 |   41 +
 db/dirshort.c               |  146 ++
 db/dirshort.h               |   39 +
 db/dquot.c                  |  176 ++
 db/dquot.h                  |   39 +
 db/echo.c                   |   62 +
 db/echo.h                   |   33 +
 db/faddr.c                  |  404 ++++
 db/faddr.h                  |   50 +
 db/field.c                  |  394 +++
 db/field.h                  |  224 ++
 db/flist.c                  |  437 ++++
 db/flist.h                  |   66 +
 db/fprint.c                 |  201 ++
 db/fprint.h                 |   45 +
 db/frag.c                   |  534 +++++
 db/frag.h                   |   33 +
 db/freesp.c                 |  427 ++++
 db/freesp.h                 |   33 +
 db/hash.c                   |   78 +
 db/hash.h                   |   34 +
 db/help.c                   |  109 +
 db/help.h                   |   33 +
 db/init.c                   |  148 ++
 db/init.h                   |   34 +
 db/inobt.c                  |  193 ++
 db/inobt.h                  |   40 +
 db/inode.c                  |  594 +++++
 db/inode.h                  |   47 +
 db/input.c                  |  272 +++
 db/input.h                  |   37 +
 db/io.c                     |  627 +++++
 db/io.h                     |   76 +
 db/main.c                   |   61 +
 db/malloc.c                 |  106 +
 db/malloc.h                 |   37 +
 db/mount.c                  |  153 ++
 db/mount.h                  |   34 +
 db/output.c                 |  124 +
 db/output.h                 |   37 +
 db/print.c                  |  310 +++
 db/print.h                  |   41 +
 db/quit.c                   |   55 +
 db/quit.h                   |   33 +
 db/sb.c                     |  162 ++
 db/sb.h                     |   39 +
 db/sig.c                    |   80 +
 db/sig.h                    |   37 +
 db/strvec.c                 |  113 +
 db/strvec.h                 |   37 +
 db/type.c                   |  197 ++
 db/type.h                   |   68 +
 db/uuid.c                   |  364 +++
 db/uuid.h                   |   33 +
 db/write.c                  |  708 ++++++
 db/write.h                  |   38 +
 db/xfs_admin.sh             |   60 +
 db/xfs_check.sh             |   63 +
 db/xfs_check64.sh           |   63 +
 db/xfs_ncheck.sh            |   61 +
 db/xfs_ncheck64.sh          |   61 +
 doc/Makefile                |   45 +
 doc/README.LVM              |   77 +
 fsck/Makefile               |   46 +
 fsck/xfs_fsck.c             |   42 +
 growfs/Makefile             |   50 +
 growfs/xfs_growfs.c         |  458 ++++
 growfs/xfs_info.sh          |   56 +
 include/Makefile            |   52 +
 include/arch.h              |  236 ++
 include/builddefs.in        |  173 ++
 include/buildrules          |   76 +
 include/handle.h            |   53 +
 include/jdm.h               |   61 +
 include/libxfs.h            |  474 ++++
 include/platform_defs.h.in  |  107 +
 include/xfs_ag.h            |  343 +++
 include/xfs_alloc.h         |  200 ++
 include/xfs_alloc_btree.h   |  251 ++
 include/xfs_arch.h          |   79 +
 include/xfs_attr_leaf.h     |  305 +++
 include/xfs_attr_sf.h       |  156 ++
 include/xfs_bit.h           |  102 +
 include/xfs_bmap.h          |  397 +++
 include/xfs_bmap_btree.h    |  661 +++++
 include/xfs_btree.h         |  573 +++++
 include/xfs_buf_item.h      |  180 ++
 include/xfs_cred.h          |  152 ++
 include/xfs_da_btree.h      |  340 +++
 include/xfs_dfrag.h         |   67 +
 include/xfs_dinode.h        |  476 ++++
 include/xfs_dir.h           |  162 ++
 include/xfs_dir2.h          |  111 +
 include/xfs_dir2_block.h    |  128 +
 include/xfs_dir2_data.h     |  232 ++
 include/xfs_dir2_leaf.h     |  361 +++
 include/xfs_dir2_node.h     |  160 ++
 include/xfs_dir2_sf.h       |  256 ++
 include/xfs_dir_leaf.h      |  257 ++
 include/xfs_dir_sf.h        |  188 ++
 include/xfs_dqblk.h         |   99 +
 include/xfs_dquot_item.h    |  104 +
 include/xfs_extfree_item.h  |  123 +
 include/xfs_fs.h            |  476 ++++
 include/xfs_ialloc.h        |  181 ++
 include/xfs_ialloc_btree.h  |  318 +++
 include/xfs_imap.h          |   54 +
 include/xfs_inode.h         |  615 +++++
 include/xfs_inode_item.h    |  193 ++
 include/xfs_inum.h          |  173 ++
 include/xfs_log.h           |  183 ++
 include/xfs_log_priv.h      |  540 +++++
 include/xfs_log_recover.h   |   81 +
 include/xfs_mount.h         |  490 ++++
 include/xfs_quota.h         |  320 +++
 include/xfs_rtalloc.h       |  164 ++
 include/xfs_sb.h            |  490 ++++
 include/xfs_trans.h         | 1000 ++++++++
 include/xfs_trans_space.h   |  105 +
 include/xfs_types.h         |  303 +++
 include/xqm.h               |  166 ++
 libxfs/Makefile             |   62 +
 libxfs/init.c               |  764 ++++++
 libxfs/logitem.c            |  496 ++++
 libxfs/rdwr.c               |  468 ++++
 libxfs/trans.c              |  754 ++++++
 libxfs/util.c               |  735 ++++++
 libxfs/xfs.h                |  548 +++++
 libxfs/xfs_alloc.c          | 2355 ++++++++++++++++++
 libxfs/xfs_alloc_btree.c    | 2136 +++++++++++++++++
 libxfs/xfs_attr_leaf.c      | 1169 +++++++++
 libxfs/xfs_bit.c            |  307 +++
 libxfs/xfs_bmap.c           | 4511 +++++++++++++++++++++++++++++++++++
 libxfs/xfs_bmap_btree.c     | 2528 ++++++++++++++++++++
 libxfs/xfs_btree.c          |  889 +++++++
 libxfs/xfs_da_btree.c       | 2524 ++++++++++++++++++++
 libxfs/xfs_dir.c            |  622 +++++
 libxfs/xfs_dir2.c           |  594 +++++
 libxfs/xfs_dir2_block.c     | 1094 +++++++++
 libxfs/xfs_dir2_data.c      |  832 +++++++
 libxfs/xfs_dir2_leaf.c      | 1496 ++++++++++++
 libxfs/xfs_dir2_node.c      | 1988 +++++++++++++++
 libxfs/xfs_dir2_sf.c        | 1119 +++++++++
 libxfs/xfs_dir_leaf.c       | 1695 +++++++++++++
 libxfs/xfs_ialloc.c         | 1113 +++++++++
 libxfs/xfs_ialloc_btree.c   | 1552 ++++++++++++
 libxfs/xfs_inode.c          | 1371 +++++++++++
 libxfs/xfs_mount.c          |  214 ++
 libxfs/xfs_rtalloc.c        |  835 +++++++
 libxfs/xfs_rtbit.c          |   61 +
 libxfs/xfs_trans.c          |   79 +
 logprint/Makefile           |   50 +
 logprint/log_misc.c         | 1184 +++++++++
 logprint/log_print_all.c    |  593 +++++
 logprint/log_print_trans.c  |  146 ++
 logprint/logprint.c         |  247 ++
 logprint/logprint.h         |  162 ++
 man/Makefile                |   41 +
 man/man5/Makefile           |   49 +
 man/man5/xfs.5              |  114 +
 man/man8/Makefile           |   49 +
 man/man8/fsck.xfs.8         |   23 +
 man/man8/mkfs.xfs.8         |  485 ++++
 man/man8/xfs_admin.8        |   68 +
 man/man8/xfs_bmap.8         |   54 +
 man/man8/xfs_check.8        |  177 ++
 man/man8/xfs_db.8           | 1187 +++++++++
 man/man8/xfs_growfs.8       |  135 ++
 man/man8/xfs_logprint.8     |   86 +
 man/man8/xfs_mkfile.8       |   27 +
 man/man8/xfs_ncheck.8       |   53 +
 man/man8/xfs_repair.8       |  353 +++
 mkfile/Makefile             |   45 +
 mkfile/xfs_mkfile.c         |  284 +++
 mkfs/Makefile               |   59 +
 mkfs/maxtrres.c             |  193 ++
 mkfs/proto.c                |  769 ++++++
 mkfs/proto.h                |   35 +
 mkfs/xfs_mkfs.c             | 1944 +++++++++++++++
 mkfs/xfs_mkfs.h             |   50 +
 repair/Makefile             |   72 +
 repair/README               |  718 ++++++
 repair/agheader.c           |  432 ++++
 repair/agheader.h           |  113 +
 repair/attr_repair.c        | 1067 +++++++++
 repair/attr_repair.h        |   47 +
 repair/avl.c                | 1465 ++++++++++++
 repair/avl.h                |  143 ++
 repair/avl64.c              | 1458 +++++++++++
 repair/avl64.h              |  151 ++
 repair/bmap.c               |  409 ++++
 repair/bmap.h               |   87 +
 repair/dino_chunks.c        | 1178 +++++++++
 repair/dinode.c             | 2914 ++++++++++++++++++++++
 repair/dinode.h             |  155 ++
 repair/dir.c                | 3033 +++++++++++++++++++++++
 repair/dir.h                |  160 ++
 repair/dir2.c               | 2070 ++++++++++++++++
 repair/dir2.h               |  124 +
 repair/dir_stack.c          |  136 ++
 repair/dir_stack.h          |   47 +
 repair/err_protos.h         |   36 +
 repair/globals.c            |   37 +
 repair/globals.h            |  205 ++
 repair/incore.c             |  308 +++
 repair/incore.h             |  564 +++++
 repair/incore_bmc.c         |   57 +
 repair/incore_ext.c         | 1000 ++++++++
 repair/incore_ino.c         |  834 +++++++
 repair/init.c               |   69 +
 repair/io.c                 |   76 +
 repair/phase1.c             |  128 +
 repair/phase2.c             |  173 ++
 repair/phase3.c             |  215 ++
 repair/phase4.c             | 1337 +++++++++++
 repair/phase5.c             | 1633 +++++++++++++
 repair/phase6.c             | 3971 ++++++++++++++++++++++++++++++
 repair/phase7.c             |  186 ++
 repair/protos.h             |   59 +
 repair/rt.c                 |  297 +++
 repair/rt.h                 |   56 +
 repair/sb.c                 |  824 +++++++
 repair/scan.c               | 1279 ++++++++++
 repair/scan.h               |  116 +
 repair/versions.c           |  307 +++
 repair/versions.h           |   95 +
 repair/xfs_repair.c         |  582 +++++
 278 files changed, 108229 insertions(+)
 create mode 100644 bmap/Makefile
 create mode 100644 bmap/xfs_bmap.c
 create mode 100644 build/Makefile
 create mode 100644 build/rpm/Makefile
 create mode 100644 build/rpm/macros.template
 create mode 100644 build/rpm/rpm-2.rc.template
 create mode 100644 build/tar/Makefile
 create mode 100644 db/Makefile
 create mode 100644 db/addr.c
 create mode 100644 db/addr.h
 create mode 100644 db/agf.c
 create mode 100644 db/agf.h
 create mode 100644 db/agfl.c
 create mode 100644 db/agfl.h
 create mode 100644 db/agi.c
 create mode 100644 db/agi.h
 create mode 100644 db/attr.c
 create mode 100644 db/attr.h
 create mode 100644 db/attrshort.c
 create mode 100644 db/attrshort.h
 create mode 100644 db/bit.c
 create mode 100644 db/bit.h
 create mode 100644 db/block.c
 create mode 100644 db/block.h
 create mode 100644 db/bmap.c
 create mode 100644 db/bmap.h
 create mode 100644 db/bmapbt.c
 create mode 100644 db/bmapbt.h
 create mode 100644 db/bmroot.c
 create mode 100644 db/bmroot.h
 create mode 100644 db/bnobt.c
 create mode 100644 db/bnobt.h
 create mode 100644 db/check.c
 create mode 100644 db/check.h
 create mode 100644 db/cntbt.c
 create mode 100644 db/cntbt.h
 create mode 100644 db/command.c
 create mode 100644 db/command.h
 create mode 100644 db/convert.c
 create mode 100644 db/convert.h
 create mode 100644 db/data.c
 create mode 100644 db/data.h
 create mode 100644 db/dbread.c
 create mode 100644 db/dbread.h
 create mode 100644 db/debug.c
 create mode 100644 db/debug.h
 create mode 100644 db/dir.c
 create mode 100644 db/dir.h
 create mode 100644 db/dir2.c
 create mode 100644 db/dir2.h
 create mode 100644 db/dir2sf.c
 create mode 100644 db/dir2sf.h
 create mode 100644 db/dirshort.c
 create mode 100644 db/dirshort.h
 create mode 100644 db/dquot.c
 create mode 100644 db/dquot.h
 create mode 100644 db/echo.c
 create mode 100644 db/echo.h
 create mode 100644 db/faddr.c
 create mode 100644 db/faddr.h
 create mode 100644 db/field.c
 create mode 100644 db/field.h
 create mode 100644 db/flist.c
 create mode 100644 db/flist.h
 create mode 100644 db/fprint.c
 create mode 100644 db/fprint.h
 create mode 100644 db/frag.c
 create mode 100644 db/frag.h
 create mode 100644 db/freesp.c
 create mode 100644 db/freesp.h
 create mode 100644 db/hash.c
 create mode 100644 db/hash.h
 create mode 100644 db/help.c
 create mode 100644 db/help.h
 create mode 100644 db/init.c
 create mode 100644 db/init.h
 create mode 100644 db/inobt.c
 create mode 100644 db/inobt.h
 create mode 100644 db/inode.c
 create mode 100644 db/inode.h
 create mode 100644 db/input.c
 create mode 100644 db/input.h
 create mode 100644 db/io.c
 create mode 100644 db/io.h
 create mode 100644 db/main.c
 create mode 100644 db/malloc.c
 create mode 100644 db/malloc.h
 create mode 100644 db/mount.c
 create mode 100644 db/mount.h
 create mode 100644 db/output.c
 create mode 100644 db/output.h
 create mode 100644 db/print.c
 create mode 100644 db/print.h
 create mode 100644 db/quit.c
 create mode 100644 db/quit.h
 create mode 100644 db/sb.c
 create mode 100644 db/sb.h
 create mode 100644 db/sig.c
 create mode 100644 db/sig.h
 create mode 100644 db/strvec.c
 create mode 100644 db/strvec.h
 create mode 100644 db/type.c
 create mode 100644 db/type.h
 create mode 100644 db/uuid.c
 create mode 100644 db/uuid.h
 create mode 100644 db/write.c
 create mode 100644 db/write.h
 create mode 100755 db/xfs_admin.sh
 create mode 100755 db/xfs_check.sh
 create mode 100755 db/xfs_check64.sh
 create mode 100755 db/xfs_ncheck.sh
 create mode 100755 db/xfs_ncheck64.sh
 create mode 100644 doc/Makefile
 create mode 100644 doc/README.LVM
 create mode 100644 fsck/Makefile
 create mode 100644 fsck/xfs_fsck.c
 create mode 100644 growfs/Makefile
 create mode 100644 growfs/xfs_growfs.c
 create mode 100755 growfs/xfs_info.sh
 create mode 100644 include/Makefile
 create mode 100644 include/arch.h
 create mode 100644 include/builddefs.in
 create mode 100644 include/buildrules
 create mode 100644 include/handle.h
 create mode 100644 include/jdm.h
 create mode 100644 include/libxfs.h
 create mode 100644 include/platform_defs.h.in
 create mode 100644 include/xfs_ag.h
 create mode 100644 include/xfs_alloc.h
 create mode 100644 include/xfs_alloc_btree.h
 create mode 100644 include/xfs_arch.h
 create mode 100644 include/xfs_attr_leaf.h
 create mode 100644 include/xfs_attr_sf.h
 create mode 100644 include/xfs_bit.h
 create mode 100644 include/xfs_bmap.h
 create mode 100644 include/xfs_bmap_btree.h
 create mode 100644 include/xfs_btree.h
 create mode 100644 include/xfs_buf_item.h
 create mode 100644 include/xfs_cred.h
 create mode 100644 include/xfs_da_btree.h
 create mode 100644 include/xfs_dfrag.h
 create mode 100644 include/xfs_dinode.h
 create mode 100644 include/xfs_dir.h
 create mode 100644 include/xfs_dir2.h
 create mode 100644 include/xfs_dir2_block.h
 create mode 100644 include/xfs_dir2_data.h
 create mode 100644 include/xfs_dir2_leaf.h
 create mode 100644 include/xfs_dir2_node.h
 create mode 100644 include/xfs_dir2_sf.h
 create mode 100644 include/xfs_dir_leaf.h
 create mode 100644 include/xfs_dir_sf.h
 create mode 100644 include/xfs_dqblk.h
 create mode 100644 include/xfs_dquot_item.h
 create mode 100644 include/xfs_extfree_item.h
 create mode 100644 include/xfs_fs.h
 create mode 100644 include/xfs_ialloc.h
 create mode 100644 include/xfs_ialloc_btree.h
 create mode 100644 include/xfs_imap.h
 create mode 100644 include/xfs_inode.h
 create mode 100644 include/xfs_inode_item.h
 create mode 100644 include/xfs_inum.h
 create mode 100644 include/xfs_log.h
 create mode 100644 include/xfs_log_priv.h
 create mode 100644 include/xfs_log_recover.h
 create mode 100644 include/xfs_mount.h
 create mode 100644 include/xfs_quota.h
 create mode 100644 include/xfs_rtalloc.h
 create mode 100644 include/xfs_sb.h
 create mode 100644 include/xfs_trans.h
 create mode 100644 include/xfs_trans_space.h
 create mode 100644 include/xfs_types.h
 create mode 100644 include/xqm.h
 create mode 100644 libxfs/Makefile
 create mode 100644 libxfs/init.c
 create mode 100644 libxfs/logitem.c
 create mode 100644 libxfs/rdwr.c
 create mode 100644 libxfs/trans.c
 create mode 100644 libxfs/util.c
 create mode 100644 libxfs/xfs.h
 create mode 100644 libxfs/xfs_alloc.c
 create mode 100644 libxfs/xfs_alloc_btree.c
 create mode 100644 libxfs/xfs_attr_leaf.c
 create mode 100644 libxfs/xfs_bit.c
 create mode 100644 libxfs/xfs_bmap.c
 create mode 100644 libxfs/xfs_bmap_btree.c
 create mode 100644 libxfs/xfs_btree.c
 create mode 100644 libxfs/xfs_da_btree.c
 create mode 100644 libxfs/xfs_dir.c
 create mode 100644 libxfs/xfs_dir2.c
 create mode 100644 libxfs/xfs_dir2_block.c
 create mode 100644 libxfs/xfs_dir2_data.c
 create mode 100644 libxfs/xfs_dir2_leaf.c
 create mode 100644 libxfs/xfs_dir2_node.c
 create mode 100644 libxfs/xfs_dir2_sf.c
 create mode 100644 libxfs/xfs_dir_leaf.c
 create mode 100644 libxfs/xfs_ialloc.c
 create mode 100644 libxfs/xfs_ialloc_btree.c
 create mode 100644 libxfs/xfs_inode.c
 create mode 100644 libxfs/xfs_mount.c
 create mode 100644 libxfs/xfs_rtalloc.c
 create mode 100644 libxfs/xfs_rtbit.c
 create mode 100644 libxfs/xfs_trans.c
 create mode 100644 logprint/Makefile
 create mode 100644 logprint/log_misc.c
 create mode 100644 logprint/log_print_all.c
 create mode 100644 logprint/log_print_trans.c
 create mode 100644 logprint/logprint.c
 create mode 100644 logprint/logprint.h
 create mode 100644 man/Makefile
 create mode 100644 man/man5/Makefile
 create mode 100644 man/man5/xfs.5
 create mode 100644 man/man8/Makefile
 create mode 100644 man/man8/fsck.xfs.8
 create mode 100644 man/man8/mkfs.xfs.8
 create mode 100644 man/man8/xfs_admin.8
 create mode 100644 man/man8/xfs_bmap.8
 create mode 100644 man/man8/xfs_check.8
 create mode 100644 man/man8/xfs_db.8
 create mode 100644 man/man8/xfs_growfs.8
 create mode 100644 man/man8/xfs_logprint.8
 create mode 100644 man/man8/xfs_mkfile.8
 create mode 100644 man/man8/xfs_ncheck.8
 create mode 100644 man/man8/xfs_repair.8
 create mode 100644 mkfile/Makefile
 create mode 100644 mkfile/xfs_mkfile.c
 create mode 100644 mkfs/Makefile
 create mode 100644 mkfs/maxtrres.c
 create mode 100644 mkfs/proto.c
 create mode 100644 mkfs/proto.h
 create mode 100644 mkfs/xfs_mkfs.c
 create mode 100644 mkfs/xfs_mkfs.h
 create mode 100644 repair/Makefile
 create mode 100644 repair/README
 create mode 100644 repair/agheader.c
 create mode 100644 repair/agheader.h
 create mode 100644 repair/attr_repair.c
 create mode 100644 repair/attr_repair.h
 create mode 100644 repair/avl.c
 create mode 100644 repair/avl.h
 create mode 100644 repair/avl64.c
 create mode 100644 repair/avl64.h
 create mode 100644 repair/bmap.c
 create mode 100644 repair/bmap.h
 create mode 100644 repair/dino_chunks.c
 create mode 100644 repair/dinode.c
 create mode 100644 repair/dinode.h
 create mode 100644 repair/dir.c
 create mode 100644 repair/dir.h
 create mode 100644 repair/dir2.c
 create mode 100644 repair/dir2.h
 create mode 100644 repair/dir_stack.c
 create mode 100644 repair/dir_stack.h
 create mode 100644 repair/err_protos.h
 create mode 100644 repair/globals.c
 create mode 100644 repair/globals.h
 create mode 100644 repair/incore.c
 create mode 100644 repair/incore.h
 create mode 100644 repair/incore_bmc.c
 create mode 100644 repair/incore_ext.c
 create mode 100644 repair/incore_ino.c
 create mode 100644 repair/init.c
 create mode 100644 repair/io.c
 create mode 100644 repair/phase1.c
 create mode 100644 repair/phase2.c
 create mode 100644 repair/phase3.c
 create mode 100644 repair/phase4.c
 create mode 100644 repair/phase5.c
 create mode 100644 repair/phase6.c
 create mode 100644 repair/phase7.c
 create mode 100644 repair/protos.h
 create mode 100644 repair/rt.c
 create mode 100644 repair/rt.h
 create mode 100644 repair/sb.c
 create mode 100644 repair/scan.c
 create mode 100644 repair/scan.h
 create mode 100644 repair/versions.c
 create mode 100644 repair/versions.h
 create mode 100644 repair/xfs_repair.c

diff --git a/bmap/Makefile b/bmap/Makefile
new file mode 100644
index 000000000..741ae5682
--- /dev/null
+++ b/bmap/Makefile
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+# The command built in this directory and (presumably) the sources for it.
+CMDTARGET = xfs_bmap
+CFILES = xfs_bmap.c
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+# Runs $(INSTALL) -d on $(XFS_CMDS_BIN_DIR), then installs the command there.
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
diff --git a/bmap/xfs_bmap.c b/bmap/xfs_bmap.c
new file mode 100644
index 000000000..bd594a7bc
--- /dev/null
+++ b/bmap/xfs_bmap.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* 
+ * Bmap display utility for xfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libxfs.h>
+
+int aflag = 0;	/* -a given: query the attribute fork (BMV_IF_ATTRFORK) */
+int lflag = 0;	/* -l given: also print each extent's length in blocks */
+int nflag = 0;	/* -n value: number of extents requested; 0 = not specified */
+int vflag = 0;	/* -v count: nonzero = verbose table, > 1 = also trace output */
+int bmv_iflags = 0;	/* Input flags for XFS_IOC_GETBMAPX */
+
+int dofile(char *);	/* returns 0 on success, 1 on failure */
+__off64_t file_size(int fd, char * fname);	/* st_size, or -1 on error */
+int numlen(__off64_t);	/* number of decimal digits, at least 1 */
+
+/*
+ * Parse the command line, then print the block map of every file named on
+ * it.  Returns 1 if any file could not be mapped, 0 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	char	*fname;
+	int	i = 0;	/* sum of dofile() results: nonzero if any file failed */
+	int	option;
+
+	fname = basename(argv[0]);
+	while ((option = getopt(argc, argv, "adln:pvV")) != EOF) {
+		switch (option) {
+		case 'a':
+			bmv_iflags |= BMV_IF_ATTRFORK;
+			aflag = 1;
+			break;
+		case 'l':
+			lflag = 1;
+			break;
+		case 'n':
+			nflag = atoi(optarg);
+			break;
+		case 'd':	/* do not recall possibly offline DMAPI files */
+			bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+			break;
+		case 'p':	/* report unwritten preallocated blocks */
+			bmv_iflags |= BMV_IF_PREALLOC;
+			break;
+		case 'v':
+			vflag++;
+			break;
+		case 'V':
+			printf("%s version %s\n", fname, VERSION);
+			break;
+		default:
+			fprintf(stderr, "Usage: %s [-adlpvV] [-n nx] file...\n",
+					fname);
+			exit(1);
+		}
+	}
+	/* With -a the -p and -d input flags are dropped again. */
+	if (aflag)
+		bmv_iflags &=  ~(BMV_IF_PREALLOC|BMV_IF_NO_DMAPI_READ);
+	for (; optind < argc; optind++)
+		i += dofile(argv[optind]);
+	return(i ? 1 : 0);
+}
+
+/*
+ * Return the size in bytes of open file fd.  If fstat64 fails, complain on
+ * stderr, restore errno to its value on entry and return -1.
+ */
+__off64_t
+file_size(int	fd, char *fname)
+{
+	struct stat64	sb;
+	int		entry_errno = errno;	/* put back if fstat64 fails */
+
+	if (fstat64(fd, &sb) >= 0)
+		return sb.st_size;
+	fprintf(stderr,"fstat64 failed for %s", fname);
+	perror("fstat64");
+	errno = entry_errno;
+	return -1;
+}
+	
+
+/*
+ * Print the block map of the file fname: one line per extent, or a table
+ * with allocation group columns when verbose output is on.
+ * Returns 0 on success (a file with no extents included), 1 on failure.
+ */
+int
+dofile(char *fname)
+{
+	int		fd;
+	struct fsxattr	fsx;
+	int		i;
+	struct getbmapx	*map, *newmap;
+	char		mbuf[1024];
+	int		map_size;
+	int		loop = 0;
+	int		verbose = vflag;	/* local copy: one file must not mute the next */
+	xfs_fsop_geom_t fsgeo;
+
+	fd = open(fname, O_RDONLY);
+	if (fd < 0) {
+		snprintf(mbuf, sizeof(mbuf), "open %s", fname);
+		perror(mbuf);
+		return 1;
+	}
+
+	if (verbose) {
+		if (ioctl(fd, XFS_IOC_FSGEOMETRY, &fsgeo) < 0) {
+			snprintf(mbuf, sizeof(mbuf), "Can't get XFS geom, %s", fname);
+			perror(mbuf);
+			close(fd);
+			return 1;
+		}
+
+		if (verbose > 1)
+			printf(
+	"xfs_bmap: fsgeo.agblocks=%u, fsgeo.blocksize=%u, fsgeo.agcount=%u\n",
+					fsgeo.agblocks, fsgeo.blocksize,
+					fsgeo.agcount);
+
+		if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
+			snprintf(mbuf, sizeof(mbuf), "Can't read attrs %s", fname);
+			perror(mbuf);
+			close(fd);
+			return 1;
+		}
+
+		if (verbose > 1)
+			printf(
+    "xfs_bmap: fsx.fsx_xflags=%u, fsx.fsx_extsize=%u, fsx.fsx_nextents=%u\n",
+					fsx.fsx_xflags, fsx.fsx_extsize,
+					fsx.fsx_nextents);
+
+		/* AG info is not applicable to realtime files: drop the AG output. */
+		if (fsx.fsx_xflags & XFS_XFLAG_REALTIME)
+			verbose = 0;
+	}
+
+	map_size = nflag > 0 ? nflag+1 : 32;	/* header + extents; 32 is a first guess */
+	map = malloc(map_size*sizeof(*map));
+	if (map == NULL) {
+		fprintf(stderr, "malloc of %lu bytes failed.\n",
+				(unsigned long)(map_size*sizeof(*map)));
+		close(fd);
+		return 1;
+	}
+
+/*	Try the ioctl(XFS_IOC_GETBMAPX) for the number of extents specified by
+ *	nflag, or the initial guess number of extents (32).
+ *
+ *	If there are more extents than we guessed, use ioctl
+ *	(XFS_IOC_FSGETXATTR[A]) to get the extent count, realloc some more
+ *	space based on this count, and try again.
+ *
+ *	If the initial XFS_IOC_GETBMAPX attempt returns EINVAL, this may mean
+ *	that we tried the XFS_IOC_GETBMAPX on a zero length file.  If we get
+ *	EINVAL, check the length with fstat() and return "no extents"
+ *	if the length == 0.
+ *
+ *	Why not do the ioctl(XFS_IOC_FSGETXATTR[A]) first?  Two reasons:
+ *	(1)	The extent count may be wrong for a file with delayed
+ *		allocation blocks.  The XFS_IOC_GETBMAPX forces the real
+ *		allocation and fixes up the extent count.
+ *	(2)	For XFS_IOC_GETBMAP[X] on a DMAPI file that has been moved
+ *		offline by a DMAPI application (e.g., DMF) the
+ *		XFS_IOC_FSGETXATTR only reflects the extents actually online.
+ *		Doing XFS_IOC_GETBMAPX call first forces the data blocks online
+ *		and then everything proceeds normally (see PV #545725).
+ *
+ *		If you don't want this behavior on a DMAPI offline file,
+ *		try the "-d" option which sets the BMV_IF_NO_DMAPI_READ
+ *		iflag for XFS_IOC_GETBMAPX.
+ */
+
+	do {	/* loop a maximum of two times */
+		bzero(map, sizeof(*map));	/* zero header */
+
+		map->bmv_length = -1;
+		map->bmv_count = map_size;
+		map->bmv_iflags = bmv_iflags;
+
+		i = ioctl(fd, XFS_IOC_GETBMAPX, map);
+
+		if (verbose > 1)
+			printf(
+		"xfs_bmap: i=%d map.bmv_offset=%lld, map.bmv_block=%lld, "
+		"map.bmv_length=%lld, map.bmv_count=%d, map.bmv_entries=%d\n",
+					i, map->bmv_offset, map->bmv_block,
+					map->bmv_length, map->bmv_count,
+					map->bmv_entries);
+		if (i < 0) {
+			if (errno == EINVAL
+			    && !aflag && file_size(fd, fname) == 0)
+				break;	/* zero length file: reported below */
+
+			snprintf(mbuf, sizeof(mbuf),
+				"ioctl(XFS_IOC_GETBMAPX (iflags 0x%x) %s",
+				map->bmv_iflags, fname);
+			perror(mbuf);
+			close(fd);
+			free(map);
+			return 1;
+		}
+		if (nflag)
+			break;
+		if (map->bmv_entries < map->bmv_count-1)
+			break;
+		/* The map may be full: get the extent count from
+		 * XFS_IOC_FSGETXATTR[A] so the buffer can be grown. */
+		i = ioctl(fd, aflag ? XFS_IOC_FSGETXATTRA : XFS_IOC_FSGETXATTR, &fsx);
+		if (i < 0) {
+			snprintf(mbuf, sizeof(mbuf), "ioctl(XFS_IOC_FSGETXATTR%s) %s",
+				aflag ? "A" : "", fname);
+			perror(mbuf);
+			close(fd);
+			free(map);
+			return 1;
+		}
+		if (fsx.fsx_nextents >= map_size-1) {
+			map_size = 2*(fsx.fsx_nextents+1);
+			newmap = realloc(map, map_size*sizeof(*map));
+			if (newmap == NULL) {
+				fprintf(stderr,"cannot realloc %lu bytes.\n",
+					(unsigned long)(map_size*sizeof(*map)));
+				free(map);	/* a failed realloc leaves the old block allocated */
+				close(fd);
+				return 1;
+			}
+			map = newmap;
+		}
+	} while (++loop < 2);
+	if (!nflag) {
+		if (map->bmv_entries <= 0) {
+			printf("%s: no extents\n", fname);
+			close(fd);
+			free(map);
+			return 0;
+		}
+	}
+	close(fd);
+	printf("%s:\n", fname);
+	if (!verbose) {
+		for (i = 0; i < map->bmv_entries; i++) {
+			printf("\t%d: [%lld..%lld]: ", i,
+				map[i + 1].bmv_offset,
+				map[i + 1].bmv_offset +
+				map[i + 1].bmv_length - 1LL);
+			if (map[i + 1].bmv_block == -1)
+				printf("hole");
+			else {
+				printf("%lld..%lld", map[i + 1].bmv_block,
+					map[i + 1].bmv_block +
+						map[i + 1].bmv_length - 1LL);
+			}
+			if (lflag)
+				printf(" %lld blocks\n", map[i+1].bmv_length);
+			else
+				printf("\n");
+		}
+	} else {
+		/*
+		 * Verbose mode displays:
+		 *   extent: [startoffset..endoffset]: startblock..endblock \
+		 *   	ag# (agoffset..agendoffset) totalbbs
+		 */
+#define MINRANGE_WIDTH	16
+#define MINAG_WIDTH	2
+#define MINTOT_WIDTH	5
+#define	max(a,b)	((a) > (b) ? (a) : (b))
+		int	  agno;
+		__off64_t agoff, bbperag;
+		int 	  foff_w, boff_w, aoff_w, tot_w, agno_w;
+		char 	  rbuf[64], bbuf[64], abuf[64];	/* room for two 64-bit values */
+
+		foff_w = boff_w = aoff_w = MINRANGE_WIDTH;
+		tot_w = MINTOT_WIDTH;
+		bbperag = (__off64_t)fsgeo.agblocks *
+		          (__off64_t)fsgeo.blocksize / BBSIZE;
+
+		/*
+		 * Go through the extents and figure out the width
+		 * needed for all columns.
+		 */
+		for (i = 0; i < map->bmv_entries; i++) {
+			snprintf(rbuf, sizeof(rbuf), "[%lld..%lld]:",
+				map[i + 1].bmv_offset,
+				map[i + 1].bmv_offset +
+				map[i + 1].bmv_length - 1LL);
+			if (map[i + 1].bmv_block == -1) {
+				foff_w = max(foff_w, strlen(rbuf));
+				tot_w = max(tot_w,
+					numlen(map[i+1].bmv_length));
+			} else {
+				snprintf(bbuf, sizeof(bbuf), "%lld..%lld",
+					map[i + 1].bmv_block,
+					map[i + 1].bmv_block +
+						map[i + 1].bmv_length - 1LL);
+				agno = map[i + 1].bmv_block / bbperag;
+				agoff = map[i + 1].bmv_block - (agno * bbperag);
+				snprintf(abuf, sizeof(abuf), "(%lld..%lld)",
+					agoff,
+					(agoff + map[i + 1].bmv_length - 1LL));
+				foff_w = max(foff_w, strlen(rbuf));
+				boff_w = max(boff_w, strlen(bbuf));
+				aoff_w = max(aoff_w, strlen(abuf));
+				tot_w = max(tot_w,
+					numlen(map[i+1].bmv_length));
+			}
+		}
+		agno_w = max(MINAG_WIDTH, numlen(fsgeo.agcount));
+		printf("%4s: %-*s %-*s %*s %-*s %*s\n",
+			"EXT",
+			foff_w, "FILE-OFFSET",
+			boff_w, "BLOCK-RANGE",
+			agno_w, "AG",
+			aoff_w, "AG-OFFSET",
+			tot_w, "TOTAL");
+		for (i = 0; i < map->bmv_entries; i++) {
+			snprintf(rbuf, sizeof(rbuf), "[%lld..%lld]:",
+				map[i + 1].bmv_offset,
+				map[i + 1].bmv_offset +
+				map[i + 1].bmv_length - 1LL);
+			if (map[i + 1].bmv_block == -1) {
+				printf("%4d: %-*s %-*s %*s %-*s %*lld\n",
+					i,
+					foff_w, rbuf,
+					boff_w, "hole",
+					agno_w, "",
+					aoff_w, "",
+					tot_w, map[i+1].bmv_length);
+			} else {
+				snprintf(bbuf, sizeof(bbuf), "%lld..%lld",
+					map[i + 1].bmv_block,
+					map[i + 1].bmv_block +
+						map[i + 1].bmv_length - 1LL);
+				agno = map[i + 1].bmv_block / bbperag;
+				agoff = map[i + 1].bmv_block - (agno * bbperag);
+				snprintf(abuf, sizeof(abuf), "(%lld..%lld)",
+					agoff,
+					(agoff + map[i + 1].bmv_length - 1LL));
+				printf("%4d: %-*s %-*s %*d %-*s %*lld\n",
+					i,
+					foff_w, rbuf,
+					boff_w, bbuf,
+					agno_w, agno,
+					aoff_w, abuf,
+					tot_w, map[i+1].bmv_length);
+			}
+		}
+	}
+	free(map);
+	return 0;
+}
+
+/* Number of decimal digits in val; any value below 10 (0 and negatives too) gives 1. */
+int
+numlen( __off64_t val)
+{
+	int width = 1;
+	for (; val >= 10; val /= 10)
+		width++;
+	return width;
+}
diff --git a/build/Makefile b/build/Makefile
new file mode 100644
index 000000000..7e336de44
--- /dev/null
+++ b/build/Makefile
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+MANIFEST=src-manifest
+SRCTAR=$(PACKAGE_NAME)-$(PACKAGE_VERSION).src.tar.gz
+SRCTAR2=$(PACKAGE_NAME)_$(PACKAGE_VERSION).orig.tar.gz
+
+LDIRT = $(MANIFEST) $(SRCTAR) $(SRCTAR2) bin-manifest $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION)
+
+# for clean and clobber
+SUBDIRS = tar rpm deb
+
+# nothing to build here (it's all packaging)
+default install :
+
+include $(BUILDRULES)
+
+# Symlink in the TOPDIR is used to pack files relative to
+# product-version directory.
+$(MANIFEST) : $(_FORCE)
+	@if [ ! -L $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) ] ; then \
+	    $(LN_S) . $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) ; \
+	fi
+	@CDIR=`pwd`; cd $(TOPDIR); \
+	$(MAKE) --no-print-directory source | \
+	    sed -e 's/^\./$(PACKAGE_NAME)-$(PACKAGE_VERSION)/' > $$CDIR/$@ ;\
+	if [ $$?  -ne 0 ] ; then  \
+	    exit 1; \
+	else \
+	    unset TAPE; \
+	    $(TAR) -T $$CDIR/$@ -cf - | $(ZIP) --best > $$CDIR/$(SRCTAR); \
+	    $(LN_S) $$CDIR/$(SRCTAR) $$CDIR/$(SRCTAR2); \
+	fi
+
+# Install with DIST_ROOT exported, then package; KEEP_DIST_ROOT skips cleanup.
+dist : default $(MANIFEST)
+	@DIST_MANIFEST=`pwd`/bin-manifest; DIST_ROOT=/tmp/$$$$; \
+	export DIST_MANIFEST DIST_ROOT; rm -f $$DIST_MANIFEST; \
+	echo === install === && $(MAKE) -C $(TOPDIR) install || exit $$?; \
+	if [ -x $(TAR) ]; then \
+	    ( echo "=== tar ===" && $(MAKEF) -C tar $@ || exit $$? ); \
+	fi; \
+	if [ -x $(RPM) ]; then \
+	    ( echo "=== rpm ===" && $(MAKEF) -C rpm $@ || exit $$? ); \
+	fi; \
+	test -n "$$KEEP_DIST_ROOT" || rm -rf $$DIST_ROOT; echo Done
diff --git a/build/rpm/Makefile b/build/rpm/Makefile
new file mode 100644
index 000000000..26253705d
--- /dev/null
+++ b/build/rpm/Makefile
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+TREEROOT = $(shell cd ${TOPDIR}; pwd)
+include $(TOPDIR)/include/builddefs
+
+SPECF = xfs-cmds.spec
+
+LDIRT = $(PACKAGE_NAME)-$(PACKAGE_VERSION)-[0-9]*.*.rpm $(SPECF) \
+	rpmmacros rpm-$(RPM_VERSION).rc
+
+LSRCFILES = macros.template $(SPECF).in rpm-2.rc.template
+
+default install :
+
+include $(BUILDRULES)
+
+# generate a binary rpm file
+dist : default $(SPECF) rpm-$(RPM_VERSION).rc
+	$(RPM) -ba --rcfile ./rpm-$(RPM_VERSION).rc $(SPECF)
+
+# rpm before v2.90 has no macro support and rpm v3 rejects the old-style
+# config, so generate whichever rc file matches the rpm version in use.
+ifneq ($(RPM_VERSION),2)
+rpm-$(RPM_VERSION).rc : rpmmacros
+	sed -e '/^macrofiles:/s|~/.rpmmacros|./rpmmacros|' </usr/lib/rpm/rpmrc >$@
+
+rpmmacros : macros.template
+	@sed -e 's|%topdir%|$(TREEROOT)|g' < $< > $@
+else
+rpm-2.rc: rpm-2.rc.template
+	@sed -e 's|%topdir%|$(TOPDIR)|g' < $< > $@
+endif
+
+.PHONY: $(SPECF)
+${SPECF} : ${SPECF}.in
+	sed -e's|@package_name@|$(PACKAGE_NAME)|g' \
+	    -e's|@package_version@|$(PACKAGE_VERSION)|g' \
+	    -e's|@package_release@|$(PACKAGE_RELEASE)|g' \
+	    -e's|@package_distribution@|$(PACKAGE_DISTRIBUTION)|g' \
+	    -e's|@package_builder@|$(PACKAGE_BUILDER)|g' \
+	    -e's|@build_root@|$(DIST_ROOT)|g' \
+	    -e'/^BuildRoot: *$$/d' \
+	    -e's|@xfs_cmds_var_dir@|$(XFS_CMDS_VAR_DIR)|g' \
+	    -e's|@xfs_cmds_share_dir@|$(XFS_CMDS_SHARE_DIR)|g' \
+	    -e's|@xfs_cmds_log_dir@|$(XFS_CMDS_LOG_DIR)|g' \
+	    -e's|@xfs_cmds_tmp_dir@|$(XFS_CMDS_TMP_DIR)|g' \
+	    -e's|@make@|$(MAKE)|g' < $< > $@
diff --git a/build/rpm/macros.template b/build/rpm/macros.template
new file mode 100644
index 000000000..6ab46e1e1
--- /dev/null
+++ b/build/rpm/macros.template
@@ -0,0 +1,30 @@
+#
+# rpmrc.template
+#
+# Template to fudge rpm directory structure inside IRIX-like build
+# environment
+
+# Force 386 build on all platforms
+%_target i386-pc-linux
+%_target_cpu i386
+%_target_os linux
+
+# topdir == $(WORKAREA)
+%_topdir %topdir%
+
+# Following directories are specific to the topdir
+# This is where build is done. In our case it's the same as $WORKAREA
+%_builddir %topdir%
+
+# This is where foo.1.99.tar.gz is living in the real world.
+# Be careful not to run full rpm build as it will override the sources 
+%_sourcedir %topdir%/build
+
+# This is where binary RPM and source RPM would end up
+%_rpmdir    %topdir%/build/rpm
+%_srcrpmdir %topdir%/build/rpm
+%_specdir   %topdir%/build/rpm
+
+# Leave RPM files in the same directory - we're not building for 
+# multiple architectures
+%_rpmfilename %%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm
diff --git a/build/rpm/rpm-2.rc.template b/build/rpm/rpm-2.rc.template
new file mode 100644
index 000000000..f3b3eba3a
--- /dev/null
+++ b/build/rpm/rpm-2.rc.template
@@ -0,0 +1,25 @@
+#
+# rpmrc.template
+#
+# Template to fudge rpm directory structure inside IRIX-like build
+# environment
+
+# topdir == $(WORKAREA)
+topdir: %topdir%
+
+# Following directories are specific to the topdir
+# This is where build is done. In our case it's the same as $WORKAREA
+# Be careful not to run full rpm build as it will override the sources 
+builddir: %topdir%
+
+# This is where foo.1.99.tar.gz is living in the real world.
+sourcedir: %topdir%/build
+
+# This is where binary RPM and source RPM would end up
+rpmdir:    %topdir%/build/rpm
+srcrpmdir:  %topdir%/build/rpm
+specdir:   %topdir%/build/rpm
+
+# Leave RPM files in the same directory - we're not building for 
+# multiple architectures
+rpmfilename: %{NAME}-%{VERSION}-%{RELEASE}.%{ARCH}.rpm
diff --git a/build/tar/Makefile b/build/tar/Makefile
new file mode 100644
index 000000000..e010d9736
--- /dev/null
+++ b/build/tar/Makefile
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/include/builddefs
+
+BINTAR=$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz
+LDIRT = $(BINTAR)
+
+default install :
+
+include $(BUILDRULES)
+
+dist : default
+	@HERE=`pwd`; cd $${DIST_ROOT:-/}; \
+	sort $$HERE/../bin-manifest | uniq | $(AWK) ' \
+		$$1 == "f" { printf (".%s\n", $$6); } \
+		$$1 == "d" { next; } \
+		$$1 == "l" { printf (".%s\n", $$3); }' \
+	| $(TAR) -T - -cf - | $(ZIP) --best > $$HERE/$(BINTAR)
+	@echo Wrote: `pwd`/$(BINTAR)
diff --git a/db/Makefile b/db/Makefile
new file mode 100644
index 000000000..74cacbae3
--- /dev/null
+++ b/db/Makefile
@@ -0,0 +1,58 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = xfs_db
+CMDDEPS = $(LIBXFS)
+
+HFILES = addr.h agf.h agfl.h agi.h attr.h attrshort.h bit.h block.h bmap.h \
+	bmapbt.h bmroot.h bnobt.h check.h cntbt.h command.h convert.h data.h \
+	dbread.h debug.h dir.h dir2.h dir2sf.h dirshort.h dquot.h echo.h \
+	faddr.h field.h flist.h fprint.h frag.h freesp.h hash.h help.h \
+	init.h inobt.h inode.h input.h io.h malloc.h mount.h output.h \
+	print.h quit.h sb.h uuid.h sig.h strvec.h type.h write.h
+CFILES = $(HFILES:.h=.c) main.c
+LSRCFILES = xfs_admin.sh xfs_check.sh xfs_ncheck.sh
+LLDLIBS	= $(LIBXFS) $(LIBUUID)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 xfs_admin.sh $(XFS_CMDS_BIN_DIR)/xfs_admin
+	$(INSTALL) -m 755 xfs_check.sh $(XFS_CMDS_BIN_DIR)/xfs_check
+	$(INSTALL) -m 755 xfs_ncheck.sh $(XFS_CMDS_BIN_DIR)/xfs_ncheck
diff --git a/db/addr.c b/db/addr.c
new file mode 100644
index 000000000..cb69ecf61
--- /dev/null
+++ b/db/addr.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "addr.h"
+#include "command.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "flist.h"
+#include "inode.h"
+#include "output.h"
+
+static int addr_f(int argc, char **argv);
+static void addr_help(void);
+
+static const cmdinfo_t addr_cmd =
+	{ "addr", "a", addr_f, 0, 1, 1, "[field-expression]",
+	  "set current address", addr_help };
+
+static void
+addr_help(void)
+{
+	dbprintf(
+"\n"
+" 'addr' uses the given field to set the filesystem address and type\n"
+"\n"
+" Examples:\n"
+"\n"
+" sb\n"
+" a rootino - set the type to inode and set position to the root inode\n"
+" a u.bmx[0].startblock (for inode with blockmap)\n"
+"\n"
+);
+
+}
+
+/* "addr" command: show the current position, or set it from a field. */
+static int
+addr_f(
+	int		argc,
+	char		**argv)
+{
+	adfnc_t		adf;
+	const ftattr_t	*fa;
+	flist_t		*fl;
+	const field_t	*fld;
+	typnm_t		next;
+	flist_t		*tfl;
+
+	if (argc == 1) {
+		print_iocur("current", iocur_top);
+		return 0;
+	}
+	if (cur_typ == NULL) {
+		dbprintf("no current type\n");
+		return 0;
+	}
+	fld = cur_typ->fields;
+	if (fld != NULL && fld->name[0] == '\0') {
+		fa = &ftattrtab[fld->ftyp];
+		ASSERT(fa->ftyp == fld->ftyp);
+		fld = fa->subfld;
+	}
+	if (fld == NULL) {
+		dbprintf("no fields for type %s\n", cur_typ->name);
+		return 0;
+	}
+	fl = flist_scan(argv[1]);
+	if (fl == NULL)
+		return 0;
+	/* From here on fl is owned by us: every exit must go through "out". */
+	if (!flist_parse(fld, fl, iocur_top->data, 0))
+		goto out;
+	flist_print(fl);
+	for (tfl = fl; tfl->child != NULL; tfl = tfl->child) {
+		if ((tfl->flags & FL_OKLOW) && tfl->low < tfl->high) {
+			dbprintf("array not allowed for addr command\n");
+			goto out;
+		}
+	}
+	fld = tfl->fld;
+	next = fld->next;
+	if (next == TYP_INODATA)
+		next = inode_next_type();
+	if (next == TYP_NONE) {
+		dbprintf("no next type for field %s\n", fld->name);
+		goto out;
+	}
+	fa = &ftattrtab[fld->ftyp];
+	ASSERT(fa->ftyp == fld->ftyp);
+	adf = fa->adfunc;
+	if (adf == NULL) {
+		dbprintf("no addr function for field %s (type %s)\n",
+			fld->name, fa->name);
+		goto out;
+	}
+	(*adf)(iocur_top->data, tfl->offset, next);
+out:
+	flist_free(fl);
+	return 0;
+}
+
+void
+addr_init(void)
+{
+	add_command(&addr_cmd);
+}
diff --git a/db/addr.h b/db/addr.h
new file mode 100644
index 000000000..4b61e4d4e
--- /dev/null
+++ b/db/addr.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	addr_init(void);
diff --git a/db/agf.c b/db/agf.c
new file mode 100644
index 000000000..a9a5c4425
--- /dev/null
+++ b/db/agf.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agf.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int agf_f(int argc, char **argv);
+static void agf_help(void);
+
+static const cmdinfo_t agf_cmd =
+	{ "agf", NULL, agf_f, 0, 1, 1, "[agno]",
+	  "set address to agf header", agf_help };
+
+const field_t	agf_hfld[] = {
+	{ "", FLDT_AGF, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_agf_t, agf_ ## f))
+#define	SZ(f)	bitszof(xfs_agf_t, agf_ ## f)
+const field_t	agf_flds[] = {
+	{ "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+	{ "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+	{ "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE },
+	{ "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE },
+	{ "roots", FLDT_AGBLOCK, OI(OFF(roots)), CI(XFS_BTNUM_AGF),
+	  FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
+	{ "bnoroot", FLDT_AGBLOCK,
+	  OI(OFF(roots) + XFS_BTNUM_BNO * SZ(roots[XFS_BTNUM_BNO])), C1, 0,
+	  TYP_BNOBT },
+	{ "cntroot", FLDT_AGBLOCK,
+	  OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0,
+	  TYP_CNTBT },
+	{ "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
+	  FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
+	{ "bnolevel", FLDT_UINT32D,
+	  OI(OFF(levels) + XFS_BTNUM_BNO * SZ(levels[XFS_BTNUM_BNO])), C1, 0,
+	  TYP_NONE },
+	{ "cntlevel", FLDT_UINT32D,
+	  OI(OFF(levels) + XFS_BTNUM_CNT * SZ(levels[XFS_BTNUM_CNT])), C1, 0,
+	  TYP_NONE },
+	{ "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE },
+	{ "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE },
+	{ "flcount", FLDT_UINT32D, OI(OFF(flcount)), C1, 0, TYP_NONE },
+	{ "freeblks", FLDT_EXTLEN, OI(OFF(freeblks)), C1, 0, TYP_NONE },
+	{ "longest", FLDT_EXTLEN, OI(OFF(longest)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+static void
+agf_help(void)
+{
+	dbprintf(
+"\n"
+" set allocation group free block list\n"
+"\n"
+" Example:\n"
+"\n"
+" agf 2 - move location to AGF in 2nd filesystem allocation group\n"
+"\n"
+" Located in the 2nd 512 byte block of each allocation group,\n"
+" the AGF contains the root of two different freespace btrees:\n"
+" The 'cnt' btree keeps track freespace indexed on section size.\n"
+" The 'bno' btree tracks sections of freespace indexed on block number.\n"
+);
+}
+
+static int
+agf_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	char		*p;
+
+	if (argc > 1) {
+		agno = (xfs_agnumber_t)strtoul(argv[1], &p, 0);
+		if (*p != '\0' || agno >= mp->m_sb.sb_agcount) {
+			dbprintf("bad allocation group number %s\n", argv[1]);
+			return 0;
+		}
+		cur_agno = agno;
+	} else if (cur_agno == NULLAGNUMBER)
+		cur_agno = 0;
+	ASSERT(typtab[TYP_AGF].typnm == TYP_AGF);
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, cur_agno, XFS_AGF_DADDR), 1,
+		DB_RING_ADD, NULL);
+	return 0;
+}
+
+void
+agf_init(void)
+{
+	add_command(&agf_cmd);
+}
+
+int
+agf_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/agf.h b/db/agf.h
new file mode 100644
index 000000000..26ce84987
--- /dev/null
+++ b/db/agf.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	agf_flds[];
+extern const struct field	agf_hfld[];
+
+extern void	agf_init(void);
+extern int	agf_size(void *obj, int startoff, int idx);
diff --git a/db/agfl.c b/db/agfl.c
new file mode 100644
index 000000000..3d7f0a06d
--- /dev/null
+++ b/db/agfl.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agfl.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int agfl_f(int argc, char **argv);
+static void agfl_help(void);
+
+static const cmdinfo_t agfl_cmd =
+	{ "agfl", NULL, agfl_f, 0, 1, 1, "[agno]", 
+	  "set address to agfl block", agfl_help };
+
+const field_t	agfl_hfld[] = {
+	{ "", FLDT_AGFL, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_agfl_t, agfl_ ## f))
+const field_t	agfl_flds[] = {
+	{ "bno", FLDT_AGBLOCKNZ, OI(OFF(bno)), CI(XFS_AGFL_SIZE), FLD_ARRAY,
+	  TYP_DATA },
+	{ NULL }
+};
+
+/* Print the long help text for the "agfl" command. */
+static void
+agfl_help(void)
+{
+	dbprintf(
+"\n"
+" set allocation group freelist\n"
+"\n"
+" Example:\n"
+"\n"
+" agfl 5\n"
+"\n"
+" Located in the 4th 512 byte block of each allocation group,\n"
+" the agfl freelist for internal btree space allocation is maintained\n"
+" for each allocation group.  This acts as a reserved pool of space\n"
+" separate from the general filesystem freespace (not used for user data).\n"
+"\n"
+);
+}
+
+static int
+agfl_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	char		*p;
+
+	if (argc > 1) {
+		agno = (xfs_agnumber_t)strtoul(argv[1], &p, 0);
+		if (*p != '\0' || agno >= mp->m_sb.sb_agcount) {
+			dbprintf("bad allocation group number %s\n", argv[1]);
+			return 0;
+		}
+		cur_agno = agno;
+	} else if (cur_agno == NULLAGNUMBER)
+		cur_agno = 0;
+	ASSERT(typtab[TYP_AGFL].typnm == TYP_AGFL);
+	set_cur(&typtab[TYP_AGFL], XFS_AG_DADDR(mp, cur_agno, XFS_AGFL_DADDR),
+		1, DB_RING_ADD, NULL);
+	return 0;
+}
+
+void
+agfl_init(void)
+{
+	add_command(&agfl_cmd);
+}
+
+/*ARGSUSED*/
+int
+agfl_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/agfl.h b/db/agfl.h
new file mode 100644
index 000000000..37670c381
--- /dev/null
+++ b/db/agfl.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	agfl_flds[];
+extern const struct field	agfl_hfld[];
+
+extern void	agfl_init(void);
+extern int	agfl_size(void *obj, int startoff, int idx);
diff --git a/db/agi.c b/db/agi.c
new file mode 100644
index 000000000..f1c56516e
--- /dev/null
+++ b/db/agi.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agi.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int agi_f(int argc, char **argv);
+static void agi_help(void);
+
+static const cmdinfo_t agi_cmd =
+	{ "agi", NULL, agi_f, 0, 1, 1, "[agno]",
+	  "set address to agi header", agi_help };
+
+const field_t	agi_hfld[] = {
+	{ "", FLDT_AGI, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_agi_t, agi_ ## f))
+const field_t	agi_flds[] = {
+	{ "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+	{ "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+	{ "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE },
+	{ "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE },
+	{ "count", FLDT_AGINO, OI(OFF(count)), C1, 0, TYP_NONE },
+	{ "root", FLDT_AGBLOCK, OI(OFF(root)), C1, 0, TYP_INOBT },
+	{ "level", FLDT_UINT32D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "freecount", FLDT_AGINO, OI(OFF(freecount)), C1, 0, TYP_NONE },
+	{ "newino", FLDT_AGINO, OI(OFF(newino)), C1, 0, TYP_INODE },
+	{ "dirino", FLDT_AGINO, OI(OFF(dirino)), C1, 0, TYP_INODE },
+	{ "unlinked", FLDT_AGINONN, OI(OFF(unlinked)),
+	  CI(XFS_AGI_UNLINKED_BUCKETS), FLD_ARRAY, TYP_NONE },
+	{ NULL }
+};
+
+static void
+agi_help(void)
+{
+	dbprintf(
+"\n"
+" set allocation group inode btree\n"
+"\n"
+" Example:\n"
+"\n"
+" agi 3 (set location to 3rd allocation group inode btree and type to 'agi')\n"
+"\n"
+" Located in the 3rd 512 byte block of each allocation group,\n"
+" the agi inode btree tracks all used/free inodes in the allocation group.\n"
+" Inodes are allocated in 16k 'chunks', each btree entry tracks a 'chunk'.\n"
+"\n"
+);
+}
+
+static int
+agi_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	char		*p;
+
+	if (argc > 1) {
+		agno = (xfs_agnumber_t)strtoul(argv[1], &p, 0);
+		if (*p != '\0' || agno >= mp->m_sb.sb_agcount) {
+			dbprintf("bad allocation group number %s\n", argv[1]);
+			return 0;
+		}
+		cur_agno = agno;
+	} else if (cur_agno == NULLAGNUMBER)
+		cur_agno = 0;
+	ASSERT(typtab[TYP_AGI].typnm == TYP_AGI);
+	set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, cur_agno, XFS_AGI_DADDR), 1,
+		DB_RING_ADD, NULL);
+	return 0;
+}
+
+void
+agi_init(void)
+{
+	add_command(&agi_cmd);
+}
+
+/*ARGSUSED*/
+int
+agi_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/agi.h b/db/agi.h
new file mode 100644
index 000000000..6d3881d24
--- /dev/null
+++ b/db/agi.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;	/* incomplete type; enough to declare the arrays below */
+
+extern const struct field	agi_flds[];
+extern const struct field	agi_hfld[];
+
+extern void	agi_init(void);	/* adds the agi command (see agi.c) */
+extern int	agi_size(void *obj, int startoff, int idx);	/* sector size in bits */
diff --git a/db/attr.c b/db/attr.c
new file mode 100644
index 000000000..5828d0e2b
--- /dev/null
+++ b/db/attr.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "attr.h"
+#include "io.h"
+#include "data.h"
+#include "mount.h"
+
+static int	attr_leaf_entries_count(void *obj, int startoff);	/* count/offset callbacks named in the field tables below */
+static int	attr_leaf_hdr_count(void *obj, int startoff);
+static int	attr_leaf_name_local_count(void *obj, int startoff);
+static int	attr_leaf_name_local_name_count(void *obj, int startoff);
+static int	attr_leaf_name_local_value_count(void *obj, int startoff);
+static int	attr_leaf_name_local_value_offset(void *obj, int startoff,
+						  int idx);
+static int	attr_leaf_name_remote_count(void *obj, int startoff);
+static int	attr_leaf_name_remote_name_count(void *obj, int startoff);
+static int	attr_leaf_nvlist_count(void *obj, int startoff);
+static int	attr_leaf_nvlist_offset(void *obj, int startoff, int idx);
+static int	attr_node_btree_count(void *obj, int startoff);
+static int	attr_node_hdr_count(void *obj, int startoff);
+
+const field_t	attr_hfld[] = {	/* a single unnamed FLDT_ATTR entry at offset 0, then the terminator */
+	{ "", FLDT_ATTR, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	LOFF(f)	bitize(offsetof(xfs_attr_leafblock_t, f))	/* bit offset of f in a leaf block */
+#define	NOFF(f)	bitize(offsetof(xfs_da_intnode_t, f))	/* bit offset of f in a node block */
+const field_t	attr_flds[] = {
+	{ "hdr", FLDT_ATTR_LEAF_HDR, OI(LOFF(hdr)), attr_leaf_hdr_count,
+	  FLD_COUNT, TYP_NONE },	/* count callback yields 1 only for leaf magic */
+	{ "hdr", FLDT_ATTR_NODE_HDR, OI(NOFF(hdr)), attr_node_hdr_count,
+	  FLD_COUNT, TYP_NONE },	/* count callback yields 1 only for node magic */
+	{ "entries", FLDT_ATTR_LEAF_ENTRY, OI(LOFF(entries)),
+	  attr_leaf_entries_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ "btree", FLDT_ATTR_NODE_ENTRY, OI(NOFF(btree)), attr_node_btree_count,
+	  FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ "nvlist", FLDT_ATTR_LEAF_NAME, attr_leaf_nvlist_offset,
+	  attr_leaf_nvlist_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },	/* offset of each element comes from entries[idx].nameidx */
+	{ NULL }
+};
+
+#define	BOFF(f)	bitize(offsetof(xfs_da_blkinfo_t, f))	/* bit offset of f in xfs_da_blkinfo_t */
+const field_t	attr_blkinfo_flds[] = {
+	{ "forw", FLDT_ATTRBLOCK, OI(BOFF(forw)), C1, 0, TYP_ATTR },
+	{ "back", FLDT_ATTRBLOCK, OI(BOFF(back)), C1, 0, TYP_ATTR },
+	{ "magic", FLDT_UINT16X, OI(BOFF(magic)), C1, 0, TYP_NONE },
+	{ "pad", FLDT_UINT16X, OI(BOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },
+	{ NULL }
+};
+
+#define	LEOFF(f)	bitize(offsetof(xfs_attr_leaf_entry_t, f))	/* bit offset of f in a leaf entry */
+const field_t	attr_leaf_entry_flds[] = {
+	{ "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
+	{ "nameidx", FLDT_UINT16D, OI(LEOFF(nameidx)), C1, 0, TYP_NONE },
+	{ "flags", FLDT_UINT8X, OI(LEOFF(flags)), C1, FLD_SKIPALL, TYP_NONE },
+	{ "incomplete", FLDT_UINT1,	/* the three FLDT_UINT1 rows are one-bit views into flags */
+	  OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_INCOMPLETE_BIT - 1), C1,
+	  0, TYP_NONE },
+	{ "root", FLDT_UINT1,
+	  OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_ROOT_BIT - 1), C1, 0,
+	  TYP_NONE },
+	{ "local", FLDT_UINT1,
+	  OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_LOCAL_BIT - 1), C1, 0,
+	  TYP_NONE },
+	{ "pad2", FLDT_UINT8X, OI(LEOFF(pad2)), C1, FLD_SKIPALL, TYP_NONE },
+	{ NULL }
+};
+
+#define	LHOFF(f)	bitize(offsetof(xfs_attr_leaf_hdr_t, f))	/* bit offset of f in the leaf header */
+const field_t	attr_leaf_hdr_flds[] = {
+	{ "info", FLDT_ATTR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },
+	{ "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },
+	{ "usedbytes", FLDT_UINT16D, OI(LHOFF(usedbytes)), C1, 0, TYP_NONE },
+	{ "firstused", FLDT_UINT16D, OI(LHOFF(firstused)), C1, 0, TYP_NONE },
+	{ "holes", FLDT_UINT8D, OI(LHOFF(holes)), C1, 0, TYP_NONE },
+	{ "pad1", FLDT_UINT8X, OI(LHOFF(pad1)), C1, FLD_SKIPALL, TYP_NONE },
+	{ "freemap", FLDT_ATTR_LEAF_MAP, OI(LHOFF(freemap)),
+	  CI(XFS_ATTR_LEAF_MAPSIZE), FLD_ARRAY, TYP_NONE },	/* array of XFS_ATTR_LEAF_MAPSIZE map entries */
+	{ NULL }
+};
+
+#define	LMOFF(f)	bitize(offsetof(xfs_attr_leaf_map_t, f))	/* bit offset of f in a freemap entry */
+const field_t	attr_leaf_map_flds[] = {
+	{ "base", FLDT_UINT16D, OI(LMOFF(base)), C1, 0, TYP_NONE },
+	{ "size", FLDT_UINT16D, OI(LMOFF(size)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	LNOFF(f)	bitize(offsetof(xfs_attr_leaf_name_local_t, f))	/* bit offset in the local name layout */
+#define	LVOFF(f)	bitize(offsetof(xfs_attr_leaf_name_remote_t, f))	/* bit offset in the remote name layout */
+const field_t	attr_leaf_name_flds[] = {	/* local rows first, then remote rows; the count callbacks pick one set */
+	{ "valuelen", FLDT_UINT16D, OI(LNOFF(valuelen)),
+	  attr_leaf_name_local_count, FLD_COUNT, TYP_NONE },
+	{ "namelen", FLDT_UINT8D, OI(LNOFF(namelen)),
+	  attr_leaf_name_local_count, FLD_COUNT, TYP_NONE },
+	{ "name", FLDT_CHARNS, OI(LNOFF(nameval)),
+	  attr_leaf_name_local_name_count, FLD_COUNT, TYP_NONE },
+	{ "value", FLDT_CHARNS, attr_leaf_name_local_value_offset,
+	  attr_leaf_name_local_value_count, FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "valueblk", FLDT_UINT32X, OI(LVOFF(valueblk)),
+	  attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE },
+	{ "valuelen", FLDT_UINT32D, OI(LVOFF(valuelen)),
+	  attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE },
+	{ "namelen", FLDT_UINT8D, OI(LVOFF(namelen)),
+	  attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE },
+	{ "name", FLDT_CHARNS, OI(LVOFF(name)),
+	  attr_leaf_name_remote_name_count, FLD_COUNT, TYP_NONE },
+	{ NULL }
+};
+
+#define	EOFF(f)	bitize(offsetof(xfs_da_node_entry_t, f))	/* bit offset of f in a node entry */
+const field_t	attr_node_entry_flds[] = {
+	{ "hashval", FLDT_UINT32X, OI(EOFF(hashval)), C1, 0, TYP_NONE },
+	{ "before", FLDT_ATTRBLOCK, OI(EOFF(before)), C1, 0, TYP_ATTR },
+	{ NULL }
+};
+
+#define	HOFF(f)	bitize(offsetof(xfs_da_node_hdr_t, f))	/* bit offset of f in the node header */
+const field_t	attr_node_hdr_flds[] = {
+	{ "info", FLDT_ATTR_BLKINFO, OI(HOFF(info)), C1, 0, TYP_NONE },
+	{ "count", FLDT_UINT16D, OI(HOFF(count)), C1, 0, TYP_NONE },
+	{ "level", FLDT_UINT16D, OI(HOFF(level)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/*ARGSUSED*/
+static int
+attr_leaf_entries_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_leafblock_t	*block;
+	/* Count callback for "entries": hdr.count for a leaf block, else 0. */
+	ASSERT(startoff == 0);
+	block = obj;
+	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT)
+						!= XFS_ATTR_LEAF_MAGIC) {
+		return 0;
+	}
+
+	return INT_GET(block->hdr.count, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+attr_leaf_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_leafblock_t	*block;
+	/* Count callback for the leaf "hdr": 1 if the magic is the leaf magic, else 0. */
+	ASSERT(startoff == 0);
+	block = obj;
+	return INT_GET(block->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC;
+}
+
+static int
+attr_leaf_name_local_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_leafblock_t	*leaf = obj;
+	xfs_attr_leaf_entry_t	*ent;
+	int			byteoff;
+	int			n;
+	/*
+	 * Returns 1 when the leaf entry whose nameidx equals this name's
+	 * byte offset has XFS_ATTR_LOCAL set; 0 otherwise or for non-leaves.
+	 */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+		ent = &leaf->entries[n];
+		if (INT_GET(ent->nameidx, ARCH_CONVERT) == byteoff)
+			return !!(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL);
+	}
+	return 0;
+}
+
+static int
+attr_leaf_name_local_name_count(
+	void				*obj,
+	int				startoff)
+{
+	xfs_attr_leafblock_t		*leaf = obj;
+	xfs_attr_leaf_entry_t		*ent;
+	xfs_attr_leaf_name_local_t	*lname;
+	int				byteoff;
+	int				n;
+	/*
+	 * Length of the local "name": namelen of the entry whose nameidx
+	 * equals this byte offset, provided it is XFS_ATTR_LOCAL; else 0.
+	 */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+		ent = &leaf->entries[n];
+		if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+			continue;
+		if (!(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL))
+			return 0;
+		lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, n);
+		return INT_GET(lname->namelen, ARCH_CONVERT);
+	}
+	return 0;
+}
+
+static int
+attr_leaf_name_local_value_count(
+	void				*obj,
+	int				startoff)
+{
+	xfs_attr_leafblock_t		*leaf = obj;
+	xfs_attr_leaf_entry_t		*ent;
+	xfs_attr_leaf_name_local_t	*lname;
+	int				byteoff;
+	int				n;
+	/*
+	 * Length of the local "value": valuelen of the entry whose nameidx
+	 * equals this byte offset, provided it is XFS_ATTR_LOCAL; else 0.
+	 */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+		ent = &leaf->entries[n];
+		if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+			continue;
+		if (!(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL))
+			return 0;
+		lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, n);
+		return INT_GET(lname->valuelen, ARCH_CONVERT);
+	}
+	return 0;
+}
+
+/*ARGSUSED*/
+static int
+attr_leaf_name_local_value_offset(
+	void				*obj,
+	int				startoff,
+	int				idx)
+{
+	xfs_attr_leafblock_t		*leaf = obj;
+	xfs_attr_leaf_name_local_t	*lname;
+	int				byteoff;
+	int				nents;
+	int				n;
+	/*
+	 * Offset callback for the local "value" field: the value starts at
+	 * &nameval[namelen], measured from the start of the local name
+	 * structure.  Returns that distance in bits, or 0 when the block is
+	 * not a leaf or no entry's nameidx matches startoff.
+	 */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	for (n = 0; n < nents; n++) {
+		if (INT_GET(leaf->entries[n].nameidx, ARCH_CONVERT) == byteoff)
+			break;
+	}
+	if (n >= nents)
+		return 0;
+	lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, n);
+	return (int)bitize((char *)&lname->nameval[lname->namelen] - (char *)lname);
+}
+
+static int
+attr_leaf_name_remote_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_leafblock_t	*leaf = obj;
+	xfs_attr_leaf_entry_t	*ent;
+	int			byteoff;
+	int			n;
+	/*
+	 * 1 when the entry at this byte offset lacks XFS_ATTR_LOCAL, else 0.
+	 */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+		ent = &leaf->entries[n];
+		if (INT_GET(ent->nameidx, ARCH_CONVERT) == byteoff)
+			return !(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL);
+	}
+	return 0;
+}
+
+static int
+attr_leaf_name_remote_name_count(
+	void				*obj,
+	int				startoff)
+{
+	xfs_attr_leafblock_t		*leaf = obj;
+	xfs_attr_leaf_entry_t		*ent;
+	xfs_attr_leaf_name_remote_t	*rname;
+	int				byteoff;
+	int				n;
+	/*
+	 * Length of the remote "name": namelen of the entry whose nameidx
+	 * equals this byte offset, provided XFS_ATTR_LOCAL is clear; else 0.
+	 */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+		ent = &leaf->entries[n];
+		if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+			continue;
+		if (INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL)
+			return 0;
+		rname = XFS_ATTR_LEAF_NAME_REMOTE(leaf, n);
+		return INT_GET(rname->namelen, ARCH_CONVERT);
+	}
+	return 0;
+}
+
+/*ARGSUSED*/
+int
+attr_leaf_name_size(
+	void				*obj,
+	int				startoff,
+	int				idx)
+{
+	xfs_attr_leafblock_t		*block;
+	xfs_attr_leaf_entry_t		*e;
+	xfs_attr_leaf_name_local_t	*l;
+	xfs_attr_leaf_name_remote_t	*r;
+	/* Size in bits of name/value record idx (local or remote form); 0 if not a leaf. */
+	ASSERT(startoff == 0);
+	block = obj;
+	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT)
+						!= XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	e = &block->entries[idx];
+	if (INT_GET(e->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) {
+		l = XFS_ATTR_LEAF_NAME_LOCAL(block, idx);
+		return (int)bitize(XFS_ATTR_LEAF_ENTSIZE_LOCAL(INT_GET(l->namelen, ARCH_CONVERT),
+								INT_GET(l->valuelen, ARCH_CONVERT)));
+	} else {
+		r = XFS_ATTR_LEAF_NAME_REMOTE(block, idx);
+		return (int)bitize(XFS_ATTR_LEAF_ENTSIZE_REMOTE(INT_GET(r->namelen, ARCH_CONVERT)));
+	}
+}
+
+/*ARGSUSED*/
+static int
+attr_leaf_nvlist_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_leafblock_t	*block;
+	/* Count callback for "nvlist": hdr.count for a leaf block, else 0. */
+	ASSERT(startoff == 0);
+	block = obj;
+	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT)
+						!= XFS_ATTR_LEAF_MAGIC)
+		return 0;
+	return INT_GET(block->hdr.count, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+attr_leaf_nvlist_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_leafblock_t	*block;
+	xfs_attr_leaf_entry_t	*e;
+	/* Bit offset of nvlist[idx]: entries[idx].nameidx converted to bits; the magic is not checked here. */
+	ASSERT(startoff == 0);
+	block = obj;
+	e = &block->entries[idx];
+	return bitize(INT_GET(e->nameidx, ARCH_CONVERT));
+}
+
+/*ARGSUSED*/
+static int
+attr_node_btree_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_da_intnode_t	*block;
+	/* Count callback for "btree": hdr.count for a node block, else 0. */
+	ASSERT(startoff == 0);		/* this is a base structure */
+	block = obj;
+	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT)
+						!= XFS_DA_NODE_MAGIC)
+		return 0;
+	return INT_GET(block->hdr.count, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+attr_node_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_da_intnode_t	*block;
+	/* Count callback for the node "hdr": 1 if the magic is the node magic, else 0. */
+	ASSERT(startoff == 0);
+	block = obj;
+	return INT_GET(block->hdr.info.magic, ARCH_CONVERT)
+						== XFS_DA_NODE_MAGIC;
+}
+
+/*ARGSUSED*/	/* obj, startoff and idx are all ignored */
+int
+attr_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_blocksize);	/* superblock block size, in bits */
+}
diff --git a/db/attr.h b/db/attr.h
new file mode 100644
index 000000000..00ae7e98a
--- /dev/null
+++ b/db/attr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t	attr_flds[];	/* field tables defined in attr.c */
+extern const field_t	attr_hfld[];
+extern const field_t	attr_blkinfo_flds[];
+extern const field_t	attr_leaf_entry_flds[];
+extern const field_t	attr_leaf_hdr_flds[];
+extern const field_t	attr_leaf_map_flds[];
+extern const field_t	attr_leaf_name_flds[];
+extern const field_t	attr_node_entry_flds[];
+extern const field_t	attr_node_hdr_flds[];
+
+extern int	attr_leaf_name_size(void *obj, int startoff, int idx);	/* bits in name/value record idx */
+extern int	attr_size(void *obj, int startoff, int idx);	/* filesystem block size in bits */
diff --git a/db/attrshort.c b/db/attrshort.c
new file mode 100644
index 000000000..04477fe15
--- /dev/null
+++ b/db/attrshort.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bit.h"
+#include "attrshort.h"
+
+static int	attr_sf_entry_name_count(void *obj, int startoff);	/* count/offset callbacks named in the field tables below */
+static int	attr_sf_entry_value_count(void *obj, int startoff);
+static int	attr_sf_entry_value_offset(void *obj, int startoff, int idx);
+static int	attr_shortform_list_count(void *obj, int startoff);
+static int	attr_shortform_list_offset(void *obj, int startoff, int idx);
+
+#define	OFF(f)	bitize(offsetof(xfs_attr_shortform_t, f))	/* bit offset of f in the shortform structure */
+const field_t	attr_shortform_flds[] = {
+	{ "hdr", FLDT_ATTR_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE },
+	{ "list", FLDT_ATTR_SF_ENTRY, attr_shortform_list_offset,
+	  attr_shortform_list_count, FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE },	/* count and per-element offset come from callbacks */
+	{ NULL }
+};
+
+#define	HOFF(f)	bitize(offsetof(xfs_attr_sf_hdr_t, f))	/* bit offset of f in the shortform header */
+const field_t	attr_sf_hdr_flds[] = {
+	{ "totsize", FLDT_UINT16D, OI(HOFF(totsize)), C1, 0, TYP_NONE },
+	{ "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	EOFF(f)	bitize(offsetof(xfs_attr_sf_entry_t, f))	/* bit offset of f in a shortform entry */
+const field_t	attr_sf_entry_flds[] = {
+	{ "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE },
+	{ "valuelen", FLDT_UINT8D, OI(EOFF(valuelen)), C1, 0, TYP_NONE },
+	{ "flags", FLDT_UINT8X, OI(EOFF(flags)), C1, FLD_SKIPALL, TYP_NONE },
+	{ "root", FLDT_UINT1,	/* one-bit view into flags */
+	  OI(EOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_ROOT_BIT - 1), C1, 0,
+	  TYP_NONE },
+	{ "name", FLDT_CHARNS, OI(EOFF(nameval)), attr_sf_entry_name_count,
+	  FLD_COUNT, TYP_NONE },	/* length is the entry's namelen */
+	{ "value", FLDT_CHARNS, attr_sf_entry_value_offset,
+	  attr_sf_entry_value_count, FLD_COUNT|FLD_OFFSET, TYP_NONE },	/* starts at nameval[namelen], length valuelen */
+	{ NULL }
+};
+
+static int
+attr_sf_entry_name_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_sf_entry_t	*e;
+	/* Length of "name": namelen of the entry at byte offset startoff/NBBY in obj. */
+	ASSERT(bitoffs(startoff) == 0);
+	e = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff));
+	return e->namelen;
+}
+
+int
+attr_sf_entry_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_shortform_t	*sfattr;
+	xfs_attr_sf_entry_t	*ent;
+	int			left;
+
+	/* Bit size of list entry idx, reached by idx XFS_ATTR_SF_NEXTENTRY steps. */
+	ASSERT(bitoffs(startoff) == 0);
+	sfattr = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+	for (ent = &sfattr->list[0], left = idx; left > 0; left--)
+		ent = XFS_ATTR_SF_NEXTENTRY(ent);
+	return bitize((int)XFS_ATTR_SF_ENTSIZE(ent));
+}
+
+static int
+attr_sf_entry_value_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_sf_entry_t	*e;
+	/* Length of "value": valuelen of the entry at byte offset startoff/NBBY in obj. */
+	ASSERT(bitoffs(startoff) == 0);
+	e = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff));
+	return e->valuelen;
+}
+
+/*ARGSUSED*/
+static int
+attr_sf_entry_value_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_sf_entry_t	*e;
+	/* Bit offset of "value" within its entry: the value starts at nameval[namelen]. */
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	e = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff));
+	return bitize((int)((char *)&e->nameval[e->namelen] - (char *)e));
+}
+
+static int
+attr_shortform_list_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_attr_shortform_t	*sf;
+	/* Number of "list" elements: hdr.count of the shortform structure at startoff. */
+	ASSERT(bitoffs(startoff) == 0);
+	sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+	return sf->hdr.count;
+}
+
+static int
+attr_shortform_list_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_shortform_t	*sfattr;
+	xfs_attr_sf_entry_t	*ent;
+	int			left;
+
+	/* Bit offset of list[idx] from the start of the shortform structure. */
+	ASSERT(bitoffs(startoff) == 0);
+	sfattr = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+	for (ent = &sfattr->list[0], left = idx; left > 0; left--)
+		ent = XFS_ATTR_SF_NEXTENTRY(ent);
+	return bitize((int)((char *)ent - (char *)sfattr));
+}
+
+/*ARGSUSED*/
+int
+attrshort_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_shortform_t	*sfattr;
+	xfs_attr_sf_entry_t	*ent;
+	int			left;
+
+	/* Total size in bits: distance from the start to just past entry hdr.count - 1. */
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	sfattr = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+	for (ent = &sfattr->list[0], left = sfattr->hdr.count; left > 0; left--)
+		ent = XFS_ATTR_SF_NEXTENTRY(ent);
+	return bitize((int)((char *)ent - (char *)sfattr));
+}
diff --git a/db/attrshort.h b/db/attrshort.h
new file mode 100644
index 000000000..95c25b64f
--- /dev/null
+++ b/db/attrshort.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t	attr_sf_entry_flds[];	/* field tables defined in attrshort.c */
+extern const field_t	attr_sf_hdr_flds[];
+extern const field_t	attr_shortform_flds[];
+extern const field_t	attrshort_hfld[];	/* NOTE(review): no definition appears in attrshort.c as added here - confirm where it lives */
+
+extern int	attr_sf_entry_size(void *obj, int startoff, int idx);	/* bits in list entry idx */
+extern int	attrshort_size(void *obj, int startoff, int idx);	/* bits in header plus all entries */
diff --git a/db/bit.c b/db/bit.c
new file mode 100644
index 000000000..de3337877
--- /dev/null
+++ b/db/bit.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+
+#undef setbit	/* defined in param.h on Linux; removed so the static function below can use the name */
+
+static int	getbit(char *ptr, int bit);	/* read one bit, MSB-first numbering */
+static void	setbit(char *ptr, int bit, int val);	/* set or clear one bit, same numbering */
+
+static int
+getbit(
+	char	*ptr,
+	int	bit)
+{
+	/*
+	 * Bits are numbered MSB-first: bit 0 is the 0x80 bit of ptr[0],
+	 * bit 8 the 0x80 bit of ptr[1].  Yields 0 or 1 for bit >= 0.
+	 */
+	char	byte = ptr[byteize(bit)];
+	int	pos = 7 - bitoffs(bit);
+
+	return (byte & (1 << pos)) >> pos;
+}
+
+static void
+setbit(
+	char *ptr,
+	int  bit,
+	int  val)
+{
+	/*
+	 * Same MSB-first numbering as getbit(): bit 0 is the 0x80 bit of
+	 * ptr[0].  A non-zero val sets the addressed bit and a zero val
+	 * clears it; the other seven bits of that byte are left as they
+	 * were.
+	 */
+	char	*target = ptr + byteize(bit);
+	int	bitmask = 1 << (7 - bitoffs(bit));
+
+	if (val)
+		*target |= bitmask;
+	else
+		*target &= ~bitmask;
+}
+
+__int64_t
+getbitval(
+	void		*obj,
+	int		bitoff,
+	int		nbits,
+	int		flags)
+{
+	int		bit;
+	int		i;
+	char		*p;
+	__int64_t	rval;
+	int		signext;
+	int		z1, z2, z3, z4;
+	/* Return the nbits-wide field at bit offset bitoff of obj; sign-extend it if flags has BVSIGNED. */
+	ASSERT(nbits<=64);
+	/* Fast paths: a byte-aligned 8/16/32/64-bit field whose address passes the mask test is loaded via INT_GET. */
+	p = (char *)obj + byteize(bitoff);
+	bit = bitoffs(bitoff);
+	signext = (flags & BVSIGNED) != 0;
+	z4 = ((__psint_t)p & 0xf) == 0 && bit == 0;
+	if (nbits == 64 && z4) {
+		if (signext)
+			return (__int64_t)INT_GET(*(__int64_t *)p, ARCH_CONVERT);
+		else
+			return (__int64_t)INT_GET(*(__uint64_t *)p, ARCH_CONVERT);
+	}
+	z3 = ((__psint_t)p & 0x7) == 0 && bit == 0;
+	if (nbits == 32 && z3) {
+		if (signext)
+			return (__int64_t)INT_GET(*(__int32_t *)p, ARCH_CONVERT);
+		else
+			return (__int64_t)INT_GET(*(__uint32_t *)p, ARCH_CONVERT);
+	}
+	z2 = ((__psint_t)p & 0x3) == 0 && bit == 0;
+	if (nbits == 16 && z2) {
+		if (signext)
+			return (__int64_t)INT_GET(*(__int16_t *)p, ARCH_CONVERT);
+		else
+			return (__int64_t)INT_GET(*(__uint16_t *)p, ARCH_CONVERT);
+	}
+	z1 = ((__psint_t)p & 0x1) == 0 && bit == 0;
+	if (nbits == 8 && z1) {
+		if (signext)
+			return (__int64_t)INT_GET(*(__int8_t *)p, ARCH_CONVERT);
+		else
+			return (__int64_t)INT_GET(*(__uint8_t *)p, ARCH_CONVERT);
+	}
+	/* Everything else (bit-unaligned start, other widths, or an
+	 * address that failed the mask test) is built one bit at a time. */
+	for (i = 0, rval = 0LL; i < nbits; i++) {
+		if (!getbit(p, bit + i))
+			continue;
+		/*
+		 * getbit() numbers bits MSB-first within the byte stream, so
+		 * bit i of the field always has weight 2^(nbits - 1 - i).
+		 * That holds whatever the byte order of the host is, so no
+		 * host-endian special case is needed here; accumulating
+		 * 1 << i instead would reverse the bit order of the value.
+		 *
+		 * Bit 0 is therefore the sign bit.  When it is set on a
+		 * signed field narrower than 64 bits, pre-load rval with ones
+		 * in every position above the field.  The shifts are done on
+		 * unsigned operands because left-shifting a negative value
+		 * is undefined behaviour in C.
+		 */
+		if (i == 0 && signext && nbits < 64)
+			rval = (__int64_t)(~0ULL << nbits);
+		rval |= (__int64_t)(1ULL << (nbits - i - 1));
+	}
+	return rval;
+}
+
+void
+setbitval(
+	void *obuf,      /* buffer to write into */
+	int bitoff,      /* bit offset of where to write */
+	int nbits,       /* number of bits to write */
+	void *ibuf)      /* source bits */
+{
+	char	*in = (char *)ibuf;
+	char	*out = (char *)obuf;
+	int	bit;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+	int	big = 0;
+#else
+	int	big = 1;
+#endif
+	/*
+	 * There is no type information here, so fields of exactly 16, 32
+	 * or 64 bits are assumed to be integers (presumably supplied in
+	 * host byte order - confirm against the callers) and are byte
+	 * swapped on a little-endian host.  nbits counts bits, so it is
+	 * compared with bit widths rather than the byte sizes 2, 4, 8.
+	 */
+	if (big || (nbits != 16 && nbits != 32 && nbits != 64)) {
+		/*
+		 * Straight copy.  memcpy() only moves whole bytes, so it is
+		 * used only when both the start offset and the length are
+		 * byte multiples; anything else is copied bit by bit.
+		 */
+		if (bitoff % NBBY || nbits % NBBY) {
+			for (bit = 0; bit < nbits; bit++)
+				setbit(out, bit + bitoff, getbit(in, bit));
+		} else {
+			memcpy(out + byteize(bitoff), in, byteize(nbits));
+		}
+	} else {
+		int	ibit;
+		int	obit;
+		/*
+		 * Byte swap: begin with the last source byte and step back
+		 * one byte each time a byte's eight bits have been copied.
+		 */
+		out += byteize(bitoff);
+		obit = bitoffs(bitoff);
+		ibit = nbits - NBBY;
+		for (bit = 0; bit < nbits; bit++) {
+			setbit(out, bit + obit, getbit(in, ibit));
+			if (ibit % NBBY == NBBY - 1)
+				ibit -= NBBY * 2 - 1;
+			else
+				ibit++;
+		}
+	}
+}
diff --git a/db/bit.h b/db/bit.h
new file mode 100644
index 000000000..861755a0d
--- /dev/null
+++ b/db/bit.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#define	bitize(s)	((s) * NBBY)	/* bytes -> bits */
+#define	bitsz(t)	bitize(sizeof(t))	/* size of a type or object, in bits */
+#define	bitszof(x,y)	bitize(szof(x,y))	/* szof() is defined elsewhere */
+#define	byteize(s)	((s) / NBBY)	/* bits -> whole bytes (integer division) */
+#define	bitoffs(s)	((s) % NBBY)	/* bit position within its byte */
+
+#define	BVUNSIGNED	0	/* getbitval flags: zero-extend the result */
+#define	BVSIGNED	1	/* getbitval flags: sign-extend the result */
+
+extern __int64_t	getbitval(void *obj, int bitoff, int nbits, int flags);
+extern void             setbitval(void *obuf, int bitoff, int nbits, void *ibuf);
diff --git a/db/block.c b/db/block.c
new file mode 100644
index 000000000..31962268a
--- /dev/null
+++ b/db/block.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "block.h"
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "output.h"
+#include "mount.h"
+
+static int	ablock_f(int argc, char **argv);
+static void     ablock_help(void);
+static int	daddr_f(int argc, char **argv);
+static void     daddr_help(void);
+static int	dblock_f(int argc, char **argv);
+static void     dblock_help(void);
+static int	fsblock_f(int argc, char **argv);
+static void     fsblock_help(void);
+static void	print_rawdata(void *data, int len);
+
+static const cmdinfo_t	ablock_cmd =	/* NOTE(review): field order per cmdinfo_t */
+	{ "ablock", NULL, ablock_f, 1, 1, 1, "filoff",
+	  "set address to file offset (attr fork)", ablock_help };
+static const cmdinfo_t	daddr_cmd =	/* usage "[d]": argument optional */
+	{ "daddr", NULL, daddr_f, 0, 1, 1, "[d]", 
+	  "set address to daddr value", daddr_help };
+static const cmdinfo_t	dblock_cmd =	/* data-fork counterpart of ablock */
+	{ "dblock", NULL, dblock_f, 1, 1, 1, "filoff",
+	  "set address to file offset (data fork)", dblock_help };
+static const cmdinfo_t	fsblock_cmd =	/* "fsb": presumably an alias - confirm */
+	{ "fsblock", "fsb", fsblock_f, 0, 1, 1, "[fsb]",
+	  "set address to fsblock value", fsblock_help };
+
+static void 
+ablock_help(void)	/* help callback named in ablock_cmd: prints an example */
+{
+	dbprintf(
+"\n Example:\n"
+"\n"
+" 'ablock 23' - sets the file position to the 23rd filesystem block in\n"
+" the inode's attribute fork.  The filesystem block size is specified in\n"
+" the superblock.\n\n"
+);
+}
+
+/*ARGSUSED*/
+static int
+ablock_f(			/* "ablock filoff": seek to an attr-fork file block */
+	int		argc,
+	char		**argv)		/* argv[1]: file offset in fs blocks */
+{
+	bmap_ext_t	bm;
+	xfs_dfiloff_t	bno;
+	xfs_dfsbno_t	dfsbno;
+	int		haveattr;
+	int		nex;
+	char		*p;
+
+	bno = (xfs_dfiloff_t)strtoull(argv[1], &p, 0);	/* base 0: any radix */
+	if (*p != '\0') {		/* unparsed characters remain */
+		dbprintf("bad block number %s\n", argv[1]);
+		return 0;
+	}
+	push_cur();			/* paired with pop_cur() below */
+	set_cur_inode(iocur_top->ino);
+	haveattr = XFS_DFORK_Q((xfs_dinode_t *)iocur_top->data);
+	pop_cur();
+	if (!haveattr) {
+		dbprintf("no attribute data for file\n");
+		return 0;
+	}
+	nex = 1;			/* bm has room for a single extent */
+	bmap(bno, 1, XFS_ATTR_FORK, &nex, &bm);
+	if (nex == 0) {			/* bmap() stored no extent for bno */
+		dbprintf("file attr block is unmapped\n");
+		return 0;
+	}
+	dfsbno = bm.startblock + (bno - bm.startoff);	/* block within extent */
+	ASSERT(typtab[TYP_ATTR].typnm == TYP_ATTR);
+	set_cur(&typtab[TYP_ATTR], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno),
+		blkbb, DB_RING_ADD, NULL);
+	return 0;			/* every path returns 0 */
+}
+
+void
+block_init(void)	/* register the four addressing commands of this file */
+{
+	add_command(&ablock_cmd);
+	add_command(&daddr_cmd);
+	add_command(&dblock_cmd);
+	add_command(&fsblock_cmd);
+}
+
+static void 
+daddr_help(void)	/* help callback named in daddr_cmd: prints an example */
+{
+	dbprintf(
+"\n Example:\n"
+"\n"
+" 'daddr 102' - sets position to the 102nd absolute disk block\n"
+" (512 byte block).\n"
+);
+}
+
+static int
+daddr_f(			/* "daddr [d]": report or set the current daddr */
+	int		argc,
+	char		**argv)		/* argv[1], if present: new daddr */
+{
+	__int64_t	d;
+	char		*p;
+
+	if (argc == 1) {		/* no argument: only print the position */
+		dbprintf("current daddr is %lld\n", iocur_top->off >> BBSHIFT);
+		return 0;
+	}
+	d = (__int64_t)strtoull(argv[1], &p, 0);
+	if (*p != '\0' ||	/* junk after number, or >= sb_dblocks in daddrs */
+	    d >= mp->m_sb.sb_dblocks << (mp->m_sb.sb_blocklog - BBSHIFT)) {
+		dbprintf("bad daddr %s\n", argv[1]);
+		return 0;
+	}
+	ASSERT(typtab[TYP_DATA].typnm == TYP_DATA);
+	set_cur(&typtab[TYP_DATA], d, 1, DB_RING_ADD, NULL);	/* length 1 */
+	return 0;
+}
+
+static void 
+dblock_help(void)	/* help callback named in dblock_cmd: prints an example */
+{
+	dbprintf(
+"\n Example:\n"
+"\n"
+" 'dblock 23' - sets the file position to the 23rd filesystem block in\n"
+" the inode's data fork.  The filesystem block size is specified in the\n"
+" superblock.\n\n"
+);
+}
+
+static int
+dblock_f(			/* "dblock filoff": seek to a data-fork file block */
+	int		argc,
+	char		**argv)		/* argv[1]: file offset in fs blocks */
+{
+	bbmap_t		bbmap;
+	bmap_ext_t	*bmp;
+	xfs_dfiloff_t	bno;
+	xfs_dfsbno_t	dfsbno;
+	int		nb;
+	int		nex;
+	char		*p;
+	typnm_t		type;
+
+	bno = (xfs_dfiloff_t)strtoull(argv[1], &p, 0);
+	if (*p != '\0') {		/* unparsed characters remain */
+		dbprintf("bad block number %s\n", argv[1]);
+		return 0;
+	}
+	push_cur();			/* paired with pop_cur() below */
+	set_cur_inode(iocur_top->ino);
+	type = inode_next_type();	/* type used to display the file data */
+	pop_cur();
+	if (type == TYP_NONE) {
+		dbprintf("no type for file data\n");
+		return 0;
+	}
+	nex = nb = type == TYP_DIR2 ? mp->m_dirblkfsbs : 1;
+	bmp = malloc(nb * sizeof(*bmp));	/* NOTE(review): NULL unchecked */
+	bmap(bno, nb, XFS_DATA_FORK, &nex, bmp);
+	if (nex == 0) {			/* bmap() stored no extent for bno */
+		dbprintf("file data block is unmapped\n");
+		free(bmp);
+		return 0;
+	}
+	dfsbno = bmp->startblock + (bno - bmp->startoff);
+	ASSERT(typtab[type].typnm == type);
+	if (nex > 1)			/* several extents: pass an explicit map */
+		make_bbmap(&bbmap, nex, bmp);
+	set_cur(&typtab[type], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno),
+		nb * blkbb, DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+	free(bmp);
+	return 0;
+}
+
+static void 
+fsblock_help(void)	/* help callback named in fsblock_cmd: prints an example */
+{
+	dbprintf(
+"\n Example:\n"
+"\n"
+" 'fsblock 1023' - sets the file position to the 1023rd filesystem block.\n"
+" The filesystem block size is specified in the superblock and set during\n"
+" mkfs time.  Offset is absolute (not AG relative).\n\n"
+);
+}
+
+static int
+fsblock_f(			/* "fsblock [fsb]": report or set the position */
+	int		argc,
+	char		**argv)		/* argv[1], if present: new fsblock */
+{
+	xfs_agblock_t	agbno;
+	xfs_agnumber_t	agno;
+	xfs_dfsbno_t	d;
+	char		*p;
+
+	if (argc == 1) {		/* no argument: only print the position */
+		dbprintf("current fsblock is %lld\n",
+			XFS_DADDR_TO_FSB(mp, iocur_top->off >> BBSHIFT));
+		return 0;
+	}
+	d = strtoull(argv[1], &p, 0);
+	if (*p != '\0') {		/* unparsed characters remain */
+		dbprintf("bad fsblock %s\n", argv[1]);
+		return 0;
+	}
+	agno = XFS_FSB_TO_AGNO(mp, d);	/* split into AG number and AG block */
+	agbno = XFS_FSB_TO_AGBNO(mp, d);
+	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks) {
+		dbprintf("bad fsblock %s\n", argv[1]);
+		return 0;
+	}
+	ASSERT(typtab[TYP_DATA].typnm == TYP_DATA);
+	set_cur(&typtab[TYP_DATA], XFS_AGB_TO_DADDR(mp, agno, agbno),
+		blkbb, DB_RING_ADD, NULL);
+	return 0;
+}
+
+void
+print_block(		/* hex-dump the whole current buffer (iocur_top) */
+	const field_t	*fields,	/* not used */
+	int		argc,		/* not used */
+	char		**argv)		/* not used */
+{
+	print_rawdata(iocur_top->data, iocur_top->len);
+}
+
+static void
+print_rawdata(
+	void	*data,
+	int	len)
+{
+	unsigned char	*bytes = data;
+	int	col;		/* byte index within the current row */
+	int	lastrow;	/* offset of the final row's first byte */
+	int	row;		/* offset of the current row's first byte */
+	int	width;		/* hex digits in the offset label, 1..4 */
+
+	/* Hex dump: 32 bytes per row, a blank before every 4-byte group. */
+	lastrow = (len - 1) & ~31;
+	width = 4;
+	if (lastrow < 0x1000)
+		width = 3;
+	if (lastrow < 0x100)
+		width = 2;
+	if (lastrow < 0x10)
+		width = 1;
+	for (row = 0; row < len; row += 32) {
+		dbprintf("%-0*.*x:", width, width, row);
+		for (col = 0; col < 32 && row + col < len; col++) {
+			if (col % 4 == 0)
+				dbprintf(" ");
+			dbprintf("%02x", bytes[row + col]);
+		}
+		dbprintf("\n");
+	}
+}
diff --git a/db/block.h b/db/block.h
new file mode 100644
index 000000000..cf17f38c2
--- /dev/null
+++ b/db/block.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern void	block_init(void);
+extern void	print_block(const struct field *fields, int argc, char **argv);
diff --git a/db/bmap.c b/db/bmap.c
new file mode 100644
index 000000000..69e2d3dd9
--- /dev/null
+++ b/db/bmap.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "bmap.h"
+#include "io.h"
+#include "inode.h"
+#include "output.h"
+#include "mount.h"
+
+static int		bmap_f(int argc, char **argv);
+static int		bmap_one_extent(xfs_bmbt_rec_64_t *ep,
+					xfs_dfiloff_t *offp, xfs_dfiloff_t eoff,
+					int *idxp, bmap_ext_t *bep);
+static xfs_fsblock_t	select_child(xfs_dfiloff_t off, xfs_bmbt_key_t *kp,
+				     xfs_bmbt_ptr_t *pp, int nrecs);
+
+static const cmdinfo_t	bmap_cmd =	/* registered by bmap_init(); no help fn */
+	{ "bmap", NULL, bmap_f, 0, 3, 0, "[-ad] [block [len]]",
+	  "show block map for current file", NULL };
+
+void
+bmap(			/* collect extents of one fork that overlap a range */
+	xfs_dfiloff_t		offset,		/* first file offset wanted */
+	xfs_dfilblks_t		len,		/* number of blocks wanted */
+	int			whichfork,	/* XFS_DATA_FORK/XFS_ATTR_FORK */
+	int			*nexp,		/* in: slots in bep; out: used */
+	bmap_ext_t		*bep)		/* out: the extents found */
+{
+	xfs_bmbt_block_t	*block;
+	xfs_fsblock_t		bno;
+	xfs_dfiloff_t		curoffset;
+	xfs_dinode_t		*dip;
+	xfs_dfiloff_t		eoffset;
+	xfs_bmbt_rec_64_t	*ep;
+	xfs_dinode_fmt_t	fmt;
+	int			fsize;
+	xfs_bmbt_key_t		*kp;
+	int			n;
+	int			nex;
+	xfs_fsblock_t		nextbno;
+	int			nextents;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmdr_block_t	*rblock;
+	typnm_t			typ;
+	xfs_bmbt_rec_64_t	*xp;
+
+	push_cur();			/* undone by the final pop_cur() */
+	set_cur_inode(iocur_top->ino);
+	nex = *nexp;
+	*nexp = 0;
+	ASSERT(nex > 0);
+	dip = iocur_top->data;
+	n = 0;				/* extents stored in bep[] so far */
+	eoffset = offset + len - 1;	/* last offset wanted, inclusive */
+	curoffset = offset;
+	fmt = (xfs_dinode_fmt_t)XFS_DFORK_FORMAT_ARCH(dip, whichfork, ARCH_CONVERT);
+	typ = whichfork == XFS_DATA_FORK ? TYP_BMAPBTD : TYP_BMAPBTA;
+	ASSERT(typtab[typ].typnm == typ);
+	ASSERT(fmt == XFS_DINODE_FMT_EXTENTS || fmt == XFS_DINODE_FMT_BTREE);
+	if (fmt == XFS_DINODE_FMT_EXTENTS) {	/* records sit in the fork */
+		nextents = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+		xp = (xfs_bmbt_rec_64_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+		for (ep = xp; ep < &xp[nextents] && n < nex; ep++) {
+			if (!bmap_one_extent(ep, &curoffset, eoffset, &n, bep))
+				break;
+		}
+	} else {
+		push_cur();		/* scratch cursor for the btree blocks */
+		/* The fork holds the btree root; pick the child for curoffset. */
+		rblock = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+		fsize = XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT);
+		pp = XFS_BTREE_PTR_ADDR(fsize, xfs_bmdr, rblock, 1,
+			XFS_BTREE_BLOCK_MAXRECS(fsize, xfs_bmdr, 0));
+		kp = XFS_BTREE_KEY_ADDR(fsize, xfs_bmdr, rblock, 1,
+			XFS_BTREE_BLOCK_MAXRECS(fsize, xfs_bmdr, 0));
+		bno = select_child(curoffset, kp, pp, INT_GET(rblock->bb_numrecs, ARCH_CONVERT));
+		for (;;) {		/* descend until a level-0 block */
+			set_cur(&typtab[typ], XFS_FSB_TO_DADDR(mp, bno),
+				blkbb, DB_RING_IGN, NULL);
+			block = (xfs_bmbt_block_t *)iocur_top->data;
+			if (INT_GET(block->bb_level, ARCH_CONVERT) == 0)
+				break;
+			pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+				block, 1,
+				XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize,
+					xfs_bmbt, 0));
+			kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+				block, 1,
+				XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize,
+					xfs_bmbt, 0));
+			bno = select_child(curoffset, kp, pp,
+				INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		}
+		for (;;) {		/* walk level-0 blocks via bb_rightsib */
+			nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+			nextents = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+			xp = (xfs_bmbt_rec_64_t *)XFS_BTREE_REC_ADDR(
+				mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+				XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize,
+					xfs_bmbt, 1));
+			for (ep = xp; ep < &xp[nextents] && n < nex; ep++) {
+				if (!bmap_one_extent(ep, &curoffset, eoffset,
+						&n, bep)) {
+					nextbno = NULLFSBLOCK;	/* past eoffset */
+					break;
+				}
+			}
+			bno = nextbno;
+			if (bno == NULLFSBLOCK || n >= nex)	/* done or bep full */
+				break;
+			set_cur(&typtab[typ], XFS_FSB_TO_DADDR(mp, bno),
+				blkbb, DB_RING_IGN, NULL);
+			block = (xfs_bmbt_block_t *)iocur_top->data;
+		}
+		pop_cur();
+	}
+	pop_cur();
+	*nexp = n;			/* report how many slots were filled */
+}
+
+static int
+bmap_f(			/* "bmap [-ad] [block [len]]": print the block map */
+	int		argc,
+	char		**argv)
+{
+	int		afork = 0;	/* set by -a: show the attribute fork */
+	bmap_ext_t	be;
+	int		c;
+	xfs_dfiloff_t	co;		/* first file offset requested */
+	xfs_dfiloff_t	cur;		/* scan position within one fork */
+	int		dfork = 0;	/* set by -d: show the data fork */
+	xfs_dinode_t	*dip;
+	xfs_dfiloff_t	eo;		/* last file offset requested */
+	xfs_dfilblks_t	len;
+	int		nex;
+	char		*p;
+	int		whichfork;
+
+	if (iocur_top->ino == NULLFSINO) {
+		dbprintf("no current inode\n");
+		return 0;
+	}
+	optind = 0;
+	if (argc) while ((c = getopt(argc, argv, "ad")) != EOF) {
+		switch (c) {
+		case 'a':
+			afork = 1;
+			break;
+		case 'd':
+			dfork = 1;
+			break;
+		default:
+			dbprintf("bad option for bmap command\n");
+			return 0;
+		}
+	}
+	if (afork + dfork == 0) {	/* no -a/-d: show forks that have extents */
+		push_cur();
+		set_cur_inode(iocur_top->ino);
+		dip = iocur_top->data;
+		if (INT_GET(dip->di_core.di_nextents, ARCH_CONVERT))
+			dfork = 1;
+		if (INT_GET(dip->di_core.di_anextents, ARCH_CONVERT))
+			afork = 1;
+		pop_cur();
+	}
+	if (optind < argc) {
+		co = (xfs_dfiloff_t)strtoull(argv[optind], &p, 0);
+		if (*p != '\0') {
+			dbprintf("bad block number for bmap %s\n",
+				argv[optind]);
+			return 0;
+		}
+		optind++;
+		if (optind < argc) {
+			len = (xfs_dfilblks_t)strtoull(argv[optind], &p, 0);
+			if (*p != '\0') {
+				dbprintf("bad len for bmap %s\n", argv[optind]);
+				return 0;
+			}
+			eo = co + len - 1;
+		} else
+			eo = co;	/* block without len: just that block */
+	} else {
+		co = 0;			/* no arguments: start at offset 0 */
+		eo = -1;
+	}
+	for (whichfork = XFS_DATA_FORK; whichfork <= XFS_ATTR_FORK;
+	     whichfork++) {
+		if (whichfork == XFS_DATA_FORK && !dfork)
+			continue;
+		if (whichfork == XFS_ATTR_FORK && !afork)
+			continue;
+		for (cur = co;;) {	/* every fork restarts at co */
+			nex = 1;
+			bmap(cur, eo - cur + 1, whichfork, &nex, &be);
+			if (nex == 0)
+				break;
+			dbprintf("%s offset %lld startblock %llu (%u/%u) count "
+				 "%llu flag %u\n",
+				whichfork == XFS_DATA_FORK ? "data" : "attr",
+				be.startoff, be.startblock,
+				XFS_FSB_TO_AGNO(mp, be.startblock),
+				XFS_FSB_TO_AGBNO(mp, be.startblock),
+				be.blockcount, be.flag);
+			cur = be.startoff + be.blockcount;
+		}
+	}
+	return 0;
+}
+
+void
+bmap_init(void)		/* register the "bmap" command (bmap_cmd) */
+{
+	add_command(&bmap_cmd);
+}
+
+static int
+bmap_one_extent(
+	xfs_bmbt_rec_64_t	*ep,
+	xfs_dfiloff_t		*offp,
+	xfs_dfiloff_t		eoff,
+	int			*idxp,
+	bmap_ext_t		*bep)
+{
+	xfs_dfilblks_t		count;
+	int			flag;
+	xfs_dfiloff_t		from = *offp;	/* first offset still wanted */
+	xfs_dfsbno_t		fsb;
+	bmap_ext_t		*out = &bep[*idxp];	/* next output slot */
+	xfs_dfiloff_t		start;
+
+	convert_extent(ep, &start, &fsb, &count, &flag);
+	/* Extent ends before the wanted range: skip it, keep scanning. */
+	if (start + count <= from)
+		return 1;
+	/* Extent begins past the wanted range: tell the caller to stop. */
+	if (start > eoff)
+		return 0;
+	if (start < from) {		/* trim the part in front of 'from' */
+		count -= from - start;
+		fsb += from - start;
+		start = from;
+	}
+	if (start + count - 1 > eoff)	/* trim the part beyond 'eoff' */
+		count -= (start + count - 1) - eoff;
+	out->startoff = start;
+	out->startblock = fsb;
+	out->blockcount = count;
+	out->flag = flag;
+	*idxp += 1;
+	*offp = start + count;
+	return 1;
+}
+
+void
+convert_extent(
+	xfs_bmbt_rec_64_t		*rp,
+	xfs_dfiloff_t		*op,
+	xfs_dfsbno_t		*sp,
+	xfs_dfilblks_t		*cp,
+	int			*fp)
+{
+	xfs_bmbt_irec_t		unpacked;
+
+	/*
+	 * Have libxfs decode *rp into an xfs_bmbt_irec_t, then hand each
+	 * field back through the caller's pointers.  The flag is 1 only for
+	 * an extent in the XFS_EXT_UNWRITTEN state and 0 for anything else.
+	 */
+	libxfs_bmbt_get_all((xfs_bmbt_rec_t *)rp, &unpacked);
+
+	*op = unpacked.br_startoff;
+	*sp = unpacked.br_startblock;
+	*cp = unpacked.br_blockcount;
+	*fp = (unpacked.br_state == XFS_EXT_UNWRITTEN) ? 1 : 0;
+}
+
+void
+make_bbmap(
+	bbmap_t		*bbmap,
+	int		nex,
+	bmap_ext_t	*bmp)
+{
+	int		bb;		/* basic block within one fs block */
+	int		blk;		/* fs block within one extent */
+	int		ext;		/* extent index into bmp[] */
+	xfs_dfsbno_t	fsb;
+	int		slot = 0;	/* next unused entry of bbmap->b[] */
+
+	/* NOTE(review): slot is never checked against the size of b[]. */
+	for (ext = 0; ext < nex; ext++) {
+		for (blk = 0; blk < bmp[ext].blockcount; blk++) {
+			fsb = bmp[ext].startblock + blk;
+			for (bb = 0; bb < blkbb; bb++, slot++)
+				bbmap->b[slot] = XFS_FSB_TO_DADDR(mp, fsb) + bb;
+		}
+	}
+}
+
+static xfs_fsblock_t
+select_child(
+	xfs_dfiloff_t	off,
+	xfs_bmbt_key_t	*kp,
+	xfs_bmbt_ptr_t	*pp,
+	int		nrecs)
+{
+	xfs_dfiloff_t	key;
+	int		pick = nrecs - 1;	/* no key >= off: last child */
+	int		slot;
+
+	for (slot = 0; slot < nrecs; slot++) {
+		key = INT_GET(kp[slot].br_startoff, ARCH_CONVERT);
+		if (key < off)
+			continue;
+		/* First key >= off: exact hit or slot 0 stays, else step back. */
+		pick = (key == off || slot == 0) ? slot : slot - 1;
+		break;
+	}
+	return INT_GET(pp[pick], ARCH_CONVERT);	/* NOTE(review): nrecs 0 -> pp[-1] */
+}
diff --git a/db/bmap.h b/db/bmap.h
new file mode 100644
index 000000000..2420601e8
--- /dev/null
+++ b/db/bmap.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct	bbmap;
+struct	xfs_bmbt_rec_64;
+
+typedef struct bmap_ext {	/* one extent as reported by bmap() */
+	xfs_dfiloff_t	startoff;	/* file offset where the extent starts */
+	xfs_dfsbno_t	startblock;	/* filesystem block backing startoff */
+	xfs_dfilblks_t	blockcount;	/* length of the extent in blocks */
+	int		flag;		/* 1 if XFS_EXT_UNWRITTEN, else 0 */
+} bmap_ext_t;
+
+extern void	bmap(xfs_dfiloff_t offset, xfs_dfilblks_t len, int whichfork,
+		     int *nexp, bmap_ext_t *bep);
+extern void	bmap_init(void);
+extern void	convert_extent(struct xfs_bmbt_rec_64 *rp, xfs_dfiloff_t *op,
+			       xfs_dfsbno_t *sp, xfs_dfilblks_t *cp, int *fp);
+extern void	make_bbmap(struct bbmap *bbmap, int nex, bmap_ext_t *bmp);
diff --git a/db/bmapbt.c b/db/bmapbt.c
new file mode 100644
index 000000000..3ecfb374f
--- /dev/null
+++ b/db/bmapbt.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bmapbt.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int	bmapbta_key_count(void *obj, int startoff);
+static int	bmapbta_key_offset(void *obj, int startoff, int idx);
+static int	bmapbta_ptr_count(void *obj, int startoff);
+static int	bmapbta_ptr_offset(void *obj, int startoff, int idx);
+static int	bmapbta_rec_count(void *obj, int startoff);
+static int	bmapbta_rec_offset(void *obj, int startoff, int idx);
+static int	bmapbtd_key_count(void *obj, int startoff);
+static int	bmapbtd_key_offset(void *obj, int startoff, int idx);
+static int	bmapbtd_ptr_count(void *obj, int startoff);
+static int	bmapbtd_ptr_offset(void *obj, int startoff, int idx);
+static int	bmapbtd_rec_count(void *obj, int startoff);
+static int	bmapbtd_rec_offset(void *obj, int startoff, int idx);
+
+const field_t	bmapbta_hfld[] = {	/* one unnamed FLDT_BMAPBTA field at bit 0 */
+	{ "", FLDT_BMAPBTA, OI(0), C1, 0, TYP_NONE },
+	{ NULL }		/* presumably the end-of-table marker - confirm */
+};
+const field_t	bmapbtd_hfld[] = {	/* one unnamed FLDT_BMAPBTD field at bit 0 */
+	{ "", FLDT_BMAPBTD, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_bmbt_block_t, bb_ ## f))	/* bit offset of bb_<f> */
+const field_t	bmapbta_flds[] = {	/* header fields, then the arrays */
+	{ "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "leftsib", FLDT_DFSBNO, OI(OFF(leftsib)), C1, 0, TYP_BMAPBTA },
+	{ "rightsib", FLDT_DFSBNO, OI(OFF(rightsib)), C1, 0, TYP_BMAPBTA },
+	{ "recs", FLDT_BMAPBTAREC, bmapbta_rec_offset, bmapbta_rec_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "keys", FLDT_BMAPBTAKEY, bmapbta_key_offset, bmapbta_key_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_BMAPBTAPTR, bmapbta_ptr_offset, bmapbta_ptr_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTA },
+	{ NULL }
+};
+const field_t	bmapbtd_flds[] = {	/* same layout, BMAPBTD types/callbacks */
+	{ "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "leftsib", FLDT_DFSBNO, OI(OFF(leftsib)), C1, 0, TYP_BMAPBTD },
+	{ "rightsib", FLDT_DFSBNO, OI(OFF(rightsib)), C1, 0, TYP_BMAPBTD },
+	{ "recs", FLDT_BMAPBTDREC, bmapbtd_rec_offset, bmapbtd_rec_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "keys", FLDT_BMAPBTDKEY, bmapbtd_key_offset, bmapbtd_key_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_BMAPBTDPTR, bmapbtd_ptr_offset, bmapbtd_ptr_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTD },
+	{ NULL }
+};
+
+#define	KOFF(f)	bitize(offsetof(xfs_bmbt_key_t, br_ ## f))	/* bit offset of br_<f> */
+const field_t	bmapbta_key_flds[] = {	/* a key holds only br_startoff */
+	{ "startoff", FLDT_DFILOFFA, OI(KOFF(startoff)), C1, 0, TYP_ATTR },
+	{ NULL }
+};
+const field_t	bmapbtd_key_flds[] = {	/* as above, with TYP_INODATA */
+	{ "startoff", FLDT_DFILOFFD, OI(KOFF(startoff)), C1, 0, TYP_INODATA },
+	{ NULL }
+};
+
+const field_t	bmapbta_rec_flds[] = {	/* fields at BMBT_*_BITOFF positions */
+	{ "startoff", FLDT_CFILEOFFA, OI(BMBT_STARTOFF_BITOFF), C1, 0,
+	  TYP_ATTR },
+	{ "startblock", FLDT_CFSBLOCK, OI(BMBT_STARTBLOCK_BITOFF), C1, 0,
+	  TYP_ATTR },
+	{ "blockcount", FLDT_CEXTLEN, OI(BMBT_BLOCKCOUNT_BITOFF), C1, 0,
+	  TYP_NONE },
+	{ "extentflag", FLDT_CEXTFLG, OI(BMBT_EXNTFLAG_BITOFF), C1, 0,
+	  TYP_NONE },
+	{ NULL }
+};
+const field_t	bmapbtd_rec_flds[] = {	/* as above, with TYP_INODATA */
+	{ "startoff", FLDT_CFILEOFFD, OI(BMBT_STARTOFF_BITOFF), C1, 0,
+	  TYP_INODATA },
+	{ "startblock", FLDT_CFSBLOCK, OI(BMBT_STARTBLOCK_BITOFF), C1, 0,
+	  TYP_INODATA },
+	{ "blockcount", FLDT_CEXTLEN, OI(BMBT_BLOCKCOUNT_BITOFF), C1, 0,
+	  TYP_NONE },
+	{ "extentflag", FLDT_CEXTFLG, OI(BMBT_EXNTFLAG_BITOFF), C1, 0,
+	  TYP_NONE },
+	{ NULL }
+};
+
+static int
+bmapbta_key_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_bmbt_block_t	*blk = obj;
+
+	/* A level-0 block reports no keys; otherwise report bb_numrecs. */
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+static int
+bmapbta_key_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_bmbt_block_t	*blk = obj;
+	xfs_bmbt_key_t		*key;
+
+	/* Bit offset of key number idx, measured from the block start. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+	return bitize((int)((char *)key - (char *)blk));
+}
+
+static int
+bmapbta_ptr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_bmbt_block_t	*blk = obj;
+
+	/* A level-0 block reports no pointers; otherwise bb_numrecs. */
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+static int
+bmapbta_ptr_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_bmbt_block_t	*blk = obj;
+	xfs_bmbt_ptr_t		*ptr;
+
+	/* Bit offset of pointer number idx, measured from the block start. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+	return bitize((int)((char *)ptr - (char *)blk));
+}
+
+static int
+bmapbta_rec_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_bmbt_block_t	*blk = obj;
+
+	/* Only a level-0 block reports records (bb_numrecs of them). */
+	ASSERT(startoff == 0);
+	if (!(INT_GET(blk->bb_level, ARCH_CONVERT) > 0))
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+static int
+bmapbta_rec_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_bmbt_block_t	*blk = obj;
+	xfs_bmbt_rec_t		*rec;
+
+	/* Bit offset of record number idx, measured from the block start. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+	rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 1));
+	return bitize((int)((char *)rec - (char *)blk));
+}
+
+int
+bmapbta_size(		/* returns the superblock block size, in bits */
+	void	*obj,		/* not used */
+	int	startoff,	/* not used */
+	int	idx)		/* not used */
+{
+	return bitize(mp->m_sb.sb_blocksize);
+}
+
+static int
+bmapbtd_key_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_bmbt_block_t	*blk = obj;
+
+	/* A level-0 block reports no keys; otherwise report bb_numrecs. */
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+static int
+bmapbtd_key_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_bmbt_block_t	*blk = obj;
+	xfs_bmbt_key_t		*key;
+
+	/* Bit offset of key number idx, measured from the block start. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+	return bitize((int)((char *)key - (char *)blk));
+}
+
+static int
+bmapbtd_ptr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_bmbt_block_t	*blk = obj;
+
+	/* A level-0 block reports no pointers; otherwise bb_numrecs. */
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+static int
+bmapbtd_ptr_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_bmbt_block_t	*blk = obj;
+	xfs_bmbt_ptr_t		*ptr;
+
+	/* Bit offset of pointer number idx, measured from the block start. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+	return bitize((int)((char *)ptr - (char *)blk));
+}
+
+static int
+bmapbtd_rec_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_bmbt_block_t	*blk = obj;
+
+	/* Only a level-0 block reports records (bb_numrecs of them). */
+	ASSERT(startoff == 0);
+	if (!(INT_GET(blk->bb_level, ARCH_CONVERT) > 0))
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+static int
+bmapbtd_rec_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_bmbt_block_t	*blk = obj;
+	xfs_bmbt_rec_t		*rec;
+
+	/* Bit offset of record number idx, measured from the block start. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+	rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 1));
+	return bitize((int)((char *)rec - (char *)blk));
+}
+
+int
+bmapbtd_size(		/* returns the superblock block size, in bits */
+	void	*obj,		/* not used */
+	int	startoff,	/* not used */
+	int	idx)		/* not used */
+{
+	return bitize(mp->m_sb.sb_blocksize);
+}
diff --git a/db/bmapbt.h b/db/bmapbt.h
new file mode 100644
index 000000000..8f39c98f3
--- /dev/null
+++ b/db/bmapbt.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	bmapbta_flds[];
+extern const struct field	bmapbta_hfld[];
+extern const struct field	bmapbta_key_flds[];
+extern const struct field	bmapbta_rec_flds[];
+extern const struct field	bmapbtd_flds[];
+extern const struct field	bmapbtd_hfld[];
+extern const struct field	bmapbtd_key_flds[];
+extern const struct field	bmapbtd_rec_flds[];
+
+extern int	bmapbta_size(void *obj, int startoff, int idx);
+extern int	bmapbtd_size(void *obj, int startoff, int idx);
diff --git a/db/bmroot.c b/db/bmroot.c
new file mode 100644
index 000000000..a96c6d29f
--- /dev/null
+++ b/db/bmroot.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bmroot.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int	bmroota_key_count(void *obj, int startoff);
+static int	bmroota_key_offset(void *obj, int startoff, int idx);
+static int	bmroota_ptr_count(void *obj, int startoff);
+static int	bmroota_ptr_offset(void *obj, int startoff, int idx);
+static int	bmrootd_key_count(void *obj, int startoff);
+static int	bmrootd_key_offset(void *obj, int startoff, int idx);
+static int	bmrootd_ptr_count(void *obj, int startoff);
+static int	bmrootd_ptr_offset(void *obj, int startoff, int idx);
+
+#define	OFF(f)	bitize(offsetof(xfs_bmdr_block_t, bb_ ## f))	/* bitize()d offset of member bb_<f> */
+const field_t	bmroota_flds[] = {	/* fields of an xfs_bmdr_block_t root; ptrs are typed TYP_BMAPBTA */
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "keys", FLDT_BMROOTAKEY, bmroota_key_offset, bmroota_key_count,	/* position and count come from callbacks */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_BMROOTAPTR, bmroota_ptr_offset, bmroota_ptr_count,	/* position and count come from callbacks */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTA },
+	{ NULL }	/* NULL name ends the table */
+};
+const field_t	bmrootd_flds[] = {	/* same layout as bmroota_flds, but using the bmrootd_* callbacks and TYP_BMAPBTD */
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "keys", FLDT_BMROOTDKEY, bmrootd_key_offset, bmrootd_key_count,	/* position and count come from callbacks */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_BMROOTDPTR, bmrootd_ptr_offset, bmrootd_ptr_count,	/* position and count come from callbacks */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTD },
+	{ NULL }	/* NULL name ends the table */
+};
+
+#define	KOFF(f)	bitize(offsetof(xfs_bmdr_key_t, br_ ## f))	/* bitize()d offset of member br_<f> */
+const field_t	bmroota_key_flds[] = {	/* layout of one xfs_bmdr_key_t: a single startoff field */
+	{ "startoff", FLDT_DFILOFFA, OI(KOFF(startoff)), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+const field_t	bmrootd_key_flds[] = {	/* as bmroota_key_flds, but startoff is typed FLDT_DFILOFFD */
+	{ "startoff", FLDT_DFILOFFD, OI(KOFF(startoff)), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+
+static int
+bmroota_key_count(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff)	/* where the root starts in obj */
+{
+#ifdef DEBUG
+	xfs_dinode_t		*dip = obj;	/* only the ASSERTs look at it */
+#endif
+	xfs_bmdr_block_t	*root;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	return INT_GET(root->bb_numrecs, ARCH_CONVERT);	/* one key per record */
+}
+
+static int
+bmroota_key_offset(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff,	/* where the root starts in obj */
+	int			idx)		/* key index (array is FLD_ABASE1) */
+{
+	/* REFERENCED */
+	xfs_dinode_t		*dip = obj;
+	xfs_bmdr_block_t	*root;
+	xfs_bmdr_key_t		*key;
+	int			nmax;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_ASIZE(dip, mp), xfs_bmdr, 0);
+	key = XFS_BTREE_KEY_ADDR(iocur_top->len, xfs_bmdr, root, idx, nmax);
+	return bitize((int)((char *)key - (char *)root));
+}
+
+static int
+bmroota_ptr_count(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff)	/* where the root starts in obj */
+{
+#ifdef DEBUG
+	xfs_dinode_t		*dip = obj;	/* only the ASSERTs look at it */
+#endif
+	xfs_bmdr_block_t	*root;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	return INT_GET(root->bb_numrecs, ARCH_CONVERT);	/* one ptr per record */
+}
+
+static int
+bmroota_ptr_offset(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff,	/* where the root starts in obj */
+	int			idx)		/* ptr index (array is FLD_ABASE1) */
+{
+	xfs_dinode_t		*dip = obj;
+	xfs_bmdr_block_t	*root;
+	xfs_bmdr_ptr_t		*ptr;
+	int			nmax;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_ASIZE(dip, mp), xfs_bmdr, 0);
+	ptr = XFS_BTREE_PTR_ADDR(iocur_top->len, xfs_bmdr, root, idx, nmax);
+	return bitize((int)((char *)ptr - (char *)root));
+}
+
+int
+bmroota_size(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff,	/* where the root starts in obj */
+	int			idx)		/* always 0 */
+{
+	xfs_dinode_t		*dip = obj;
+#ifdef DEBUG
+	xfs_bmdr_block_t	*root;
+#endif
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT(idx == 0);
+#ifdef DEBUG
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+#endif
+	/* the root is as large as XFS_DFORK_ASIZE() says the fork is */
+	return bitize((int)XFS_DFORK_ASIZE(dip, mp));
+}
+
+static int
+bmrootd_key_count(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff)	/* where the root starts in obj */
+{
+#ifdef DEBUG
+	xfs_dinode_t		*dip = obj;	/* only the ASSERTs look at it */
+#endif
+	xfs_bmdr_block_t	*root;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT((char *)root == XFS_DFORK_DPTR(dip));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	return INT_GET(root->bb_numrecs, ARCH_CONVERT);	/* one key per record */
+}
+
+static int
+bmrootd_key_offset(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff,	/* where the root starts in obj */
+	int			idx)		/* key index (array is FLD_ABASE1) */
+{
+	xfs_dinode_t		*dip = obj;
+	xfs_bmdr_block_t	*root;
+	xfs_bmdr_key_t		*key;
+	int			nmax;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_DSIZE(dip, mp), xfs_bmdr, 0);
+	key = XFS_BTREE_KEY_ADDR(iocur_top->len, xfs_bmdr, root, idx, nmax);
+	return bitize((int)((char *)key - (char *)root));
+}
+
+static int
+bmrootd_ptr_count(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff)	/* where the root starts in obj */
+{
+#ifdef DEBUG
+	xfs_dinode_t		*dip = obj;	/* only the ASSERTs look at it */
+#endif
+	xfs_bmdr_block_t	*root;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT((char *)root == XFS_DFORK_DPTR(dip));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	return INT_GET(root->bb_numrecs, ARCH_CONVERT);	/* one ptr per record */
+}
+
+static int
+bmrootd_ptr_offset(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff,	/* where the root starts in obj */
+	int			idx)		/* ptr index (array is FLD_ABASE1) */
+{
+	xfs_dinode_t		*dip = obj;
+	xfs_bmdr_block_t	*root;
+	xfs_bmdr_ptr_t		*ptr;
+	int			nmax;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_DSIZE(dip, mp), xfs_bmdr, 0);
+	ptr = XFS_BTREE_PTR_ADDR(iocur_top->len, xfs_bmdr, root, idx, nmax);
+	return bitize((int)((char *)ptr - (char *)root));
+}
+
+int
+bmrootd_size(
+	void			*obj,		/* must be iocur_top->data */
+	int			startoff,	/* where the root starts in obj */
+	int			idx)		/* always 0 */
+{
+	xfs_dinode_t		*dip = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT(idx == 0);
+	/* the root is as large as XFS_DFORK_DSIZE() says the fork is */
+	return bitize((int)XFS_DFORK_DSIZE(dip, mp));
+}
diff --git a/db/bmroot.h b/db/bmroot.h
new file mode 100644
index 000000000..3f8ef0cc5
--- /dev/null
+++ b/db/bmroot.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	bmroota_flds[];
+extern const struct field	bmroota_key_flds[];
+extern const struct field	bmrootd_flds[];
+extern const struct field	bmrootd_key_flds[];
+
+extern int	bmroota_size(void *obj, int startoff, int idx);
+extern int	bmrootd_size(void *obj, int startoff, int idx);
diff --git a/db/bnobt.c b/db/bnobt.c
new file mode 100644
index 000000000..3b0960562
--- /dev/null
+++ b/db/bnobt.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bnobt.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int	bnobt_key_count(void *obj, int startoff);
+static int	bnobt_key_offset(void *obj, int startoff, int idx);
+static int	bnobt_ptr_count(void *obj, int startoff);
+static int	bnobt_ptr_offset(void *obj, int startoff, int idx);
+static int	bnobt_rec_count(void *obj, int startoff);
+static int	bnobt_rec_offset(void *obj, int startoff, int idx);
+
+const field_t	bnobt_hfld[] = {	/* one unnamed FLDT_BNOBT field at offset 0 */
+	{ "", FLDT_BNOBT, OI(0), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_alloc_block_t, bb_ ## f))	/* bitize()d offset of member bb_<f> */
+const field_t	bnobt_flds[] = {	/* fields of an xfs_alloc_block_t */
+	{ "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_BNOBT },
+	{ "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_BNOBT },
+	{ "recs", FLDT_BNOBTREC, bnobt_rec_offset, bnobt_rec_count,	/* count is 0 unless bb_level == 0 */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "keys", FLDT_BNOBTKEY, bnobt_key_offset, bnobt_key_count,	/* count is 0 when bb_level == 0 */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_BNOBTPTR, bnobt_ptr_offset, bnobt_ptr_count,	/* count is 0 when bb_level == 0 */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BNOBT },
+	{ NULL }	/* NULL name ends the table */
+};
+
+#define	KOFF(f)	bitize(offsetof(xfs_alloc_key_t, ar_ ## f))	/* bitize()d offset of member ar_<f> */
+const field_t	bnobt_key_flds[] = {	/* layout of one xfs_alloc_key_t */
+	{ "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+	{ "blockcount", FLDT_EXTLEN, OI(KOFF(blockcount)), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+
+#define	ROFF(f)	bitize(offsetof(xfs_alloc_rec_t, ar_ ## f))	/* bitize()d offset of member ar_<f> */
+const field_t	bnobt_rec_flds[] = {	/* layout of one xfs_alloc_rec_t */
+	{ "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+	{ "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+
+static int
+bnobt_key_count(
+	void			*obj,		/* the btree block */
+	int			startoff)	/* always 0 */
+{
+	xfs_alloc_block_t	*blk = obj;
+	int			nkeys = 0;	/* level 0 blocks have no keys */
+
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		nkeys = INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return nkeys;
+}
+
+static int
+bnobt_key_offset(
+	void			*obj,		/* the btree block */
+	int			startoff,	/* always 0 */
+	int			idx)		/* key index (array is FLD_ABASE1) */
+{
+	xfs_alloc_block_t	*blk = obj;
+	xfs_alloc_key_t		*key;
+	int			nmax;
+
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0);
+	key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk, idx,
+		nmax);
+	return bitize((int)((char *)key - (char *)blk));
+}
+
+static int
+bnobt_ptr_count(
+	void			*obj,		/* the btree block */
+	int			startoff)	/* always 0 */
+{
+	xfs_alloc_block_t	*blk = obj;
+	int			nptrs = 0;	/* level 0 blocks have no ptrs */
+
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		nptrs = INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return nptrs;
+}
+
+static int
+bnobt_ptr_offset(
+	void			*obj,		/* the btree block */
+	int			startoff,	/* always 0 */
+	int			idx)		/* ptr index (array is FLD_ABASE1) */
+{
+	xfs_alloc_block_t	*blk = obj;
+	xfs_alloc_ptr_t		*ptr;
+	int			nmax;
+
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0);
+	ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk, idx,
+		nmax);
+	return bitize((int)((char *)ptr - (char *)blk));
+}
+
+static int
+bnobt_rec_count(
+	void			*obj,		/* the btree block */
+	int			startoff)	/* always 0 */
+{
+	xfs_alloc_block_t	*blk = obj;
+
+	ASSERT(startoff == 0);
+	/* blocks above level 0 carry keys and ptrs, not records */
+	return INT_GET(blk->bb_level, ARCH_CONVERT) > 0 ?
+		0 :
+		INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+}
+
+static int
+bnobt_rec_offset(
+	void			*obj,		/* the btree block */
+	int			startoff,	/* always 0 */
+	int			idx)		/* rec index (array is FLD_ABASE1) */
+{
+	xfs_alloc_block_t	*blk = obj;
+	xfs_alloc_rec_t		*rec;
+	int			nmax;
+
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+	nmax = XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1);
+	rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk, idx,
+		nmax);
+	return bitize((int)((char *)rec - (char *)blk));
+}
+
+int
+bnobt_size(
+	void	*obj,		/* unused */
+	int	startoff,	/* unused */
+	int	idx)		/* unused */
+{
+	return bitize(mp->m_sb.sb_blocksize);	/* always the superblock's block size, via bitize() */
+}
diff --git a/db/bnobt.h b/db/bnobt.h
new file mode 100644
index 000000000..07e8b2637
--- /dev/null
+++ b/db/bnobt.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	bnobt_flds[];
+extern const struct field	bnobt_hfld[];
+extern const struct field	bnobt_key_flds[];
+extern const struct field	bnobt_rec_flds[];
+
+extern int	bnobt_size(void *obj, int startoff, int idx);
diff --git a/db/check.c b/db/check.c
new file mode 100644
index 000000000..b40442206
--- /dev/null
+++ b/db/check.c
@@ -0,0 +1,4468 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <math.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include "bmap.h"
+#include "check.h"
+#include "command.h"
+#include "data.h"
+#include "io.h"
+#include "output.h"
+#include "type.h"
+#include "mount.h"
+#include "malloc.h"
+
+typedef enum {	/* block usage kinds; typename[] lists their names in this same order */
+	DBM_UNKNOWN,	DBM_AGF,	DBM_AGFL,	DBM_AGI,
+	DBM_ATTR,	DBM_BTBMAPA,	DBM_BTBMAPD,	DBM_BTBNO,
+	DBM_BTCNT,	DBM_BTINO,	DBM_DATA,	DBM_DIR,
+	DBM_FREE1,	DBM_FREE2,	DBM_FREELIST,	DBM_INODE,
+	DBM_LOG,	DBM_MISSING,	DBM_QUOTA,	DBM_RTBITMAP,
+	DBM_RTDATA,	DBM_RTFREE,	DBM_RTSUM,	DBM_SB,
+	DBM_SYMLINK,
+	DBM_NDBM	/* last enumerator, so it equals the number of kinds above */
+} dbm_t;
+
+typedef struct inodata {	/* what the checker has recorded about one inode */
+	struct inodata	*next;		/* presumably the hash-chain link -- confirm in find_inode() */
+	nlink_t		link_set;
+	nlink_t		link_add;	/* bumped once per addlink_inode() call */
+	char		isdir;
+	char		security;
+	char		ilist;		/* set by add_ilist(); turns on per-inode messages */
+	xfs_ino_t	ino;
+	struct inodata	*parent;	/* set by addparent_inode(); may be NULL */
+	char		*name;		/* xmalloc'd, NUL-terminated; filled in by addname_inode() */
+} inodata_t;
+#define	MIN_INODATA_HASH_SIZE	256
+#define	MAX_INODATA_HASH_SIZE	65536
+#define	INODATA_AVG_HASH_LENGTH	8
+
+typedef struct qinfo {	/* three quota counters, named like the bc/ic/rc arguments of quota_add() */
+	xfs_qcnt_t	bc;	/* presumably a block count -- confirm */
+	xfs_qcnt_t	ic;	/* presumably an inode count -- confirm */
+	xfs_qcnt_t	rc;	/* presumably a realtime block count -- confirm */
+} qinfo_t;
+
+#define	QDATA_HASH_SIZE	256
+typedef	struct qdata {	/* quota totals kept per id, chained through next */
+	struct qdata	*next;
+	xfs_dqid_t	id;
+	qinfo_t		count;	/* NOTE(review): presumably the totals computed by the scan -- confirm in quota_add1() */
+	qinfo_t		dq;	/* NOTE(review): presumably the totals read from the dquot -- confirm in quota_add1() */
+} qdata_t;
+
+typedef struct blkent {	/* a run of file offsets and the fs block behind each one */
+	xfs_fileoff_t	startoff;	/* file offset that blks[0] belongs to */
+	int		nblks;		/* number of valid entries in blks[] */
+	xfs_fsblock_t	blks[1];	/* really nblks long; allocate with BLKENT_SIZE() */
+} blkent_t;
+#define	BLKENT_SIZE(n)	\
+	(offsetof(blkent_t, blks) + (sizeof(xfs_fsblock_t) * (n)))	/* bytes for an entry with n blocks */
+
+typedef	struct blkmap {	/* growable array of blkent_t pointers */
+	int		naents;		/* slots allocated in ents[] */
+	int		nents;		/* slots in use; blkmap_free() frees exactly this many */
+	blkent_t	*ents[1];	/* really naents long; allocate with BLKMAP_SIZE() */
+} blkmap_t;
+#define	BLKMAP_SIZE(n)	\
+	(offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))	/* bytes for a map with n slots */
+
+typedef struct freetab {	/* same naents/nents/ents[1] shape as blkmap_t, holding xfs_dir2_data_off_t values */
+	int			naents;		/* presumably slots allocated -- confirm against the users */
+	int			nents;		/* presumably slots in use -- confirm against the users */
+	xfs_dir2_data_off_t	ents[1];	/* sized through FREETAB_SIZE() */
+} freetab_t;
+#define	FREETAB_SIZE(n)	\
+	(offsetof(freetab_t, ents) + (sizeof(xfs_dir2_data_off_t) * (n)))	/* bytes for a table with n entries */
+
+typedef struct dirhash {	/* node of the directory hash table, chained through next */
+	struct dirhash		*next;
+	xfs_dir2_leaf_entry_t	entry;
+	int			seen;	/* NOTE(review): presumably set by dir_hash_see() -- confirm */
+} dirhash_t;
+#define	DIR_HASH_SIZE	1024
+#define	DIR_HASH_FUNC(h,a)	(((h) ^ (a)) % DIR_HASH_SIZE)	/* bucket index: XOR of the two arguments, modulo the table size */
+
+static xfs_extlen_t	agffreeblks;
+static xfs_extlen_t	agflongest;
+static xfs_agino_t	agicount;
+static xfs_agino_t	agifreecount;
+static xfs_fsblock_t	*blist;		/* block numbers collected by add_blist() */
+static int		blist_size;	/* number of entries in blist; 0 disables CHECK_BLIST() */
+static char		**dbmap;	/* really dbm_t:8 */
+static dirhash_t	**dirhash;
+static int		error;
+static __uint64_t	fdblocks;
+static __uint64_t	frextents;
+static __uint64_t	icount;
+static __uint64_t	ifree;
+static inodata_t	***inodata;
+static int		inodata_hash_size;
+static inodata_t	***inomap;
+static int		nflag;		/* addname_inode() records names only when this is set */
+static int		pflag;
+static qdata_t		**qpdata;
+static int		qpdo;
+static qdata_t		**qudata;
+static int		qudo;
+static unsigned		sbversion;
+static int		sbver_err;
+static int		serious_error;
+static int		sflag;
+static xfs_suminfo_t	*sumcompute;
+static xfs_suminfo_t	*sumfile;
+static const char	*typename[] = {	/* names in dbm_t order: entry i names enumerator i */
+	"unknown",
+	"agf",
+	"agfl",
+	"agi",
+	"attr",
+	"btbmapa",
+	"btbmapd",
+	"btbno",
+	"btcnt",
+	"btino",
+	"data",
+	"dir",
+	"free1",
+	"free2",
+	"freelist",
+	"inode",
+	"log",
+	"missing",
+	"quota",
+	"rtbitmap",
+	"rtdata",
+	"rtfree",
+	"rtsum",
+	"sb",
+	"symlink",
+	NULL	/* sits at index DBM_NDBM and ends the list */
+};
+static int		verbose;	/* when set, addlink_inode()/addparent_inode() report every inode */
+
+#define	CHECK_BLIST(b)	(blist_size && check_blist(b))	/* skips the check_blist() call while blist is empty */
+#define	CHECK_BLISTA(a,b)	\
+	(blist_size && check_blist(XFS_AGB_TO_FSB(mp, a, b)))	/* same, for an (a, b) pair run through XFS_AGB_TO_FSB() */
+
+typedef void	(*scan_lbtree_f_t)(xfs_btree_lblock_t	*block,	/* callback type accepted by scan_lbtree(); scanfunc_bmap() has this shape */
+				   int			level,
+				   dbm_t		type,
+				   xfs_fsblock_t	bno,
+				   inodata_t		*id,
+				   xfs_drfsbno_t	*totd,
+				   xfs_drfsbno_t	*toti,
+				   xfs_extnum_t		*nex,
+				   blkmap_t		**blkmapp,
+				   int			isroot,
+				   typnm_t		btype);
+
+typedef void	(*scan_sbtree_f_t)(xfs_btree_sblock_t	*block,	/* callback type accepted by scan_sbtree(); scanfunc_bno/cnt/ino() have this shape */
+				   int			level,
+				   xfs_agf_t		*agf,
+				   xfs_agblock_t	bno,
+				   int			isroot);
+
+static void		add_blist(xfs_fsblock_t	bno);
+static void		add_ilist(xfs_ino_t ino);
+static void		addlink_inode(inodata_t *id);
+static void		addname_inode(inodata_t *id, char *name, int namelen);
+static void		addparent_inode(inodata_t *id, xfs_ino_t parent);
+static void		blkent_append(blkent_t **entp, xfs_fsblock_t b,
+				      xfs_extlen_t c);
+static blkent_t		*blkent_new(xfs_fileoff_t o, xfs_fsblock_t b,
+				    xfs_extlen_t c);
+static void		blkent_prepend(blkent_t **entp, xfs_fsblock_t b,
+				       xfs_extlen_t c);
+static blkmap_t		*blkmap_alloc(xfs_extnum_t);
+static void		blkmap_free(blkmap_t *blkmap);
+static xfs_fsblock_t	blkmap_get(blkmap_t *blkmap, xfs_fileoff_t o);
+static int		blkmap_getn(blkmap_t *blkmap, xfs_fileoff_t o, int nb,
+				    bmap_ext_t **bmpp);
+static void		blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+				    blkent_t *newent);
+static xfs_fileoff_t	blkmap_next_off(blkmap_t *blkmap, xfs_fileoff_t o,
+					int *t);
+static void		blkmap_set_blk(blkmap_t **blkmapp, xfs_fileoff_t o,
+				       xfs_fsblock_t b);
+static void		blkmap_set_ext(blkmap_t **blkmapp, xfs_fileoff_t o,
+				       xfs_fsblock_t b, xfs_extlen_t c);
+static void		blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
+static int		blockfree_f(int argc, char **argv);
+static int		blockget_f(int argc, char **argv);
+#ifdef DEBUG
+static int		blocktrash_f(int argc, char **argv);
+#endif
+static int		blockuse_f(int argc, char **argv);
+static int		check_blist(xfs_fsblock_t bno);
+static void		check_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+				    xfs_extlen_t len, dbm_t type);
+static int		check_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+				     xfs_extlen_t len, xfs_ino_t c_ino);
+static void		check_linkcounts(xfs_agnumber_t agno);
+static int		check_range(xfs_agnumber_t agno, xfs_agblock_t agbno,
+				    xfs_extlen_t len);
+static void		check_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+				     dbm_t type);
+static int		check_rinomap(xfs_drfsbno_t bno, xfs_extlen_t len,
+				      xfs_ino_t c_ino);
+static void		check_rootdir(void);
+static int		check_rrange(xfs_drfsbno_t bno, xfs_extlen_t len);
+static void		check_set_dbmap(xfs_agnumber_t agno,
+					xfs_agblock_t agbno, xfs_extlen_t len,
+					dbm_t type1, dbm_t type2,
+					xfs_agnumber_t c_agno,
+					xfs_agblock_t c_agbno);
+static void		check_set_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+					 dbm_t type1, dbm_t type2);
+static void		check_summary(void);
+static void		checknot_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+				       xfs_extlen_t len, int typemask);
+static void		checknot_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+					int typemask);
+static void		dir_hash_add(xfs_dahash_t hash,
+				     xfs_dir2_dataptr_t addr);
+static void		dir_hash_check(inodata_t *id, int v);
+static void		dir_hash_done(void);
+static void		dir_hash_init(void);
+static int		dir_hash_see(xfs_dahash_t hash,
+				     xfs_dir2_dataptr_t addr);
+static inodata_t	*find_inode(xfs_ino_t ino, int add);
+static void		free_inodata(xfs_agnumber_t agno);
+static int		init(int argc, char **argv);
+static char		*inode_name(xfs_ino_t ino, inodata_t **ipp);
+static int		ncheck_f(int argc, char **argv);
+static char		*prepend_path(char *oldpath, char *parent);
+static xfs_ino_t	process_block_dir_v2(blkmap_t *blkmap, int *dot,
+					     int *dotdot, inodata_t *id);
+static void		process_bmbt_reclist(xfs_bmbt_rec_32_t *rp, int numrecs,
+					     dbm_t type, inodata_t *id,
+					     xfs_drfsbno_t *tot,
+					     blkmap_t **blkmapp);
+static void		process_btinode(inodata_t *id, xfs_dinode_t *dip,
+					dbm_t type, xfs_drfsbno_t *totd,
+					xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+					blkmap_t **blkmapp, int whichfork);
+static xfs_ino_t	process_data_dir_v2(int *dot, int *dotdot,
+					    inodata_t *id, int v,
+					    xfs_dablk_t dabno,
+					    freetab_t **freetabp);
+static xfs_dir2_data_free_t
+			*process_data_dir_v2_freefind(xfs_dir2_data_t *data,
+					           xfs_dir2_data_unused_t *dup);
+static void		process_dir(xfs_dinode_t *dip, blkmap_t *blkmap,
+				    inodata_t *id);
+static int		process_dir_v1(xfs_dinode_t *dip, blkmap_t *blkmap,
+				       int *dot, int *dotdot, inodata_t *id,
+				       xfs_ino_t *parent);
+static int		process_dir_v2(xfs_dinode_t *dip, blkmap_t *blkmap,
+				       int *dot, int *dotdot, inodata_t *id,
+				       xfs_ino_t *parent);
+static void		process_exinode(inodata_t *id, xfs_dinode_t *dip,
+					dbm_t type, xfs_drfsbno_t *totd,
+					xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+					blkmap_t **blkmapp, int whichfork);
+static void		process_inode(xfs_agf_t *agf, xfs_agino_t agino,
+				      xfs_dinode_t *dip, int isfree);
+static void		process_lclinode(inodata_t *id, xfs_dinode_t *dip,
+					 dbm_t type, xfs_drfsbno_t *totd,
+					 xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+					 blkmap_t **blkmapp, int whichfork);
+static xfs_ino_t	process_leaf_dir_v1(blkmap_t *blkmap, int *dot,
+					    int *dotdot, inodata_t *id);
+static xfs_ino_t	process_leaf_dir_v1_int(int *dot, int *dotdot,
+						inodata_t *id);
+static xfs_ino_t	process_leaf_node_dir_v2(blkmap_t *blkmap, int *dot,
+						 int *dotdot, inodata_t *id,
+						 xfs_fsize_t dirsize);
+static void		process_leaf_node_dir_v2_free(inodata_t *id, int v,
+						      xfs_dablk_t dbno,
+						      freetab_t *freetab);
+static void		process_leaf_node_dir_v2_int(inodata_t *id, int v,
+						     xfs_dablk_t dbno,
+						     freetab_t *freetab);
+static xfs_ino_t	process_node_dir_v1(blkmap_t *blkmap, int *dot,
+					    int *dotdot, inodata_t *id);
+static void		process_quota(int isproj, inodata_t *id,
+				      blkmap_t *blkmap);
+static void		process_rtbitmap(blkmap_t *blkmap);
+static void		process_rtsummary(blkmap_t *blkmap);
+static xfs_ino_t	process_sf_dir_v2(xfs_dinode_t *dip, int *dot,
+					  int *dotdot, inodata_t *id);
+static xfs_ino_t	process_shortform_dir_v1(xfs_dinode_t *dip, int *dot,
+						 int *dotdot, inodata_t *id);
+static void		quota_add(xfs_dqid_t projid, xfs_dqid_t userid,
+				  int dq, xfs_qcnt_t bc, xfs_qcnt_t ic,
+				  xfs_qcnt_t rc);
+static void		quota_add1(qdata_t **qt, xfs_dqid_t id, int dq,
+				   xfs_qcnt_t bc, xfs_qcnt_t ic,
+				   xfs_qcnt_t rc);
+static void		quota_check(char *s, qdata_t **qt);
+static void		quota_init(void);
+static void		scan_ag(xfs_agnumber_t agno);
+static void		scan_freelist(xfs_agf_t *agf);
+static void		scan_lbtree(xfs_fsblock_t root, int nlevels,
+				    scan_lbtree_f_t func, dbm_t type,
+				    inodata_t *id, xfs_drfsbno_t *totd,
+				    xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+				    blkmap_t **blkmapp, int isroot,
+				    typnm_t btype);
+static void		scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root,
+				    int nlevels, int isroot,
+				    scan_sbtree_f_t func, typnm_t btype);
+static void		scanfunc_bmap(xfs_btree_lblock_t *ablock, int level,
+				      dbm_t type, xfs_fsblock_t bno,
+				      inodata_t *id, xfs_drfsbno_t *totd,
+				      xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+				      blkmap_t **blkmapp, int isroot,
+				      typnm_t btype);
+static void		scanfunc_bno(xfs_btree_sblock_t *ablock, int level,
+				     xfs_agf_t *agf, xfs_agblock_t bno,
+				     int isroot);
+static void		scanfunc_cnt(xfs_btree_sblock_t *ablock, int level,
+				     xfs_agf_t *agf, xfs_agblock_t bno,
+				     int isroot);
+static void		scanfunc_ino(xfs_btree_sblock_t *ablock, int level,
+				     xfs_agf_t *agf, xfs_agblock_t bno,
+				     int isroot);
+static void		set_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+				  xfs_extlen_t len, dbm_t type,
+				  xfs_agnumber_t c_agno, xfs_agblock_t c_agbno);
+static void		set_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+				   xfs_extlen_t len, inodata_t *id);
+static void		set_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+				   dbm_t type);
+static void		set_rinomap(xfs_drfsbno_t bno, xfs_extlen_t len,
+				    inodata_t *id);
+static void		setlink_inode(inodata_t *id, nlink_t nlink, int isdir,
+				       int security);
+
+static const cmdinfo_t	blockfree_cmd = 	/* "blockfree", handled by blockfree_f() */
+	{ "blockfree", NULL, blockfree_f, 0, 0, 0,
+	  NULL, "free block usage information", NULL };
+static const cmdinfo_t	blockget_cmd = 	/* "blockget" (second name "check"), handled by blockget_f() */
+	{ "blockget", "check", blockget_f, 0, -1, 0,
+	  "[-s|-v] [-n] [-b bno]... [-i ino] ...",
+	  "get block usage and check consistency", NULL };
+#ifdef DEBUG
+static const cmdinfo_t	blocktrash_cmd = 	/* only built with DEBUG; handled by blocktrash_f() */
+	{ "blocktrash", NULL, blocktrash_f, 0, -1, 0,
+	  "[-n count] [-x minlen] [-y maxlen] [-s seed] [-0123] [-t type] ...",
+	  "trash randomly selected block(s)", NULL };
+#endif
+static const cmdinfo_t	blockuse_cmd = 	/* "blockuse", handled by blockuse_f() */
+	{ "blockuse", NULL, blockuse_f, 0, 3, 0,
+	  "[-n] [-c blockcount]",
+	  "print usage for current block(s)", NULL };
+static const cmdinfo_t	ncheck_cmd = 	/* "ncheck", handled by ncheck_f() */
+	{ "ncheck", NULL, ncheck_f, 0, -1, 0,
+	  "[-s] [-i ino] ...",
+	  "print inode-name pairs", NULL };
+
+
+static void
+add_blist(
+	xfs_fsblock_t	bno)		/* block number to remember */
+{
+	blist = xrealloc(blist, ++blist_size * sizeof(*blist));
+	/* the list just grew by one; the new number takes the last slot */
+	blist[blist_size - 1] = bno;
+}
+
+static void
+add_ilist(
+	xfs_ino_t	ino)		/* inode number to flag */
+{
+	inodata_t	*entry = find_inode(ino, 1);
+
+	/* flag the inode, or complain when find_inode() comes back empty */
+	if (entry != NULL) {
+		entry->ilist = 1;
+		return;
+	}
+	dbprintf("-i %lld bad inode number\n", ino);
+}
+
+static void
+addlink_inode(
+	inodata_t	*id)		/* inode gaining a link */
+{
+	id->link_add += 1;
+	if (!verbose && !id->ilist)
+		return;		/* neither verbose nor a flagged inode */
+	dbprintf("inode %lld add link, now %u\n", id->ino, id->link_add);
+}
+
+static void
+addname_inode(
+	inodata_t	*id,		/* inode to name */
+	char		*name,		/* only namelen bytes are read */
+	int		namelen)	/* length of name in bytes */
+{
+	if (nflag && !id->name) {	/* first name wins, and only with nflag */
+		id->name = xmalloc(namelen + 1);
+		memcpy(id->name, name, namelen);
+		id->name[namelen] = '\0';
+	}
+}
+
+/*
+ * Record inode number parent as the parent of inode id.  The parent's
+ * inodata entry is created if it does not exist yet; id->parent ends
+ * up NULL when find_inode() rejects the parent inode number.
+ */
+static void 
+addparent_inode(
+	inodata_t	*id,
+	xfs_ino_t	parent)
+{
+	inodata_t	*par = find_inode(parent, 1);
+	int		show;
+
+	id->parent = par;
+	/* Report if verbose, or if either inode is on the inode list. */
+	show = verbose || id->ilist;
+	if (!show && par)
+		show = par->ilist != 0;
+	if (show)
+		dbprintf("inode %lld parent %lld\n", id->ino, parent);
+}
+
+/*
+ * Extend the block entry *entp at its tail with c consecutive block
+ * numbers starting at b.  The entry is reallocated, so *entp is
+ * updated to point at the new copy.
+ */
+static void
+blkent_append(
+	blkent_t	**entp,
+	xfs_fsblock_t	b,
+	xfs_extlen_t	c)
+{
+	blkent_t	*grown;
+	int		k;
+
+	grown = xrealloc(*entp, BLKENT_SIZE(c + (*entp)->nblks));
+	*entp = grown;
+	k = 0;
+	while (k < c) {
+		grown->blks[grown->nblks + k] = b + k;
+		k++;
+	}
+	grown->nblks += c;
+}
+
+/*
+ * Allocate a block entry that maps c file blocks starting at file
+ * offset o onto the consecutive block numbers b, b + 1, ...
+ * Returns the new entry.
+ */
+static blkent_t *
+blkent_new(
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b,
+	xfs_extlen_t	c)
+{
+	blkent_t	*fresh = xmalloc(BLKENT_SIZE(c));
+	int		k;
+
+	fresh->startoff = o;
+	fresh->nblks = c;
+	k = 0;
+	while (k < c) {
+		fresh->blks[k] = b + k;
+		k++;
+	}
+	return fresh;
+}
+
+/*
+ * Extend the block entry *entp at its head with c consecutive block
+ * numbers starting at b; the entry's start offset moves down by c.
+ * A new entry is allocated, the old one is freed and *entp is updated
+ * to point at the replacement.
+ */
+static void
+blkent_prepend(
+	blkent_t	**entp,
+	xfs_fsblock_t	b,
+	xfs_extlen_t	c)
+{
+	int		i;
+	blkent_t	*newent;
+	blkent_t	*oldent;
+
+	oldent = *entp;
+	newent = xmalloc(BLKENT_SIZE(oldent->nblks + c));
+	newent->nblks = oldent->nblks + c;
+	newent->startoff = oldent->startoff - c;
+	/*
+	 * The new leading slots map to b, b + 1, ... (the same layout
+	 * blkent_new() and blkent_append() produce), not to b + c.
+	 */
+	for (i = 0; i < c; i++)
+		newent->blks[i] = b + i;
+	/* The old mappings follow, shifted up by c slots. */
+	for (; i < oldent->nblks + c; i++)
+		newent->blks[i] = oldent->blks[i - c];
+	xfree(oldent);
+	*entp = newent;
+}
+
+/*
+ * Allocate an empty block map with room for nex entries.  At least
+ * one entry slot is always allocated.
+ */
+static blkmap_t *
+blkmap_alloc(
+	xfs_extnum_t	nex)
+{
+	blkmap_t	*map;
+
+	if (nex < 1)
+		nex = 1;
+	map = xmalloc(BLKMAP_SIZE(nex));
+	map->nents = 0;
+	map->naents = nex;
+	return map;
+}
+
+/*
+ * Release a block map together with every entry it holds.
+ */
+static void
+blkmap_free(
+	blkmap_t	*blkmap)
+{
+	xfs_extnum_t	n;
+
+	for (n = 0; n < blkmap->nents; n++)
+		xfree(blkmap->ents[n]);
+	xfree(blkmap);
+}
+
+/*
+ * Look up the block number mapped at file offset o.  Returns
+ * NULLFSBLOCK when no entry of the map covers that offset.
+ */
+static xfs_fsblock_t
+blkmap_get(
+	blkmap_t	*blkmap,
+	xfs_fileoff_t	o)
+{
+	blkent_t	*cur;
+	int		n;
+
+	for (n = 0; n < blkmap->nents; n++) {
+		cur = blkmap->ents[n];
+		if (o < cur->startoff)
+			continue;
+		if (o >= cur->startoff + cur->nblks)
+			continue;
+		return cur->blks[o - cur->startoff];
+	}
+	return NULLFSBLOCK;
+}
+
+/*
+ * Collect the mappings for the nb file blocks starting at file offset
+ * o as an array of extents.  Blocks that are adjacent both in file
+ * offset and in block number are folded into a single extent.
+ *
+ * The array is built with realloc() and handed back through *bmpp
+ * (NULL when nothing in the range is mapped); the return value is the
+ * number of extents in it.  process_block_dir_v2() releases the array
+ * with free().
+ * NOTE(review): the realloc() result is used without a NULL check.
+ */
+static int
+blkmap_getn(
+	blkmap_t	*blkmap,
+	xfs_fileoff_t	o,
+	int		nb,
+	bmap_ext_t	**bmpp)
+{
+	bmap_ext_t	*bmp;
+	blkent_t	*ent;
+	xfs_fileoff_t	ento;
+	blkent_t	**entp;
+	int		i;
+	int		nex;
+
+	for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+	     i < blkmap->nents;
+	     i++, entp++) {
+		ent = *entp;
+		/* Entry starts at or past the end of the range: stop. */
+		if (ent->startoff >= o + nb)
+			break;
+		/* Entry ends before the range starts: skip it. */
+		if (ent->startoff + ent->nblks <= o)
+			continue;
+		for (ento = ent->startoff;
+		     ento < ent->startoff + ent->nblks && ento < o + nb;
+		     ento++) {
+			if (ento < o)
+				continue;
+			/*
+			 * Extend the last extent when this block follows
+			 * it in both offset and block number; otherwise
+			 * start a new one-block extent.
+			 */
+			if (bmp &&
+			    bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+				    ento &&
+			    bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+				    ent->blks[ento - ent->startoff])
+				bmp[nex - 1].blockcount++;
+			else {
+				bmp = realloc(bmp, ++nex * sizeof(*bmp));
+				bmp[nex - 1].startoff = ento;
+				bmp[nex - 1].startblock =
+					ent->blks[ento - ent->startoff];
+				bmp[nex - 1].blockcount = 1;
+				bmp[nex - 1].flag = 0;
+			}
+		}
+	}
+	*bmpp = bmp;
+	return nex;
+}
+
+/*
+ * Insert newent into the block map *blkmapp at the slot entp points
+ * to, moving that slot and everything after it up by one.  When the
+ * map is full it is reallocated with one more slot and *blkmapp is
+ * updated; the slot index is taken before that happens, so entp may
+ * point into the old allocation.
+ */
+static void
+blkmap_grow(
+	blkmap_t	**blkmapp,
+	blkent_t	**entp,
+	blkent_t	*newent)
+{
+	blkmap_t	*map = *blkmapp;
+	int		pos = (int)(entp - map->ents);
+	int		k;
+
+	if (map->nents == map->naents) {
+		map = xrealloc(map, BLKMAP_SIZE(map->nents + 1));
+		map->naents++;
+		*blkmapp = map;
+	}
+	k = map->nents;
+	while (k > pos) {
+		map->ents[k] = map->ents[k - 1];
+		k--;
+	}
+	map->ents[pos] = newent;
+	map->nents++;
+}
+
+/*
+ * Return the file offset just past the last entry of the map, or
+ * NULLFILEOFF when the map has no entries.
+ */
+static xfs_fileoff_t
+blkmap_last_off(
+	blkmap_t	*blkmap)
+{
+	blkent_t	*tail;
+
+	if (blkmap->nents) {
+		tail = blkmap->ents[blkmap->nents - 1];
+		return tail->startoff + tail->nblks;
+	}
+	return NULLFILEOFF;
+}
+
+/*
+ * Step to the next mapped file offset after o.  *t is the caller's
+ * cursor: the index of the entry the walk is currently in.  Passing
+ * NULLFILEOFF for o starts the walk, resetting *t to 0 and returning
+ * the first mapped offset.  Returns NULLFILEOFF when the map is empty
+ * or the walk has run off the last entry.
+ */
+static xfs_fileoff_t
+blkmap_next_off(
+	blkmap_t	*blkmap,
+	xfs_fileoff_t	o,
+	int		*t)
+{
+	int		cur;
+	blkent_t	*ent;
+
+	if (blkmap->nents == 0)
+		return NULLFILEOFF;
+	if (o == NULLFILEOFF) {
+		*t = 0;
+		return blkmap->ents[0]->startoff;
+	}
+	cur = *t;
+	ent = blkmap->ents[cur];
+	/* Still inside the current entry? */
+	if (o < ent->startoff + ent->nblks - 1)
+		return o + 1;
+	/* Otherwise move on to the start of the next entry, if any. */
+	if (cur + 1 >= blkmap->nents)
+		return NULLFILEOFF;
+	*t = cur + 1;
+	return blkmap->ents[cur + 1]->startoff;
+}
+
+/*
+ * Map the single file offset o to block b in the block map *blkmapp.
+ *
+ * The entries are walked in order.  Depending on where o falls the
+ * block is inserted as a new entry in front of an existing one,
+ * prepended to an entry, stored over an existing mapping, or appended
+ * to an entry (which is then merged with the following entry if the
+ * two have become adjacent).  If o lies beyond every entry a new entry
+ * is added at the end.  *blkmapp may be reallocated.
+ */
+static void
+blkmap_set_blk(
+	blkmap_t	**blkmapp,
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b)
+{
+	blkmap_t	*blkmap;
+	blkent_t	*ent;
+	blkent_t	**entp;
+	int		i;
+	blkent_t	*nextent;
+
+	blkmap = *blkmapp;
+	for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+		ent = *entp;
+		/*
+		 * The comparisons use o + 1 rather than startoff - 1 so
+		 * that an entry starting at offset 0 cannot make the
+		 * subtraction wrap around if the offset type is unsigned.
+		 */
+		if (o + 1 < ent->startoff) {
+			/* Gap before this entry: needs an entry of its own. */
+			ent = blkent_new(o, b, 1);
+			blkmap_grow(blkmapp, entp, ent);
+			return;
+		}
+		if (o + 1 == ent->startoff) {
+			blkent_prepend(entp, b, 1);
+			return;
+		}
+		if (o >= ent->startoff && o < ent->startoff + ent->nblks) {
+			ent->blks[o - ent->startoff] = b;
+			return;
+		}
+		if (o > ent->startoff + ent->nblks)
+			continue;
+		/* o is the offset immediately after this entry. */
+		blkent_append(entp, b, 1);
+		if (entp == &blkmap->ents[blkmap->nents - 1])
+			return;
+		ent = *entp;
+		nextent = entp[1];
+		if (ent->startoff + ent->nblks < nextent->startoff)
+			return;
+		/*
+		 * The grown entry now touches the next one: fold the next
+		 * entry in.  Its block numbers are copied one by one since
+		 * they need not be consecutive.
+		 */
+		for (i = 0; i < nextent->nblks; i++)
+			blkent_append(entp, nextent->blks[i], 1);
+		blkmap_shrink(blkmap, &entp[1]);
+		return;
+	}
+	ent = blkent_new(o, b, 1);
+	blkmap_grow(blkmapp, entp, ent);
+}
+
+/*
+ * Add the extent of c blocks at file offset o, starting at block b,
+ * to the block map *blkmapp.  An extent that begins exactly where the
+ * last entry ends is appended to it, one that begins later becomes a
+ * new last entry, and anything else is set block by block through
+ * blkmap_set_blk().
+ */
+static void
+blkmap_set_ext(
+	blkmap_t	**blkmapp,
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b,
+	xfs_extlen_t	c)
+{
+	blkmap_t	*map = *blkmapp;
+	blkent_t	**lastp;
+	xfs_extnum_t	k;
+
+	if (map->nents == 0) {
+		map->ents[0] = blkent_new(o, b, c);
+		map->nents = 1;
+		return;
+	}
+	lastp = &map->ents[map->nents - 1];
+	if ((*lastp)->startoff + (*lastp)->nblks == o) {
+		blkent_append(lastp, b, c);
+	} else if ((*lastp)->startoff + (*lastp)->nblks < o) {
+		blkmap_grow(blkmapp, &map->ents[map->nents],
+			blkent_new(o, b, c));
+	} else {
+		for (k = 0; k < c; k++)
+			blkmap_set_blk(blkmapp, o + k, b + k);
+	}
+}
+
+/*
+ * Remove the entry that entp points to from the block map: the entry
+ * is freed and the entries after it move down one slot to close the
+ * gap.  The map itself is not reallocated.
+ */
+static void
+blkmap_shrink(
+	blkmap_t	*blkmap,
+	blkent_t	**entp)
+{
+	int		i;
+	int		idx;
+
+	xfree(*entp);
+	idx = (int)(entp - blkmap->ents);
+	/*
+	 * Pull each later entry down into the slot before it.  Copying
+	 * the other way round would spread the freed pointer over the
+	 * rest of the array.
+	 */
+	for (i = idx + 1; i < blkmap->nents; i++)
+		blkmap->ents[i - 1] = blkmap->ents[i];
+	blkmap->nents--;
+}
+
+/*
+ * Handler for the "blockfree" command: release the block usage maps,
+ * inode maps and inode data set up by blockget and reset the global
+ * pointers to NULL.  The arguments are not used.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+blockfree_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+
+	if (dbmap == NULL) {
+		dbprintf("block usage information not allocated\n");
+		return 0;
+	}
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		xfree(dbmap[agno]);
+		xfree(inomap[agno]);
+		free_inodata(agno);
+	}
+	if (mp->m_sb.sb_rextents != 0) {
+		/* The realtime maps sit in the slot after the last AG. */
+		xfree(dbmap[agno]);
+		xfree(inomap[agno]);
+		xfree(sumcompute);
+		xfree(sumfile);
+		sumcompute = sumfile = NULL;
+	}
+	xfree(dbmap);
+	dbmap = NULL;
+	xfree(inomap);
+	inomap = NULL;
+	xfree(inodata);
+	inodata = NULL;
+	return 0;
+}
+
+/*
+ * Check consistency of xfs filesystem contents.
+ *
+ * Handler for the "blockget" command.  Sets up the usage maps with
+ * init(), scans every allocation group with scan_ag(), then checks
+ * the root directory, the block maps, the link counts, the realtime
+ * area, and the superblock counters and version bits.  Each mismatch
+ * bumps the global error counter.
+ *
+ * Always returns 0; the outcome is reported through exitcode, which
+ * is set to 2 if a serious error was flagged during the scan and to 3
+ * if any of the later checks failed.
+ */
+static int
+blockget_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	int		oldprefix;
+	int		sbyell;
+
+	if (dbmap) {
+		dbprintf("already have block usage information\n");
+		return 0;
+	}
+	if (!init(argc, argv))
+		return 0;
+	/* dbprefix is restored from oldprefix on every return below. */
+	oldprefix = dbprefix;
+	dbprefix |= pflag;
+	for (agno = 0, sbyell = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		scan_ag(agno);
+		/* sbyell makes sure this warning is printed only once. */
+		if (sbver_err > 4 && !sbyell && sbver_err >= agno) {
+			sbyell = 1;
+			dbprintf("WARNING: this may be a newer XFS "
+				 "filesystem.\n");
+		}
+	}
+	/* The -b block list is not needed past the scan. */
+	if (blist_size) {
+		xfree(blist);
+		blist = NULL;
+		blist_size = 0;
+	}
+	if (serious_error) {
+		exitcode = 2;
+		dbprefix = oldprefix;
+		return 0;
+	}
+	check_rootdir();
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		/*
+		 * Check that there are no blocks either
+		 * a) unaccounted for or 
+		 * b) bno-free but not cnt-free
+		 */
+		checknot_dbmap(agno, 0, mp->m_sb.sb_agblocks,
+			(1 << DBM_UNKNOWN) | (1 << DBM_FREE1));
+		check_linkcounts(agno);
+	}
+	/* Realtime area: no unaccounted blocks, summary must match. */
+	if (mp->m_sb.sb_rblocks) {
+		checknot_rdbmap(0,
+			(xfs_extlen_t)(mp->m_sb.sb_rextents *
+				       mp->m_sb.sb_rextsize),
+			1 << DBM_UNKNOWN);
+		check_summary();
+	}
+	/* Compare the superblock counters with the counted totals. */
+	if (mp->m_sb.sb_icount != icount) {
+		if (!sflag)
+			dbprintf("sb_icount %lld, counted %lld\n",
+				mp->m_sb.sb_icount, icount);
+		error++;
+	}
+	if (mp->m_sb.sb_ifree != ifree) {
+		if (!sflag)
+			dbprintf("sb_ifree %lld, counted %lld\n",
+				mp->m_sb.sb_ifree, ifree);
+		error++;
+	}
+	if (mp->m_sb.sb_fdblocks != fdblocks) {
+		if (!sflag)
+			dbprintf("sb_fdblocks %lld, counted %lld\n",
+				mp->m_sb.sb_fdblocks, fdblocks);
+		error++;
+	}
+	if (mp->m_sb.sb_frextents != frextents) {
+		if (!sflag)
+			dbprintf("sb_frextents %lld, counted %lld\n",
+				mp->m_sb.sb_frextents, frextents);
+		error++;
+	}
+	/*
+	 * Compare the feature bits collected in sbversion with the
+	 * bits the superblock actually has set.
+	 */
+	if ((sbversion & XFS_SB_VERSION_ATTRBIT) &&
+	    !XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
+		if (!sflag)
+			dbprintf("sb versionnum missing attr bit %x\n",
+				XFS_SB_VERSION_ATTRBIT);
+		error++;
+	}
+	if ((sbversion & XFS_SB_VERSION_NLINKBIT) &&
+	    !XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+		if (!sflag)
+			dbprintf("sb versionnum missing nlink bit %x\n",
+				XFS_SB_VERSION_NLINKBIT);
+		error++;
+	}
+	if ((sbversion & XFS_SB_VERSION_QUOTABIT) &&
+	    !XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+		if (!sflag)
+			dbprintf("sb versionnum missing quota bit %x\n",
+				XFS_SB_VERSION_QUOTABIT);
+		error++;
+	}
+	/* The align bit is checked the other way: set but not expected. */
+	if (!(sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+	    XFS_SB_VERSION_HASALIGN(&mp->m_sb)) {
+		if (!sflag)
+			dbprintf("sb versionnum extra align bit %x\n",
+				XFS_SB_VERSION_ALIGNBIT);
+		error++;
+	}
+	if (qudo)
+		quota_check("user", qudata);
+	if (qpdo)
+		quota_check("project", qpdata);
+	if (sbver_err > mp->m_sb.sb_agcount / 2)
+		dbprintf("WARNING: this may be a newer XFS filesystem.\n");
+	if (error)
+		exitcode = 3;
+	dbprefix = oldprefix;
+	return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Inclusive range of run lengths, in bits.  blocktrash_f() builds a
+ * table of these and blocktrash_b() picks a random length from one.
+ */
+typedef struct ltab {
+	int	min;	/* smallest length in the range */
+	int	max;	/* largest length in the range */
+} ltab_t;
+
+/*
+ * Trash a run of bits in block agno/agbno and write the block back.
+ *
+ * The run length is picked at random from the range in *ltabp and the
+ * starting bit at random within the block; a run that reaches the end
+ * of the block wraps around to its start.  mode selects what happens
+ * to each bit: 0 clears it, 1 sets it, 2 inverts it, 3 sets it to a
+ * random value.  type is only used to label the summary line printed
+ * at the end.
+ */
+static void
+blocktrash_b(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	dbm_t		type,
+	ltab_t		*ltabp,
+	int		mode)
+{
+	int		bit;
+	int		bitno;
+	char		*buf;
+	int		byte;
+	int		len;
+	int		mask;
+	int		newbit;
+	int		offset;
+	/* Indexed by mode. */
+	static char	*modestr[] = {
+		"zeroed", "set", "flipped", "randomized"
+	};
+
+	len = (int)((random() % (ltabp->max - ltabp->min + 1)) + ltabp->min);
+	offset = (int)(random() % (int)(mp->m_sb.sb_blocksize * NBBY));
+	newbit = 0;
+	/* Work on a pushed cursor; it is popped again before returning. */
+	push_cur();
+	set_cur(&typtab[DBM_UNKNOWN],
+		XFS_AGB_TO_DADDR(mp, agno, agbno), blkbb, DB_RING_IGN, NULL);
+	if ((buf = iocur_top->data) == NULL) {
+		dbprintf("can't read block %u/%u for trashing\n", agno, agbno);
+		pop_cur();
+		return;
+	}
+	for (bitno = 0; bitno < len; bitno++) {
+		/* Bit position within the block, wrapping at the end. */
+		bit = (offset + bitno) % (mp->m_sb.sb_blocksize * NBBY);
+		byte = bit / NBBY;
+		bit %= NBBY;
+		mask = 1 << bit;
+		switch (mode) {
+		case 0:
+			newbit = 0;
+			break;
+		case 1:
+			newbit = 1;
+			break;
+		case 2:
+			/* Opposite of the bit's current value. */
+			newbit = (buf[byte] & mask) == 0;
+			break;
+		case 3:
+			newbit = (int)random() & 1;
+			break;
+		}
+		if (newbit)
+			buf[byte] |= mask;
+		else
+			buf[byte] &= ~mask;
+	}
+	write_cur();
+	pop_cur();
+	printf("blocktrash: %u/%u %s block %d bit%s starting %d:%d %s\n",
+		agno, agbno, typename[type], len, len == 1 ? "" : "s",
+		offset / NBBY, offset % NBBY, modestr[mode]);
+}
+
+/*
+ * Handler for the "blocktrash" command (DEBUG builds only): trash
+ * randomly selected blocks of the chosen types.  Needs the block
+ * usage map from blockget.
+ *
+ * Options:
+ *	-0 -1 -2 -3	what to do with each bit: clear, set, invert
+ *			(the default) or randomize
+ *	-n count	number of blocks to trash (default 1)
+ *	-s seed		seed for the random number generator; without
+ *			it the seed is taken from the clock and printed
+ *	-t type		restrict to a block type, may be repeated;
+ *			without it every type in goodmask is eligible
+ *	-x min, -y max	bounds on the length in bits of the trashed run
+ *
+ * Always returns 0.
+ */
+int
+blocktrash_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agblock_t	agbno;
+	xfs_agnumber_t	agno;
+	xfs_drfsbno_t	bi;
+	xfs_drfsbno_t	blocks;
+	int		c;
+	int		count;
+	int		done;
+	int		goodmask;
+	int		i;
+	ltab_t		*lentab;
+	int		lentablen;
+	int		max;
+	int		min;
+	int		mode;
+	struct timeval	now;
+	char		*p;
+	xfs_drfsbno_t	randb;
+	uint		seed;
+	int		sopt;
+	int		tmask;
+
+	if (!dbmap) {
+		dbprintf("must run blockget first\n");
+		return 0;
+	}
+	optind = 0;
+	count = 1;
+	min = 1;
+	max = 128 * NBBY;
+	mode = 2;
+	gettimeofday(&now, NULL);
+	seed = (unsigned int)(now.tv_sec ^ now.tv_usec);
+	sopt = 0;
+	tmask = 0;
+	/* The block types that may be selected for trashing. */
+	goodmask = (1 << DBM_AGF) |
+		   (1 << DBM_AGFL) |
+		   (1 << DBM_AGI) |
+		   (1 << DBM_ATTR) |
+		   (1 << DBM_BTBMAPA) |
+		   (1 << DBM_BTBMAPD) |
+		   (1 << DBM_BTBNO) |
+		   (1 << DBM_BTCNT) |
+		   (1 << DBM_BTINO) |
+		   (1 << DBM_DIR) |
+		   (1 << DBM_INODE) |
+		   (1 << DBM_QUOTA) |
+		   (1 << DBM_RTBITMAP) |
+		   (1 << DBM_RTSUM) |
+		   (1 << DBM_SB);
+	while ((c = getopt(argc, argv, "0123n:s:t:x:y:")) != EOF) {
+		switch (c) {
+		case '0':
+			mode = 0;
+			break;
+		case '1':
+			mode = 1;
+			break;
+		case '2':
+			mode = 2;
+			break;
+		case '3':
+			mode = 3;
+			break;
+		case 'n':
+			count = (int)strtol(optarg, &p, 0);
+			if (*p != '\0' || count <= 0) {
+				dbprintf("bad blocktrash count %s\n", optarg);
+				return 0;
+			}
+			break;
+		case 's':
+			seed = (uint)strtoul(optarg, &p, 0);
+			sopt = 1;
+			break;
+		case 't':
+			for (i = 0; typename[i]; i++) {
+				if (strcmp(typename[i], optarg) == 0)
+					break;
+			}
+			if (!typename[i] || (((1 << i) & goodmask) == 0)) {
+				dbprintf("bad blocktrash type %s\n", optarg);
+				return 0;
+			}
+			tmask |= 1 << i;
+			break;
+		case 'x':
+			min = (int)strtol(optarg, &p, 0);
+			if (*p != '\0' || min <= 0 ||
+			    min > mp->m_sb.sb_blocksize * NBBY) {
+				dbprintf("bad blocktrash min %s\n", optarg);
+				return 0;
+			}
+			break;
+		case 'y':
+			max = (int)strtol(optarg, &p, 0);
+			if (*p != '\0' || max <= 0 ||
+			    max > mp->m_sb.sb_blocksize * NBBY) {
+				dbprintf("bad blocktrash max %s\n", optarg);
+				return 0;
+			}
+			break;
+		default:
+			dbprintf("bad option for blocktrash command\n");
+			return 0;
+		}
+	}
+	if (min > max) {
+		dbprintf("bad min/max for blocktrash command\n");
+		return 0;
+	}
+	if (tmask == 0)
+		tmask = goodmask;
+	/*
+	 * Split [min, max] into ranges: a new range is started at every
+	 * power of two above min, every other length extends the
+	 * current range.
+	 */
+	lentab = xmalloc(sizeof(ltab_t));
+	lentab->min = lentab->max = min;
+	lentablen = 1;
+	for (i = min + 1; i <= max; i++) {
+		if ((i & (i - 1)) == 0) {
+			lentab = xrealloc(lentab,
+				sizeof(ltab_t) * (lentablen + 1));
+			lentab[lentablen].min = lentab[lentablen].max = i;
+			lentablen++;
+		} else
+			lentab[lentablen - 1].max = i;
+	}
+	/* Count the blocks whose type is selected by tmask. */
+	for (blocks = 0, agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		for (agbno = 0, p = dbmap[agno];
+		     agbno < mp->m_sb.sb_agblocks;
+		     agbno++, p++) {
+			if ((1 << *p) & tmask)
+				blocks++;
+		}
+	}
+	if (blocks == 0) {
+		dbprintf("blocktrash: no matching blocks\n");
+		/* Do not leak the length table on this early exit. */
+		xfree(lentab);
+		return 0;
+	}
+	if (!sopt)
+		dbprintf("blocktrash: seed %u\n", seed);
+	srandom(seed);
+	for (i = 0; i < count; i++) {
+		/* Pick the randb'th matching block and walk up to it. */
+		randb = (xfs_drfsbno_t)((((__int64_t)random() << 32) |
+					 random()) % blocks);
+		for (bi = 0, agno = 0, done = 0;
+		     !done && agno < mp->m_sb.sb_agcount;
+		     agno++) {
+			for (agbno = 0, p = dbmap[agno];
+			     agbno < mp->m_sb.sb_agblocks;
+			     agbno++, p++) {
+				if (!((1 << *p) & tmask))
+					continue;
+				if (bi++ < randb)
+					continue;
+				blocktrash_b(agno, agbno, (dbm_t)*p,
+					&lentab[random() % lentablen], mode);
+				done = 1;
+				break;
+			}
+		}
+	}
+	xfree(lentab);
+	return 0;
+}
+#endif
+
+/*
+ * Handler for the "blockuse" command: print the recorded type, and
+ * the owning inode if there is one, for the block at the current
+ * cursor position.  "-c count" extends this to count consecutive
+ * blocks of the same allocation group; "-n" also prints the owning
+ * inode's name and needs the names gathered by "blockget -n".
+ * Always returns 0.
+ */
+int
+blockuse_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agblock_t	agbno;
+	xfs_agnumber_t	agno;
+	int		count;
+	xfs_agblock_t	end;
+	char		*endp;
+	xfs_fsblock_t	fsb;
+	char		*name;
+	int		opt;
+	inodata_t	*owner;
+	int		shownames = 0;
+
+	if (dbmap == NULL) {
+		dbprintf("must run blockget first\n");
+		return 0;
+	}
+	optind = 0;
+	fsb = XFS_DADDR_TO_FSB(mp, iocur_top->off >> BBSHIFT);
+	agno = XFS_FSB_TO_AGNO(mp, fsb);
+	agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+	end = agbno;
+	while ((opt = getopt(argc, argv, "c:n")) != EOF) {
+		if (opt == 'c') {
+			count = (int)strtol(optarg, &endp, 0);
+			end = agbno + count - 1;
+			if (*endp != '\0' || count <= 0 ||
+			    end >= mp->m_sb.sb_agblocks) {
+				dbprintf("bad blockuse count %s\n", optarg);
+				return 0;
+			}
+		} else if (opt == 'n') {
+			if (!nflag) {
+				dbprintf("must run blockget -n first\n");
+				return 0;
+			}
+			shownames = 1;
+		} else {
+			dbprintf("bad option for blockuse command\n");
+			return 0;
+		}
+	}
+	for (; agbno <= end; agbno++) {
+		owner = inomap[agno][agbno];
+		dbprintf("block %llu (%u/%u) type %s",
+			(xfs_dfsbno_t)XFS_AGB_TO_FSB(mp, agno, agbno),
+			agno, agbno, typename[(dbm_t)dbmap[agno][agbno]]);
+		if (owner) {
+			dbprintf(" inode %lld", owner->ino);
+			if (shownames) {
+				name = inode_name(owner->ino, NULL);
+				if (name) {
+					dbprintf(" %s", name);
+					xfree(name);
+				}
+			}
+		}
+		dbprintf("\n");
+	}
+	return 0;
+}
+
+/*
+ * Return 1 if block number bno is in the global blist array, else 0.
+ */
+static int
+check_blist(
+	xfs_fsblock_t	bno)
+{
+	int		k = 0;
+
+	while (k < blist_size) {
+		if (blist[k] == bno)
+			return 1;
+		k++;
+	}
+	return 0;
+}
+
+/*
+ * Verify that the len blocks starting at agno/agbno are all recorded
+ * in the block usage map with the given type.  Each block that is not
+ * counts as an error; the message is suppressed by sflag unless
+ * CHECK_BLISTA() selects the block.
+ */
+static void
+check_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	dbm_t		type)
+{
+	dbm_t		found;
+	xfs_extlen_t	k;
+
+	for (k = 0; k < len; k++) {
+		found = (dbm_t)dbmap[agno][agbno + k];
+		if (found == type)
+			continue;
+		if (!sflag || CHECK_BLISTA(agno, agbno + k))
+			dbprintf("block %u/%u expected type %s got "
+				 "%s\n",
+				agno, agbno + k, typename[type],
+				typename[found]);
+		error++;
+	}
+}
+
+/*
+ * Register the commands implemented in this file with add_command().
+ * blocktrash is only registered in builds with DEBUG defined.
+ */
+void
+check_init(void)
+{
+	add_command(&blockfree_cmd);
+	add_command(&blockget_cmd);
+#ifdef DEBUG
+	add_command(&blocktrash_cmd);
+#endif
+	add_command(&blockuse_cmd);
+	add_command(&ncheck_cmd);
+}
+
+/*
+ * Check that none of the len blocks starting at agno/agbno is already
+ * claimed by an inode in the inode map.  c_ino is the inode that now
+ * wants the blocks and is only used in the messages.
+ *
+ * Returns 1 if the range is valid and unclaimed, 0 if it is out of
+ * range or at least one block already has an owner.
+ */
+static int
+check_inomap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	xfs_ino_t	c_ino)
+{
+	xfs_extlen_t	k;
+	int		ok = 1;
+	inodata_t	*prev;
+
+	if (!check_range(agno, agbno, len))  {
+		dbprintf("blocks %u/%u..%u claimed by inode %lld\n",
+			agno, agbno, agbno + len - 1, c_ino);
+		return 0;
+	}
+	for (k = 0; k < len; k++) {
+		prev = inomap[agno][agbno + k];
+		if (prev == NULL)
+			continue;
+		if (!sflag || prev->ilist || CHECK_BLISTA(agno, agbno + k))
+			dbprintf("block %u/%u claimed by inode %lld, "
+				 "previous inum %lld\n",
+				agno, agbno + k, c_ino, prev->ino);
+		error++;
+		ok = 0;
+	}
+	return ok;
+}
+
+/*
+ * Walk every inode recorded for allocation group agno and compare
+ * the link count taken from the inode (link_set) with the number of
+ * links counted (link_add).  A mismatch, or a link count of zero,
+ * counts as an error.  Inodes that pass are listed by name in verbose
+ * mode or when they are on the inode list.
+ */
+static void
+check_linkcounts(
+	xfs_agnumber_t	agno)
+{
+	inodata_t	*ep;
+	inodata_t	**ht;
+	int		idx;
+	char		*path;
+
+	ht = inodata[agno];
+	for (idx = 0; idx < inodata_hash_size; idx++) {
+		for (ep = ht[idx]; ep; ep = ep->next) {
+			if (ep->link_set == ep->link_add && ep->link_set != 0) {
+				/* Consistent inode: only list it on request. */
+				if (!verbose && !ep->ilist)
+					continue;
+				path = inode_name(ep->ino, NULL);
+				if (path) {
+					dbprintf("inode %lld name %s\n",
+						ep->ino, path);
+					xfree(path);
+				}
+				continue;
+			}
+			path = inode_name(ep->ino, NULL);
+			if (!path && ep->link_add)
+				path = xstrdup("?");
+			if (!sflag || ep->ilist) {
+				if (ep->link_add)
+					dbprintf("link count mismatch "
+						 "for inode %lld (name "
+						 "%s), nlink %d, "
+						 "counted %d\n",
+						ep->ino, path,
+						ep->link_set,
+						ep->link_add);
+				else if (ep->link_set)
+					dbprintf("disconnected inode "
+						 "%lld, nlink %d\n",
+						ep->ino, ep->link_set);
+				else
+					dbprintf("allocated inode %lld "
+						 "has 0 link count\n",
+						ep->ino);
+			}
+			if (path)
+				xfree(path);
+			error++;
+		}
+	}
+}
+
+/*
+ * Check that agno is a valid allocation group number and that the
+ * len blocks starting at agbno end inside an allocation group.
+ * Returns 1 if so.  Otherwise every block of the range is reported
+ * (subject to sflag and CHECK_BLISTA()), one error is counted and 0
+ * is returned.
+ */
+static int
+check_range(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len)
+{
+	xfs_extlen_t	k;
+
+	if (agno < mp->m_sb.sb_agcount &&
+	    agbno + len - 1 < mp->m_sb.sb_agblocks)
+		return 1;
+	for (k = 0; k < len; k++) {
+		if (!sflag || CHECK_BLISTA(agno, agbno + k))
+			dbprintf("block %u/%u out of range\n",
+				agno, agbno + k);
+	}
+	error++;
+	return 0;
+}
+
+/*
+ * Realtime counterpart of check_dbmap(): verify that the len realtime
+ * blocks starting at bno are all recorded with the given type.  The
+ * realtime map is the slot after the last allocation group in dbmap.
+ */
+static void
+check_rdbmap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	dbm_t		type)
+{
+	dbm_t		found;
+	xfs_extlen_t	k;
+
+	for (k = 0; k < len; k++) {
+		found = (dbm_t)dbmap[mp->m_sb.sb_agcount][bno + k];
+		if (found == type)
+			continue;
+		if (!sflag || CHECK_BLIST(bno + k))
+			dbprintf("rtblock %llu expected type %s got "
+				 "%s\n",
+				bno + k, typename[type],
+				typename[found]);
+		error++;
+	}
+}
+
+/*
+ * Realtime counterpart of check_inomap(): check that none of the len
+ * realtime blocks starting at bno already has an owning inode.  c_ino
+ * is the inode that now wants the blocks and is only used in the
+ * messages.  Returns 1 if the range is valid and unclaimed, else 0.
+ */
+static int
+check_rinomap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	xfs_ino_t	c_ino)
+{
+	xfs_extlen_t	k;
+	int		ok = 1;
+	inodata_t	*prev;
+
+	if (!check_rrange(bno, len)) {
+		dbprintf("rtblocks %llu..%llu claimed by inode %lld\n",
+			bno, bno + len - 1, c_ino);
+		return 0;
+	}
+	for (k = 0; k < len; k++) {
+		prev = inomap[mp->m_sb.sb_agcount][bno + k];
+		if (prev == NULL)
+			continue;
+		if (!sflag || prev->ilist || CHECK_BLIST(bno + k))
+			dbprintf("rtblock %llu claimed by inode %lld, "
+				 "previous inum %lld\n",
+				bno + k, c_ino, prev->ino);
+		error++;
+		ok = 0;
+	}
+	return ok;
+}
+
+/*
+ * Check that the root inode named in the superblock has been recorded
+ * and is a directory; count an error if it has not or is not.
+ */
+static void
+check_rootdir(void)
+{
+	inodata_t	*root = find_inode(mp->m_sb.sb_rootino, 0);
+
+	if (root != NULL && root->isdir)
+		return;
+	if (root == NULL) {
+		if (!sflag)
+			dbprintf("root inode %lld is missing\n",
+				mp->m_sb.sb_rootino);
+	} else {
+		if (!sflag || root->ilist)
+			dbprintf("root inode %lld is not a directory\n",
+				mp->m_sb.sb_rootino);
+	}
+	error++;
+}
+
+/*
+ * Realtime counterpart of check_range(): check that the len realtime
+ * blocks starting at bno end below sb_rblocks.  Returns 1 if so.
+ * Otherwise every block of the range is reported (subject to sflag
+ * and CHECK_BLIST()), one error is counted and 0 is returned.
+ */
+static int
+check_rrange(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len)
+{
+	xfs_extlen_t	k;
+
+	if (bno + len - 1 < mp->m_sb.sb_rblocks)
+		return 1;
+	for (k = 0; k < len; k++) {
+		if (!sflag || CHECK_BLIST(bno + k))
+			dbprintf("rtblock %llu out of range\n",
+				bno + k);
+	}
+	error++;
+	return 0;
+}
+
+/*
+ * Change the recorded type of the len blocks starting at agno/agbno
+ * from type1 to type2.  The range is validated first, and blocks that
+ * are not currently of type1 are reported by check_dbmap(); the type
+ * is then set to type2 for every block of the range regardless.
+ * c_agno/c_agbno identify the claiming block and are only used in the
+ * out-of-range message.
+ */
+static void
+check_set_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	dbm_t		type1,
+	dbm_t		type2,
+	xfs_agnumber_t	c_agno,
+	xfs_agblock_t	c_agbno)
+{
+	xfs_extlen_t	k;
+	int		mayprint;
+
+	if (!check_range(agno, agbno, len))  {
+		dbprintf("blocks %u/%u..%u claimed by block %u/%u\n", agno,
+			agbno, agbno + len - 1, c_agno, c_agbno);
+		return;
+	}
+	check_dbmap(agno, agbno, len, type1);
+	/* Skip the per-block test when nothing could be printed anyway. */
+	mayprint = verbose || blist_size;
+	for (k = 0; k < len; k++) {
+		dbmap[agno][agbno + k] = (char)type2;
+		if (!mayprint)
+			continue;
+		if (verbose || CHECK_BLISTA(agno, agbno + k))
+			dbprintf("setting block %u/%u to %s\n", agno, agbno + k,
+				typename[type2]);
+	}
+}
+
+/*
+ * Realtime counterpart of check_set_dbmap(): change the recorded type
+ * of the len realtime blocks starting at bno from type1 to type2.
+ * Nothing is changed if the range is out of bounds; blocks that are
+ * not of type1 are reported by check_rdbmap() but still set.
+ */
+static void
+check_set_rdbmap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	dbm_t		type1,
+	dbm_t		type2)
+{
+	xfs_extlen_t	k;
+	int		mayprint;
+
+	if (!check_rrange(bno, len))
+		return;
+	check_rdbmap(bno, len, type1);
+	/* Skip the per-block test when nothing could be printed anyway. */
+	mayprint = verbose || blist_size;
+	for (k = 0; k < len; k++) {
+		dbmap[mp->m_sb.sb_agcount][bno + k] = (char)type2;
+		if (!mayprint)
+			continue;
+		if (verbose || CHECK_BLIST(bno + k))
+			dbprintf("setting rtblock %llu to %s\n",
+				bno + k, typename[type2]);
+	}
+}
+
+/*
+ * Compare the realtime summary read from the filesystem (sumfile)
+ * with the one computed during the scan (sumcompute), element by
+ * element: m_rsumlevels levels of sb_rbmblocks elements each.  Every
+ * difference counts as an error.
+ */
+static void
+check_summary(void)
+{
+	xfs_drfsbno_t	bno;
+	xfs_suminfo_t	*computed = sumcompute;
+	int		log;
+	xfs_suminfo_t	*ondisk = sumfile;
+
+	for (log = 0; log < mp->m_rsumlevels; log++) {
+		bno = 0;
+		while (bno < mp->m_sb.sb_rbmblocks) {
+			if (*computed != *ondisk) {
+				if (!sflag)
+					dbprintf("rt summary mismatch, size %d "
+						 "block %llu, file: %d, "
+						 "computed: %d\n",
+						log, bno, *ondisk, *computed);
+				error++;
+			}
+			bno++;
+			computed++;
+			ondisk++;
+		}
+	}
+}
+
+/*
+ * Count an error for every one of the len blocks starting at
+ * agno/agbno whose recorded type is included in typemask, a bit mask
+ * built from (1 << type) values.  Does nothing beyond the
+ * check_range() report if the range is out of bounds.
+ */
+static void
+checknot_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	int		typemask)
+{
+	xfs_extlen_t	k;
+	char		*slot;
+
+	if (!check_range(agno, agbno, len))
+		return;
+	for (k = 0; k < len; k++) {
+		slot = &dbmap[agno][agbno + k];
+		if (((1 << *slot) & typemask) == 0)
+			continue;
+		if (!sflag || CHECK_BLISTA(agno, agbno + k))
+			dbprintf("block %u/%u type %s not expected\n",
+				agno, agbno + k, typename[(dbm_t)*slot]);
+		error++;
+	}
+}
+
+/*
+ * Realtime counterpart of checknot_dbmap(): count an error for every
+ * one of the len realtime blocks starting at bno whose recorded type
+ * is included in typemask.
+ */
+static void
+checknot_rdbmap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	int		typemask)
+{
+	xfs_extlen_t	k;
+	char		*slot;
+
+	if (!check_rrange(bno, len))
+		return;
+	for (k = 0; k < len; k++) {
+		slot = &dbmap[mp->m_sb.sb_agcount][bno + k];
+		if (((1 << *slot) & typemask) == 0)
+			continue;
+		if (!sflag || CHECK_BLIST(bno + k))
+			dbprintf("rtblock %llu type %s not expected\n",
+				bno + k, typename[(dbm_t)*slot]);
+		error++;
+	}
+}
+
+/*
+ * Add the pair (hash, addr) to the directory hash table as a new,
+ * not yet seen entry at the head of its bucket.
+ * NOTE(review): the malloc() result is used without a NULL check.
+ */
+static void
+dir_hash_add(
+	xfs_dahash_t		hash,
+	xfs_dir2_dataptr_t	addr)
+{
+	int			bucket = DIR_HASH_FUNC(hash, addr);
+	dirhash_t		*node;
+
+	node = malloc(sizeof(*node));
+	node->seen = 0;
+	node->entry.hashval = hash;
+	node->entry.address = addr;
+	node->next = dirhash[bucket];
+	dirhash[bucket] = node;
+}
+
+/*
+ * Count an error for every entry in the directory hash table that
+ * dir_hash_see() has not marked as seen.  id is the directory inode
+ * being checked; a nonzero v forces the message out even when sflag
+ * is set.
+ */
+static void
+dir_hash_check(
+	inodata_t	*id,
+	int		v)
+{
+	int		bucket;
+	dirhash_t	*node;
+
+	for (bucket = 0; bucket < DIR_HASH_SIZE; bucket++) {
+		node = dirhash[bucket];
+		while (node) {
+			if (!node->seen) {
+				if (!sflag || id->ilist || v)
+					dbprintf("dir ino %lld missing leaf entry for "
+						 "%x/%x\n",
+						id->ino, node->entry.hashval,
+						node->entry.address);
+				error++;
+			}
+			node = node->next;
+		}
+	}
+}
+
+/*
+ * Free every entry in the directory hash table and empty all the
+ * buckets.  The bucket array itself is kept.
+ */
+static void
+dir_hash_done(void)
+{
+	int		bucket;
+	dirhash_t	*following;
+	dirhash_t	*node;
+
+	for (bucket = 0; bucket < DIR_HASH_SIZE; bucket++) {
+		node = dirhash[bucket];
+		while (node) {
+			following = node->next;
+			free(node);
+			node = following;
+		}
+		dirhash[bucket] = NULL;
+	}
+}
+
+/*
+ * Allocate the zeroed bucket array of the directory hash table the
+ * first time this is called; later calls leave it alone.
+ */
+static void
+dir_hash_init(void)
+{
+	if (dirhash != NULL)
+		return;
+	dirhash = calloc(DIR_HASH_SIZE, sizeof(*dirhash));
+}
+
+/*
+ * Mark the directory hash table entry for (hash, addr) as seen.
+ * Returns 0 if the entry was found and had not been seen before,
+ * 1 if it had already been seen, and -1 if there is no such entry.
+ */
+static int
+dir_hash_see(
+	xfs_dahash_t		hash,
+	xfs_dir2_dataptr_t	addr)
+{
+	dirhash_t		*node;
+
+	for (node = dirhash[DIR_HASH_FUNC(hash, addr)];
+	     node;
+	     node = node->next) {
+		if (node->entry.hashval != hash ||
+		    node->entry.address != addr)
+			continue;
+		if (node->seen)
+			return 1;
+		node->seen = 1;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Look up the inodata entry for inode number ino in the per-AG hash
+ * table.  If there is none and add is nonzero, a zeroed entry is
+ * created for it and linked in at the head of its hash chain.
+ *
+ * Returns the entry, or NULL if the inode number is not valid (AG
+ * number out of range, or the number does not survive being split
+ * into AG number and AG inode and put together again) or if the entry
+ * does not exist and add is zero.
+ */
+static inodata_t *
+find_inode(
+	xfs_ino_t	ino,
+	int		add)
+{
+	xfs_agino_t	agino = XFS_INO_TO_AGINO(mp, ino);
+	xfs_agnumber_t	agno = XFS_INO_TO_AGNO(mp, ino);
+	inodata_t	**bucketp;
+	inodata_t	*ent;
+
+	if (agno >= mp->m_sb.sb_agcount)
+		return NULL;
+	if (XFS_AGINO_TO_INO(mp, agno, agino) != ino)
+		return NULL;
+	bucketp = &inodata[agno][agino % inodata_hash_size];
+	for (ent = *bucketp; ent; ent = ent->next) {
+		if (ent->ino == ino)
+			return ent;
+	}
+	if (!add)
+		return NULL;
+	ent = xcalloc(1, sizeof(*ent));
+	ent->ino = ino;
+	ent->next = *bucketp;
+	*bucketp = ent;
+	return ent;
+}
+
+/*
+ * Free the inode hash table of allocation group agno: every entry on
+ * every chain, the names attached to the entries, and the table.
+ */
+static void
+free_inodata(
+	xfs_agnumber_t	agno)
+{
+	int		bucket;
+	inodata_t	*following;
+	inodata_t	*node;
+	inodata_t	**table = inodata[agno];
+
+	for (bucket = 0; bucket < inodata_hash_size; bucket++) {
+		for (node = table[bucket]; node; node = following) {
+			following = node->next;
+			if (node->name)
+				xfree(node->name);
+			xfree(node);
+		}
+	}
+	xfree(table);
+}
+
+/*
+ * Set up the state for a blockget run: allocate the block usage maps,
+ * inode maps and inode hash tables (plus the realtime maps and
+ * summary buffers if the filesystem has realtime extents), parse the
+ * blockget options and reset the counters.
+ *
+ * Options: -b bno adds a block to the block list, -i ino adds an
+ * inode to the inode list, and -n, -p, -s, -v set nflag, pflag, sflag
+ * and verbose.
+ *
+ * Returns 1 on success.  Returns 0 if the superblock magic number is
+ * wrong or an unknown option is given; in both cases nothing is left
+ * allocated, so blockget can simply be run again.
+ */
+static int
+init(
+	int		argc,
+	char		**argv)
+{
+	xfs_fsblock_t	bno;
+	int		c;
+	xfs_ino_t	ino;
+	int		rt;
+
+	if (mp->m_sb.sb_magicnum != XFS_SB_MAGIC) {
+		dbprintf("bad superblock magic number %x, giving up\n",
+			mp->m_sb.sb_magicnum);
+		return 0;
+	}
+	/* With realtime extents the maps get one slot past the last AG. */
+	rt = mp->m_sb.sb_rextents != 0;
+	dbmap = xmalloc((mp->m_sb.sb_agcount + rt) * sizeof(*dbmap));
+	inomap = xmalloc((mp->m_sb.sb_agcount + rt) * sizeof(*inomap));
+	inodata = xmalloc(mp->m_sb.sb_agcount * sizeof(*inodata));
+	/*
+	 * Size the per-AG inode hash tables from the inode count,
+	 * clamped to [MIN_INODATA_HASH_SIZE, MAX_INODATA_HASH_SIZE].
+	 */
+	inodata_hash_size =
+		(int)MAX(MIN(mp->m_sb.sb_icount /
+				(INODATA_AVG_HASH_LENGTH * mp->m_sb.sb_agcount),
+			     MAX_INODATA_HASH_SIZE),
+			 MIN_INODATA_HASH_SIZE);
+	for (c = 0; c < mp->m_sb.sb_agcount; c++) {
+		dbmap[c] = xcalloc(mp->m_sb.sb_agblocks, sizeof(**dbmap));
+		inomap[c] = xcalloc(mp->m_sb.sb_agblocks, sizeof(**inomap));
+		inodata[c] = xcalloc(inodata_hash_size, sizeof(**inodata));
+	}
+	if (rt) {
+		dbmap[c] = xcalloc(mp->m_sb.sb_rblocks, sizeof(**dbmap));
+		inomap[c] = xcalloc(mp->m_sb.sb_rblocks, sizeof(**inomap));
+		sumfile = xcalloc(mp->m_rsumsize, 1);
+		sumcompute = xcalloc(mp->m_rsumsize, 1);
+	}
+	/*
+	 * The options are parsed after the allocations because
+	 * add_ilist() needs the inode hash tables.
+	 */
+	nflag = sflag = verbose = optind = 0;
+	while ((c = getopt(argc, argv, "b:i:npsv")) != EOF) {
+		switch (c) {
+		case 'b':
+			bno = atoll(optarg);
+			add_blist(bno);
+			break;
+		case 'i':
+			ino = atoll(optarg);
+			add_ilist(ino);
+			break;
+		case 'n':
+			nflag = 1;
+			break;
+		case 'p':
+			pflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			dbprintf("bad option for blockget command\n");
+			/*
+			 * Undo the allocations above.  Leaving dbmap set
+			 * would make the next blockget refuse to run.
+			 */
+			if (blist_size) {
+				xfree(blist);
+				blist = NULL;
+				blist_size = 0;
+			}
+			blockfree_f(0, NULL);
+			return 0;
+		}
+	}
+	error = sbver_err = serious_error = 0;
+	fdblocks = frextents = icount = ifree = 0;
+	/* Start with the base version; feature bits are added below. */
+	sbversion = XFS_SB_VERSION_4;
+	if (mp->m_sb.sb_inoalignmt)
+		sbversion |= XFS_SB_VERSION_ALIGNBIT;
+	if ((mp->m_sb.sb_uquotino && mp->m_sb.sb_uquotino != NULLFSINO) ||
+	    (mp->m_sb.sb_pquotino && mp->m_sb.sb_pquotino != NULLFSINO))
+		sbversion |= XFS_SB_VERSION_QUOTABIT;
+	quota_init();
+	return 1;
+}
+
+/*
+ * Build the path name of inode ino from the names recorded for it and
+ * for its ancestors, walking up the parent links until an inode
+ * without a parent or without a name is reached.
+ *
+ * If ipp is not NULL the inode's inodata entry (or NULL) is stored
+ * through it.  Returns a newly allocated string which the caller must
+ * release with xfree(), or NULL if the inode is unknown or has no
+ * name.
+ */
+static char *
+inode_name(
+	xfs_ino_t	ino,
+	inodata_t	**ipp)
+{
+	inodata_t	*id = find_inode(ino, 0);
+	char		*longer;
+	char		*path;
+
+	if (ipp)
+		*ipp = id;
+	if (id == NULL || id->name == NULL)
+		return NULL;
+	path = xstrdup(id->name);
+	for (id = id->parent; id && id->name; id = id->parent) {
+		longer = prepend_path(path, id->name);
+		xfree(path);
+		path = longer;
+	}
+	return path;
+}
+
+/*
+ * Handler for the "ncheck" command: print inode number / path name
+ * pairs from the names gathered by "blockget -n".
+ *
+ * With one or more "-i ino" options only those inodes are printed.
+ * Otherwise every recorded inode that has a name is printed; "-s"
+ * limits that listing to inodes whose security flag is set.
+ * Directories get "/." appended to their name.  Always returns 0.
+ */
+static int
+ncheck_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	int		c;
+	inodata_t	*hp;
+	inodata_t	**ht;
+	int		i;
+	inodata_t	*id;
+	xfs_ino_t	*ilist;
+	int		ilist_size;
+	xfs_ino_t	*ilp;
+	xfs_ino_t	ino;
+	char		*p;
+	int		security;
+
+	if (!inodata || !nflag) {
+		dbprintf("must run blockget -n first\n");
+		return 0;
+	}
+	security = optind = ilist_size = 0;
+	ilist = NULL;
+	while ((c = getopt(argc, argv, "i:s")) != EOF) {
+		switch (c) {
+		case 'i':
+			ino = atoll(optarg);
+			ilist = xrealloc(ilist, (ilist_size + 1) *
+				sizeof(*ilist));
+			ilist[ilist_size++] = ino;
+			break;
+		case 's':
+			security = 1;
+			break;
+		default:
+			dbprintf("bad option -%c for ncheck command\n", c);
+			/* Do not leak inode numbers collected so far. */
+			if (ilist)
+				xfree(ilist);
+			return 0;
+		}
+	}
+	if (ilist) {
+		for (ilp = ilist; ilp < &ilist[ilist_size]; ilp++) {
+			ino = *ilp;
+			/* A name implies inode_name() also set hp. */
+			if ((p = inode_name(ino, &hp)) != NULL) {
+				dbprintf("%11llu %s", ino, p);
+				if (hp->isdir)
+					dbprintf("/.");
+				dbprintf("\n");
+				xfree(p);
+			}
+		}
+		xfree(ilist);
+		return 0;
+	}
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		ht = inodata[agno];
+		for (i = 0; i < inodata_hash_size; i++) {
+			for (hp = ht[i]; hp; hp = hp->next) {
+				ino = XFS_AGINO_TO_INO(mp, agno, hp->ino);
+				p = inode_name(ino, &id);
+				if (!p || !id)
+					continue;
+				if (!security || id->security) {
+					dbprintf("%11llu %s", ino, p);
+					if (hp->isdir)
+						dbprintf("/.");
+					dbprintf("\n");
+				}
+				xfree(p);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Return a newly allocated string holding "parent/oldpath".  Neither
+ * argument is modified or freed.
+ */
+static char *
+prepend_path(
+	char	*oldpath,
+	char	*parent)
+{
+	int	plen = (int)strlen(parent);
+	int	total = (int)(plen + strlen(oldpath) + 2);
+	char	*joined = xmalloc(total);
+
+	/* parent, a slash, then oldpath with its terminating NUL */
+	memcpy(joined, parent, plen);
+	joined[plen] = '/';
+	strcpy(joined + plen + 1, oldpath);
+	return joined;
+}
+
+/*
+ * Process the first directory block (file offset 0, m_dirblkfsbs
+ * filesystem blocks long) of the version 2 directory inode id, whose
+ * block map is blkmap.
+ *
+ * The block is read on a pushed cursor and handed to
+ * process_data_dir_v2(), after which the directory hash table is
+ * checked and emptied.  dot and dotdot are passed through to
+ * process_data_dir_v2().
+ *
+ * Returns the value process_data_dir_v2() returns, or 0 after
+ * counting an error if the block is not mapped or cannot be read.
+ */
+static xfs_ino_t
+process_block_dir_v2(
+	blkmap_t	*blkmap,
+	int		*dot,
+	int		*dotdot,
+	inodata_t	*id)
+{
+	xfs_fsblock_t	b;
+	bbmap_t		bbmap;
+	bmap_ext_t	*bmp;
+	int		nex;
+	xfs_ino_t	parent;
+	int		v;
+	int		x;
+
+	nex = blkmap_getn(blkmap, 0, mp->m_dirblkfsbs, &bmp);
+	v = id->ilist || verbose;
+	if (nex == 0) {
+		if (!sflag || v)
+			dbprintf("block 0 for directory inode %lld is "
+				 "missing\n",
+				id->ino);
+		error++;
+		return 0;
+	}
+	push_cur();
+	/* A block made of several extents needs a map to be read. */
+	if (nex > 1)
+		make_bbmap(&bbmap, nex, bmp);
+	set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bmp->startblock),
+		mp->m_dirblkfsbs * blkbb, DB_RING_IGN, nex > 1 ? &bbmap : NULL);
+	/* Also be verbose if any block involved passes CHECK_BLIST(). */
+	for (x = 0; !v && x < nex; x++) {
+		for (b = bmp[x].startblock;
+		     !v && b < bmp[x].startblock + bmp[x].blockcount;
+		     b++)
+			v = CHECK_BLIST(b);
+	}
+	free(bmp);
+	if (iocur_top->data == NULL) {
+		if (!sflag || id->ilist || v)
+			dbprintf("can't read block 0 for directory inode "
+				 "%lld\n",
+				id->ino);
+		error++;
+		/* Balance the push_cur() above before bailing out. */
+		pop_cur();
+		return 0;
+	}
+	dir_hash_init();
+	parent = process_data_dir_v2(dot, dotdot, id, v, mp->m_dirdatablk,
+		NULL);
+	dir_hash_check(id, v);
+	dir_hash_done();
+	pop_cur();
+	return parent;
+}
+
+/*
+ * Walk numrecs packed bmap extent records starting at rp on behalf of
+ * inode id.
+ *
+ * Each record is decoded with convert_extent() into a file offset (o),
+ * start block (s), block count (c) and flag (f).  Unless sflag is set the
+ * extent is range checked against the superblock geometry; an extent that
+ * fails is reported and skipped.  Accepted extents are optionally recorded
+ * in *blkmapp and are entered into the block maps with set_rdbmap()/
+ * set_rinomap() for DBM_RTDATA, or set_dbmap()/set_inomap() for every
+ * other type.  The block count of every accepted extent is added to *tot.
+ */
+static void
+process_bmbt_reclist(
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	dbm_t			type,
+	inodata_t		*id,
+	xfs_drfsbno_t		*tot,
+	blkmap_t		**blkmapp)
+{
+	xfs_agblock_t		agbno;
+	xfs_agnumber_t		agno;
+	xfs_fsblock_t		b;
+	xfs_dfilblks_t		c;
+	xfs_dfilblks_t		cp;
+	int			f;
+	int			i;
+	xfs_agblock_t		iagbno;
+	xfs_agnumber_t		iagno;
+	xfs_dfiloff_t		o;
+	xfs_dfiloff_t		op;
+	xfs_dfsbno_t		s;
+	int			v;
+
+	/* cp/op remember the previous record's count and offset */
+	cp = op = 0;
+	v = verbose || id->ilist;
+	iagno = XFS_INO_TO_AGNO(mp, id->ino);
+	iagbno = XFS_INO_TO_AGBNO(mp, id->ino);
+	for (i = 0; i < numrecs; i++, rp++) {
+		convert_extent((xfs_bmbt_rec_64_t *)rp, &o, &s, &c, &f);
+		if (v)
+			dbprintf("inode %lld extent [%lld,%lld,%lld,%d]\n",
+				id->ino, o, s, c, f);
+		/* the previous extent must end at or before this one's offset */
+		if (!sflag && i > 0 && op + cp > o)
+			dbprintf("bmap rec out of order, inode %lld entry %d\n",
+				id->ino, i);
+		op = o;
+		cp = c;
+		/*
+		 * Range checks.  A rejected extent is skipped entirely: it is
+		 * neither mapped nor counted in *tot.
+		 * NOTE(review): with sflag set none of these checks run, so s
+		 * and c reach the set_*() calls below unvalidated -- confirm
+		 * those helpers tolerate out-of-range values.
+		 */
+		if (type == DBM_RTDATA) {
+			if (!sflag && s >= mp->m_sb.sb_rblocks) {
+				dbprintf("inode %lld bad rt block number %lld, "
+					 "offset %lld\n",
+					id->ino, s, o);
+				continue;
+			}
+		} else if (!sflag) {
+			agno = XFS_FSB_TO_AGNO(mp, s);
+			agbno = XFS_FSB_TO_AGBNO(mp, s);
+			if (agno >= mp->m_sb.sb_agcount ||
+			    agbno >= mp->m_sb.sb_agblocks) {
+				dbprintf("inode %lld bad block number %lld "
+					 "[%d,%d], offset %lld\n",
+					id->ino, s, agno, agbno, o);
+				continue;
+			}
+			/* the last block of the extent must also lie inside the AG */
+			if (agbno + c - 1 >= mp->m_sb.sb_agblocks) {
+				dbprintf("inode %lld bad block number %lld "
+					 "[%d,%d], offset %lld\n",
+					id->ino, s + c - 1, agno,
+					agbno + (xfs_agblock_t)c - 1, o);
+				continue;
+			}
+		}
+		if (blkmapp && *blkmapp)
+			blkmap_set_ext(blkmapp, (xfs_fileoff_t)o,
+				(xfs_fsblock_t)s, (xfs_extlen_t)c);
+		if (type == DBM_RTDATA) {
+			set_rdbmap((xfs_fsblock_t)s, (xfs_extlen_t)c,
+				DBM_RTDATA);
+			set_rinomap((xfs_fsblock_t)s, (xfs_extlen_t)c, id);
+			/* only walked when blist_size is nonzero; o advances with b */
+			for (b = (xfs_fsblock_t)s;
+			     blist_size && b < s + c;
+			     b++, o++) {
+				if (CHECK_BLIST(b))
+					dbprintf("inode %lld block %lld at "
+						 "offset %lld\n",
+						id->ino, (xfs_dfsbno_t)b, o);
+			}
+		} else {
+			agno = XFS_FSB_TO_AGNO(mp, (xfs_fsblock_t)s);
+			agbno = XFS_FSB_TO_AGBNO(mp, (xfs_fsblock_t)s);
+			set_dbmap(agno, agbno, (xfs_extlen_t)c, type, iagno,
+				iagbno);
+			set_inomap(agno, agbno, (xfs_extlen_t)c, id);
+			/* only walked when blist_size is nonzero; o advances with b */
+			for (b = (xfs_fsblock_t)s;
+			     blist_size && b < s + c;
+			     b++, o++, agbno++) {
+				if (CHECK_BLIST(b))
+					dbprintf("inode %lld block %lld at "
+						 "offset %lld\n",
+						id->ino, (xfs_dfsbno_t)b, o);
+			}
+		}
+		*tot += c;
+	}
+}
+
+/*
+ * Process one fork (whichfork) of inode id whose bmap btree root is stored
+ * in the on-disk inode dip.
+ *
+ * The root's level and record count are sanity checked first; either
+ * failure is reported, bumps error and ends processing.  A level-0 root
+ * carries extent records directly: they are passed to
+ * process_bmbt_reclist() and their number is added to *nex.  Otherwise
+ * each pointer in the root is followed with scan_lbtree() using
+ * scanfunc_bmap, after which *nex is compared against the number of
+ * records that would fit in the fork itself.
+ */
+static void
+process_btinode(
+	inodata_t		*id,
+	xfs_dinode_t		*dip,
+	dbm_t			type,
+	xfs_drfsbno_t		*totd,
+	xfs_drfsbno_t		*toti,
+	xfs_extnum_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork)
+{
+	xfs_bmdr_block_t	*dib;
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+
+	dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_NOCONVERT);
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) >= XFS_BM_MAXLEVELS(mp, whichfork)) {
+		if (!sflag || id->ilist)
+			dbprintf("level for ino %lld %s fork bmap root too "
+				 "large (%u)\n",
+				id->ino,
+				whichfork == XFS_DATA_FORK ? "data" : "attr",
+				INT_GET(dib->bb_level, ARCH_CONVERT));
+		error++;
+		return;
+	}
+	/*
+	 * NOTE(review): the fork size is obtained with XFS_DFORK_SIZE_ARCH()
+	 * here and with XFS_DFORK_SIZE() further down -- presumably
+	 * equivalent for this use; confirm.
+	 */
+	if (INT_GET(dib->bb_numrecs, ARCH_CONVERT) >
+	    XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+		    xfs_bmdr, INT_GET(dib->bb_level, ARCH_CONVERT) == 0)) {
+		if (!sflag || id->ilist)
+			dbprintf("numrecs for ino %lld %s fork bmap root too "
+				 "large (%u)\n",
+				id->ino, 
+				whichfork == XFS_DATA_FORK ? "data" : "attr",
+				INT_GET(dib->bb_numrecs, ARCH_CONVERT));
+		error++;
+		return;
+	}
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) {
+		/* leaf root: the records are the extents themselves */
+		rp = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+			XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+			xfs_bmdr, dib, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+					whichfork),
+				xfs_bmdr, 1));
+		process_bmbt_reclist(rp, INT_GET(dib->bb_numrecs, ARCH_CONVERT), type, id, totd,
+			blkmapp);
+		*nex += INT_GET(dib->bb_numrecs, ARCH_CONVERT);
+		/* returning here means the "too low" check below is skipped */
+		return;
+	} else {
+		/* interior root: descend through every child pointer */
+		pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+			xfs_bmdr, dib, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+							       whichfork),
+						xfs_bmdr, 0));
+		for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++)
+			scan_lbtree((xfs_fsblock_t)INT_GET(pp[i], ARCH_CONVERT), INT_GET(dib->bb_level, ARCH_CONVERT),
+				scanfunc_bmap, type, id, totd, toti, nex,
+				blkmapp, 1,
+				whichfork == XFS_DATA_FORK ?
+					TYP_BMAPBTD : TYP_BMAPBTA);
+	}
+	/*
+	 * An extent count no larger than the number of records that fit in
+	 * the fork is reported as too low for btree format.
+	 */
+	if (*nex <=
+	    XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT) / sizeof(xfs_bmbt_rec_t)) {
+		if (!sflag || id->ilist)
+			dbprintf("extent count for ino %lld %s fork too low "
+				 "(%d) for file format\n",
+				id->ino,
+				whichfork == XFS_DATA_FORK ? "data" : "attr",
+				*nex);
+		error++;
+	}
+}
+
+/*
+ * Check the v2 directory block held in iocur_top->data for directory inode
+ * id.  The block may carry either the single-block magic
+ * (XFS_DIR2_BLOCK_MAGIC) or the data-block magic (XFS_DIR2_DATA_MAGIC).
+ *
+ *   dot, dotdot  counters incremented for each "." and ".." entry seen
+ *   v            non-zero forces messages even when sflag is set
+ *   dabno        block number used in messages and to derive the
+ *                directory block index (db)
+ *   freetabp     if non-NULL, the free table is grown as needed and slot
+ *                db is set to the block's bestfree[0] length
+ *
+ * Every entry is added to the directory hash with dir_hash_add(), its
+ * inode looked up with find_inode() and linked with addlink_inode().  The
+ * bestfree table, the entry/unused tags, adjacent free regions and (for
+ * the single-block format) the leaf entries and tail counts are verified;
+ * each class of problem bumps error once.
+ *
+ * Returns the inode number of the ".." entry (NULLFSINO if find_inode()
+ * did not return an inode for it), 0 if no ".." entry was seen, or
+ * NULLFSINO when the block magic is bad.
+ */
+static xfs_ino_t
+process_data_dir_v2(
+	int			*dot,
+	int			*dotdot,
+	inodata_t		*id,
+	int			v,
+	xfs_dablk_t		dabno,
+	freetab_t		**freetabp)
+{
+	xfs_dir2_dataptr_t	addr;
+	xfs_dir2_data_free_t	*bf;
+	int			bf_err;
+	xfs_dir2_block_t	*block;
+	xfs_dir2_block_tail_t	*btp = NULL;
+	inodata_t		*cid;
+	int			count;
+	xfs_dir2_data_t		*data;
+	xfs_dir2_db_t		db;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_free_t	*dfp;
+	xfs_dir2_data_unused_t	*dup;
+	char			*endptr;
+	int			freeseen;
+	freetab_t		*freetab;
+	xfs_dahash_t		hash;
+	int			i;
+	int			lastfree;
+	int			lastfree_err;
+	xfs_dir2_leaf_entry_t	*lep = NULL;
+	xfs_ino_t		lino;
+	xfs_ino_t		parent = 0;
+	char			*ptr;
+	int			stale = 0;
+	int			tag_err;
+	xfs_dir2_data_off_t	*tagp;
+
+	/* data and block are two views of the same buffer */
+	data = iocur_top->data;
+	block = iocur_top->data;
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC &&
+	    INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) {
+		if (!sflag || v)
+			dbprintf("bad directory data magic # %#x for dir ino "
+				 "%lld block %d\n",
+				INT_GET(data->hdr.magic, ARCH_CONVERT), id->ino, dabno);
+		error++;
+		return NULLFSINO;
+	}
+	db = XFS_DIR2_DA_TO_DB(mp, dabno);
+	bf = data->hdr.bestfree;
+	ptr = (char *)data->u;
+	/*
+	 * Work out where the entry area ends: at the leaf entries for the
+	 * single-block format, at the end of the block otherwise (or when
+	 * the tail looks bogus, in which case lep is cleared so the leaf
+	 * entries are not examined later).
+	 */
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+		lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+		endptr = (char *)lep;
+		if (endptr <= ptr || endptr > (char *)btp) {
+			endptr = (char *)data + mp->m_dirblksize;
+			lep = NULL;
+			if (!sflag || v)
+				dbprintf("bad block directory tail for dir ino "
+					 "%lld\n",
+					id->ino);
+			error++;
+		}
+	} else
+		endptr = (char *)data + mp->m_dirblksize;
+	bf_err = lastfree_err = tag_err = 0;
+	count = lastfree = freeseen = 0;
+	/*
+	 * freeseen is a bitmask over the three bestfree slots.  A slot with
+	 * zero length counts as seen up front and must have a zero offset.
+	 */
+	if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {
+		bf_err += INT_GET(bf[0].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 0;
+	}
+	if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+		bf_err += INT_GET(bf[1].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 1;
+	}
+	if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+		bf_err += INT_GET(bf[2].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 2;
+	}
+	/* the bestfree lengths must be in non-increasing order */
+	bf_err += INT_GET(bf[0].length, ARCH_CONVERT) < INT_GET(bf[1].length, ARCH_CONVERT);
+	bf_err += INT_GET(bf[1].length, ARCH_CONVERT) < INT_GET(bf[2].length, ARCH_CONVERT);
+	if (freetabp) {
+		freetab = *freetabp;
+		/*
+		 * Grow the table so that slot db exists; new slots below db
+		 * start out as NULLDATAOFF.
+		 * NOTE(review): the realloc() result is used unchecked.
+		 */
+		if (freetab->naents <= db) {
+			*freetabp = freetab =
+				realloc(freetab, FREETAB_SIZE(db + 1));
+			for (i = freetab->naents; i < db; i++)
+				freetab->ents[i] = NULLDATAOFF;
+			freetab->naents = db + 1;
+		}
+		if (freetab->nents < db + 1)
+			freetab->nents = db + 1;
+		freetab->ents[db] = INT_GET(bf[0].length, ARCH_CONVERT);
+	}
+	/* walk the entry area; each item is either an unused region or an entry */
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			/* two unused regions in a row are flagged */
+			lastfree_err += lastfree != 0;
+			if ((INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)) ||
+			    INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+			    (char *)(tagp = XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT)) >=
+			    endptr) {
+				if (!sflag || v)
+					dbprintf("dir %lld block %d bad free "
+						 "entry at %d\n",
+						id->ino, dabno,
+						(int)((char *)dup -
+						      (char *)data));
+				error++;
+				break;
+			}
+			/* the tag must hold this region's own offset in the block */
+			tag_err += INT_GET(*tagp, ARCH_CONVERT) != (char *)dup - (char *)data;
+			dfp = process_data_dir_v2_freefind(data, dup);
+			if (dfp) {
+				/* a bestfree slot may be matched only once */
+				i = (int)(dfp - bf);
+				bf_err += (freeseen & (1 << i)) != 0;
+				freeseen |= 1 << i;
+			} else
+				bf_err += INT_GET(dup->length, ARCH_CONVERT) > INT_GET(bf[2].length, ARCH_CONVERT);
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			lastfree = 1;
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)dup;
+		/* a zero-length name is reported but the entry is still processed */
+		if (dep->namelen == 0) {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d zero length entry "
+					 "at %d\n",
+					id->ino, dabno,
+					(int)((char *)dep - (char *)data));
+			error++;
+		}
+		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+		if ((char *)tagp >= endptr) {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d bad entry at %d\n",
+					id->ino, dabno,
+					(int)((char *)dep - (char *)data));
+			error++;
+			break;
+		}
+		tag_err += INT_GET(*tagp, ARCH_CONVERT) != (char *)dep - (char *)data;
+		addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db,
+			(char *)dep - (char *)data);
+		hash = libxfs_da_hashname((char *)dep->name, dep->namelen);
+		dir_hash_add(hash, addr);
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		count++;
+		lastfree = 0;
+		lino = INT_GET(dep->inumber, ARCH_CONVERT);
+		cid = find_inode(lino, 1);
+		if (v)
+			dbprintf("dir %lld block %d entry %*.*s %lld\n",
+				id->ino, dabno, dep->namelen, dep->namelen,
+				dep->name, lino);
+		if (cid)
+			addlink_inode(cid);
+		else {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d entry %*.*s bad "
+					 "inode number %lld\n",
+					id->ino, dabno, dep->namelen,
+					dep->namelen, dep->name, lino);
+			error++;
+		}
+		/* classify the entry: "..", an ordinary name, or "." */
+		if (dep->namelen == 2 && dep->name[0] == '.' &&
+		    dep->name[1] == '.') {
+			if (parent) {
+				if (!sflag || v)
+					dbprintf("multiple .. entries in dir "
+						 "%lld (%lld, %lld)\n",
+						id->ino, parent, lino);
+				error++;
+			} else
+				parent = cid ? lino : NULLFSINO;
+			(*dotdot)++;
+		} else if (dep->namelen != 1 || dep->name[0] != '.') {
+			if (cid != NULL) {
+				/* the first directory to name a child becomes its parent */
+				if (!cid->parent)
+					cid->parent = id;
+				addname_inode(cid, (char *)dep->name,
+					dep->namelen);
+			}
+		} else {
+			if (lino != id->ino) {
+				if (!sflag || v)
+					dbprintf("dir %lld entry . inode "
+						 "number mismatch (%lld)\n",
+						id->ino, lino);
+				error++;
+			}
+			(*dot)++;
+		}
+	}
+	/*
+	 * Single-block format only: walk the leaf entries, counting stale
+	 * ones and passing the rest to dir_hash_see().  lep is NULL when
+	 * the tail was rejected above, which skips the walk.
+	 */
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		endptr = (char *)data + mp->m_dirblksize;
+		for (i = stale = 0; lep && i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+			if ((char *)&lep[i] >= endptr) {
+				if (!sflag || v)
+					dbprintf("dir %lld block %d bad count "
+						 "%u\n",
+						id->ino, dabno, INT_GET(btp->count, ARCH_CONVERT));
+				error++;
+				break;
+			}
+			if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+				stale++;
+			else if (dir_hash_see(INT_GET(lep[i].hashval, ARCH_CONVERT), INT_GET(lep[i].address, ARCH_CONVERT))) {
+				if (!sflag || v)
+					dbprintf("dir %lld block %d extra leaf "
+						 "entry %x %x\n",
+						id->ino, dabno, INT_GET(lep[i].hashval, ARCH_CONVERT),
+						INT_GET(lep[i].address, ARCH_CONVERT));
+				error++;
+			}
+		}
+	}
+	/* all three bestfree slots must have been accounted for */
+	bf_err += freeseen != 7;
+	if (bf_err) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d bad bestfree data\n",
+				id->ino, dabno);
+		error++;
+	}
+	/* tail count minus stale must equal the number of entries walked */
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC &&
+	    count != INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d bad block tail count %d "
+				 "(stale %d)\n",
+				id->ino, dabno, INT_GET(btp->count, ARCH_CONVERT), INT_GET(btp->stale, ARCH_CONVERT));
+		error++;
+	}
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC && stale != INT_GET(btp->stale, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d bad stale tail count %d\n",
+				id->ino, dabno, INT_GET(btp->stale, ARCH_CONVERT));
+		error++;
+	}
+	if (lastfree_err) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d consecutive free entries\n",
+				id->ino, dabno);
+		error++;
+	}
+	if (tag_err) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d entry/unused tag "
+				 "mismatch\n",
+				id->ino, dabno);
+		error++;
+	}
+	return parent;
+}
+
+/*
+ * Find the bestfree slot in data's header that refers to the unused region
+ * dup.  Returns a pointer to that slot, or NULL when dup is shorter than
+ * the last bestfree entry, when a slot with offset 0 is reached first, or
+ * when no slot records dup's offset.
+ */
+static xfs_dir2_data_free_t *
+process_data_dir_v2_freefind(
+	xfs_dir2_data_t		*data,
+	xfs_dir2_data_unused_t	*dup)
+{
+	xfs_dir2_data_free_t	*table = data->hdr.bestfree;
+	int			slot;
+	xfs_dir2_data_aoff_t	want;
+
+	/* too short to have displaced even the smallest recorded region */
+	if (INT_GET(dup->length, ARCH_CONVERT) <
+	    INT_GET(table[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT))
+		return NULL;
+
+	want = (xfs_dir2_data_aoff_t)((char *)dup - (char *)data);
+	for (slot = 0; slot < XFS_DIR2_DATA_FD_COUNT; slot++) {
+		/* a zero offset ends the search */
+		if (INT_GET(table[slot].offset, ARCH_CONVERT) == 0)
+			break;
+		if (INT_GET(table[slot].offset, ARCH_CONVERT) == want)
+			return &table[slot];
+	}
+	return NULL;
+}
+
+/*
+ * Run the directory checks for inode id and validate its "." and ".."
+ * entries.  The version-specific walker fills in the dot/dotdot counts and
+ * the parent inode number; a non-zero return from it ends processing.
+ * When ".." names a different, valid inode it is recorded with
+ * addparent_inode().
+ */
+static void
+process_dir(
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	inodata_t	*id)
+{
+	xfs_fsblock_t	bno;
+	int		dot = 0;
+	int		dotdot = 0;
+	int		failed;
+	int		is_root;
+	xfs_ino_t	parent;
+	int		self_parent;
+
+	if (XFS_DIR_IS_V2(mp))
+		failed = process_dir_v2(dip, blkmap, &dot, &dotdot, id,
+			&parent);
+	else
+		failed = process_dir_v1(dip, blkmap, &dot, &dotdot, id,
+			&parent);
+	if (failed)
+		return;
+
+	bno = XFS_INO_TO_FSB(mp, id->ino);
+	if (dot == 0) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("no . entry for directory %lld\n", id->ino);
+		error++;
+	}
+	if (dotdot == 0) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("no .. entry for directory %lld\n", id->ino);
+		error++;
+		return;
+	}
+
+	is_root = id->ino == mp->m_sb.sb_rootino;
+	self_parent = parent == id->ino;
+	/* only the root directory may be its own parent */
+	if (self_parent && !is_root) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf(". and .. same for non-root directory %lld\n",
+				id->ino);
+		error++;
+		return;
+	}
+	/* ... and the root directory must be its own parent */
+	if (is_root && !self_parent) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("root directory %lld has .. %lld\n", id->ino,
+				parent);
+		error++;
+		return;
+	}
+	if (parent != NULLFSINO && !self_parent)
+		addparent_inode(id, parent);
+}
+
+/*
+ * Dispatch a version 1 directory to the matching walker based on the
+ * inode's size and data fork format, storing the walker's result in
+ * *parent.  Returns 0 when a walker was run, or 1 (after reporting and
+ * bumping error) when size and format match none of the known layouts.
+ */
+static int
+process_dir_v1(
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*dot,
+	int		*dotdot,
+	inodata_t	*id,
+	xfs_ino_t	*parent)
+{
+	xfs_dinode_core_t	*dic = &dip->di_core;
+	int			in_blocks;
+
+	/* small enough to live inside the inode */
+	if (dic->di_format == XFS_DINODE_FMT_LOCAL &&
+	    dic->di_size <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT)) {
+		*parent = process_shortform_dir_v1(dip, dot, dotdot, id);
+		return 0;
+	}
+
+	in_blocks = dic->di_format == XFS_DINODE_FMT_EXTENTS ||
+		    dic->di_format == XFS_DINODE_FMT_BTREE;
+	/* exactly one block: leaf form */
+	if (in_blocks && dic->di_size == XFS_LBSIZE(mp)) {
+		*parent = process_leaf_dir_v1(blkmap, dot, dotdot, id);
+		return 0;
+	}
+	/* anything larger: node form */
+	if (in_blocks && dic->di_size >= XFS_LBSIZE(mp)) {
+		*parent = process_node_dir_v1(blkmap, dot, dotdot, id);
+		return 0;
+	}
+
+	dbprintf("bad size (%lld) or format (%d) for directory inode "
+		 "%lld\n",
+		dic->di_size, (int)dic->di_format,
+		id->ino);
+	error++;
+	return 1;
+}
+
+/*
+ * Dispatch a version 2 directory to the matching walker, storing the
+ * walker's result in *parent.  The choice depends on the inode size and
+ * data fork format and, for block-mapped directories, on the last offset
+ * recorded in blkmap.  Returns 0 when a walker was run, or 1 (after
+ * reporting and bumping error) when nothing matches.
+ */
+static int
+process_dir_v2(
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*dot,
+	int		*dotdot,
+	inodata_t	*id,
+	xfs_ino_t	*parent)
+{
+	xfs_dinode_core_t	*dic = &dip->di_core;
+	int			in_blocks;
+	xfs_fileoff_t		last = 0;
+
+	if (blkmap)
+		last = blkmap_last_off(blkmap);
+
+	/* shortform: stored inside the inode */
+	if (dic->di_format == XFS_DINODE_FMT_LOCAL &&
+	    dic->di_size <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT)) {
+		*parent = process_sf_dir_v2(dip, dot, dotdot, id);
+		return 0;
+	}
+
+	in_blocks = dic->di_format == XFS_DINODE_FMT_EXTENTS ||
+		    dic->di_format == XFS_DINODE_FMT_BTREE;
+	/* mapping ends after exactly one directory block */
+	if (in_blocks && last == mp->m_dirblkfsbs) {
+		*parent = process_block_dir_v2(blkmap, dot, dotdot, id);
+		return 0;
+	}
+	/* mapping reaches at least one block past m_dirleafblk */
+	if (in_blocks && last >= mp->m_dirleafblk + mp->m_dirblkfsbs) {
+		*parent = process_leaf_node_dir_v2(blkmap, dot, dotdot, id,
+			dic->di_size);
+		return 0;
+	}
+
+	dbprintf("bad size (%lld) or format (%d) for directory inode "
+		 "%lld\n",
+		dic->di_size, (int)dic->di_format,
+		id->ino);
+	error++;
+	return 1;
+}
+
+/*
+ * Process one fork (whichfork) of inode id whose extent records are stored
+ * directly in the on-disk inode dip.  *nex is set to the fork's extent
+ * count; when that count fits in the fork the records are handed to
+ * process_bmbt_reclist(), otherwise the problem is reported and error is
+ * bumped.  toti is not used here.
+ */
+/* ARGSUSED */
+static void
+process_exinode(
+	inodata_t		*id,
+	xfs_dinode_t		*dip,
+	dbm_t			type,
+	xfs_drfsbno_t		*totd,
+	xfs_drfsbno_t		*toti,
+	xfs_extnum_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork)
+{
+	xfs_bmbt_rec_32_t	*recs;
+
+	*nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_NOCONVERT);
+	if (*nex >= 0 &&
+	    *nex <=
+	    XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT) / sizeof(xfs_bmbt_rec_32_t)) {
+		recs = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork,
+			ARCH_NOCONVERT);
+		process_bmbt_reclist(recs, *nex, type, id, totd, blkmapp);
+		return;
+	}
+	/* negative, or more records than the fork has room for */
+	if (!sflag || id->ilist)
+		dbprintf("bad number of extents %d for inode %lld\n",
+			*nex, id->ino);
+	error++;
+}
+
+/*
+ * Check one on-disk inode.
+ *
+ *   agf     AG free space header; its sequence number gives the AG number
+ *   agino   inode number within that AG
+ *   dip     the on-disk inode; its core is converted in place
+ *   isfree  non-zero if the inode is expected to be free
+ *
+ * A free inode only has its magic, version, block count, link count and
+ * mode verified.  An in-use inode has its formats validated, its data and
+ * attribute forks walked, quota usage recorded when quota checking is on,
+ * its block and extent counts compared with what was found, and finally
+ * any type-specific processing (directory, realtime bitmap/summary,
+ * quota) run.  Every problem found bumps error.
+ */
+static void
+process_inode(
+	xfs_agf_t		*agf,
+	xfs_agino_t		agino,
+	xfs_dinode_t		*dip,
+	int			isfree)
+{
+	blkmap_t		*blkmap;
+	xfs_fsblock_t		bno = 0;
+	xfs_dinode_core_t	tdic;
+	xfs_dinode_core_t	*dic;
+	inodata_t		*id = NULL;
+	xfs_ino_t		ino;
+	xfs_extnum_t		nextents = 0;
+	int			nlink;
+	int			security;
+	xfs_drfsbno_t		totblocks;
+	xfs_drfsbno_t		totdblocks = 0;
+	xfs_drfsbno_t		totiblocks = 0;
+	dbm_t			type;
+	xfs_extnum_t		anextents = 0;
+	xfs_drfsbno_t		atotdblocks = 0;
+	xfs_drfsbno_t		atotiblocks = 0;
+	xfs_qcnt_t		bc = 0;
+	xfs_qcnt_t		ic = 0;
+	xfs_qcnt_t		rc = 0;
+	/* indexed by (di_mode & IFMT) >> 12: bitmask of permitted di_format values */
+	static char		okfmts[] = {
+		0,				/* type 0 unused */
+		1 << XFS_DINODE_FMT_DEV,	/* FIFO */
+		1 << XFS_DINODE_FMT_DEV,	/* CHR */
+		0,				/* type 3 unused */
+		(1 << XFS_DINODE_FMT_LOCAL) |
+		(1 << XFS_DINODE_FMT_EXTENTS) |
+		(1 << XFS_DINODE_FMT_BTREE),	/* DIR */
+		0,				/* type 5 unused */
+		1 << XFS_DINODE_FMT_DEV,	/* BLK */
+		0,				/* type 7 unused */
+		(1 << XFS_DINODE_FMT_EXTENTS) |
+		(1 << XFS_DINODE_FMT_BTREE),	/* REG */
+		0,				/* type 9 unused */
+		(1 << XFS_DINODE_FMT_LOCAL) |
+		(1 << XFS_DINODE_FMT_EXTENTS),	/* LNK */
+		0,				/* type 11 unused */
+		1 << XFS_DINODE_FMT_DEV,	/* SOCK */
+		0,				/* type 13 unused */
+		1 << XFS_DINODE_FMT_UUID,	/* MNT */
+		0				/* type 15 unused */
+	};
+	static char		*fmtnames[] = {
+		"dev", "local", "extents", "btree", "uuid"
+	};
+
+	/* convert the core, then copy it back into the inode */
+	libxfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, &tdic, 1,
+				 ARCH_CONVERT);
+	memcpy(&dip->di_core, &tdic, sizeof(xfs_dinode_core_t));
+	dic=&dip->di_core;
+
+	ino = XFS_AGINO_TO_INO(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), agino);
+	/* only in-use inodes get an inodata record; id stays NULL otherwise */
+	if (!isfree) {
+		id = find_inode(ino, 1);
+		bno = XFS_INO_TO_FSB(mp, ino);
+		blkmap = NULL;
+	}
+	/* "isfree ||" short-circuits before id (NULL for free inodes) is touched */
+	if (dic->di_magic != XFS_DINODE_MAGIC) {
+		if (!sflag || isfree || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad magic number %#x for inode %lld\n",
+				dic->di_magic, ino);
+		error++;
+		return;
+	}
+	if (!XFS_DINODE_GOOD_VERSION(dic->di_version)) {
+		if (!sflag || isfree || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad version number %#x for inode %lld\n",
+				dic->di_version, ino);
+		error++;
+		return;
+	}
+	if (isfree) {
+		/*
+		 * id is NULL for a free inode, so it must not be consulted
+		 * when deciding whether to print; only sflag and the block
+		 * list apply here.
+		 */
+		if (dic->di_nblocks != 0) {
+			if (!sflag || CHECK_BLIST(bno))
+				dbprintf("bad nblocks %lld for free inode "
+					 "%lld\n",
+					dic->di_nblocks, ino);
+			error++;
+		}
+		if (dic->di_version == XFS_DINODE_VERSION_1)
+			nlink = dic->di_onlink;
+		else
+			nlink = dic->di_nlink;
+		if (nlink != 0) {
+			if (!sflag || CHECK_BLIST(bno))
+				dbprintf("bad nlink %d for free inode %lld\n",
+					nlink, ino);
+			error++;
+		}
+		if (dic->di_mode != 0) {
+			if (!sflag || CHECK_BLIST(bno))
+				dbprintf("bad mode %#o for free inode %lld\n",
+					dic->di_mode, ino);
+			error++;
+		}
+		return;
+	}
+	/*
+	 * di_mode is a 16-bit uint so no need to check the < 0 case
+	 */
+	if ((((dic->di_mode & IFMT) >> 12) > 15) ||
+	    (!(okfmts[(dic->di_mode & IFMT) >> 12] & (1 << dic->di_format)))) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad format %d for inode %lld type %#o\n",
+				dic->di_format, id->ino, dic->di_mode & IFMT);
+		error++;
+		return;
+	}
+	if ((unsigned int)XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_NOCONVERT) >= XFS_LITINO(mp))  {
+		if (!sflag || id->ilist)
+			dbprintf("bad fork offset %d for inode %lld\n",
+				dic->di_forkoff, id->ino);
+		error++;
+		return;
+	}
+	if ((unsigned int)dic->di_aformat > XFS_DINODE_FMT_BTREE)  {
+		if (!sflag || id->ilist)
+			dbprintf("bad attribute format %d for inode %lld\n",
+				dic->di_aformat, id->ino);
+		error++;
+		return;
+	}
+	if (verbose || id->ilist || CHECK_BLIST(bno))
+		dbprintf("inode %lld mode %#o fmt %s "
+			 "afmt %s "
+			 "nex %d anex %d nblk %lld sz %lld%s%s\n",
+			id->ino, dic->di_mode, fmtnames[dic->di_format],
+			fmtnames[dic->di_aformat],
+			dic->di_nextents,
+			dic->di_anextents,
+			dic->di_nblocks, dic->di_size,
+			dic->di_flags & XFS_DIFLAG_REALTIME ? " rt" : "",
+			dic->di_flags & XFS_DIFLAG_PREALLOC ? " pre" : ""
+				);
+	/*
+	 * Classify the inode.  A block map is only allocated for the types
+	 * whose contents are examined further below.
+	 */
+	security = 0;
+	switch (dic->di_mode & IFMT) {
+	case IFDIR:
+		type = DBM_DIR;
+		if (dic->di_format == XFS_DINODE_FMT_LOCAL)
+			break;
+		blkmap = blkmap_alloc(dic->di_nextents);
+		break;
+	case IFREG:
+		if (dic->di_flags & XFS_DIFLAG_REALTIME)
+			type = DBM_RTDATA;
+		else if (id->ino == mp->m_sb.sb_rbmino) {
+			type = DBM_RTBITMAP;
+			blkmap = blkmap_alloc(dic->di_nextents);
+			addlink_inode(id);
+		} else if (id->ino == mp->m_sb.sb_rsumino) {
+			type = DBM_RTSUM;
+			blkmap = blkmap_alloc(dic->di_nextents);
+			addlink_inode(id);
+		}
+		else if (id->ino == mp->m_sb.sb_uquotino ||
+			 id->ino == mp->m_sb.sb_pquotino) {
+			type = DBM_QUOTA;
+			blkmap = blkmap_alloc(dic->di_nextents);
+			addlink_inode(id);
+		}
+		else
+			type = DBM_DATA;
+		if (dic->di_mode & (ISUID | ISGID))
+			security = 1;
+		break;
+	case IFLNK:
+		type = DBM_SYMLINK;
+		break;
+	default:
+		security = 1;
+		type = DBM_UNKNOWN;
+		break;
+	}
+	if (dic->di_version == XFS_DINODE_VERSION_1)
+		setlink_inode(id, dic->di_onlink, type == DBM_DIR, security);
+	else {
+		sbversion |= XFS_SB_VERSION_NLINKBIT;
+		setlink_inode(id, dic->di_nlink, type == DBM_DIR, security);
+	}
+	/* data fork */
+	switch (dic->di_format) {
+	case XFS_DINODE_FMT_LOCAL:
+		process_lclinode(id, dip, type, &totdblocks, &totiblocks,
+			&nextents, &blkmap, XFS_DATA_FORK);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		process_exinode(id, dip, type, &totdblocks, &totiblocks,
+			&nextents, &blkmap, XFS_DATA_FORK);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		process_btinode(id, dip, type, &totdblocks, &totiblocks,
+			&nextents, &blkmap, XFS_DATA_FORK);
+		break;
+	}
+	/* attribute fork, if the inode has one */
+	if (XFS_DFORK_Q_ARCH(dip, ARCH_NOCONVERT)) {
+		sbversion |= XFS_SB_VERSION_ATTRBIT;
+		switch (dic->di_aformat) {
+		case XFS_DINODE_FMT_LOCAL:
+			process_lclinode(id, dip, DBM_ATTR, &atotdblocks,
+				&atotiblocks, &anextents, NULL, XFS_ATTR_FORK);
+			break;
+		case XFS_DINODE_FMT_EXTENTS:
+			process_exinode(id, dip, DBM_ATTR, &atotdblocks,
+				&atotiblocks, &anextents, NULL, XFS_ATTR_FORK);
+			break;
+		case XFS_DINODE_FMT_BTREE:
+			process_btinode(id, dip, DBM_ATTR, &atotdblocks,
+				&atotiblocks, &anextents, NULL, XFS_ATTR_FORK);
+			break;
+		}
+	}
+	if (qpdo || qudo) {
+		/* realtime data blocks are charged to rc, everything else to bc */
+		switch (type) {
+		case DBM_DATA:
+		case DBM_DIR:
+		case DBM_RTBITMAP:
+		case DBM_RTSUM:
+		case DBM_SYMLINK:
+		case DBM_UNKNOWN:
+			bc = totdblocks + totiblocks +
+			     atotdblocks + atotiblocks;
+			ic = 1;
+			break;
+		case DBM_RTDATA:
+			bc = totiblocks + atotdblocks + atotiblocks;
+			rc = totdblocks;
+			ic = 1;
+			break;
+		default:
+			/* other types are not charged; a label needs a statement */
+			break;
+		}
+		if (ic)
+			quota_add(dic->di_version >= XFS_DINODE_VERSION_2 ?
+					dic->di_projid : -1,
+				  dic->di_uid, 0, bc, ic, rc);
+	}
+	/* compare what the inode claims with what was actually counted */
+	totblocks = totdblocks + totiblocks + atotdblocks + atotiblocks;
+	if (totblocks != dic->di_nblocks) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad nblocks %lld for inode %lld, counted "
+				 "%lld\n",
+				dic->di_nblocks, id->ino, totblocks);
+		error++;
+	}
+	if (nextents != dic->di_nextents) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad nextents %d for inode %lld, counted %d\n",
+				dic->di_nextents, id->ino, nextents);
+		error++;
+	}
+	if (anextents != dic->di_anextents) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad anextents %d for inode %lld, counted "
+				 "%d\n",
+				dic->di_anextents, id->ino, anextents);
+		error++;
+	}
+	if (type == DBM_DIR)
+		process_dir(dip, blkmap, id);
+	else if (type == DBM_RTBITMAP)
+		process_rtbitmap(blkmap);
+	else if (type == DBM_RTSUM)
+		process_rtsummary(blkmap);
+	/*
+	 * If the CHKD flag is not set, this can legitimately contain garbage;
+	 * xfs_repair may have cleared that bit.
+	 */
+	else if (type == DBM_QUOTA && (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD))
+		process_quota(id->ino == mp->m_sb.sb_pquotino, id, blkmap);
+	if (blkmap)
+		blkmap_free(blkmap);
+}
+
+/*
+ * Size-check a fork (whichfork) of inode id that is stored inline in the
+ * on-disk inode dip.  For the data fork the inode size must fit in the
+ * data fork area; for the attribute fork the shortform attribute total
+ * size must fit in the attribute fork area.  A violation is reported and
+ * bumps error.  type, totd, toti, nex and blkmapp are not used here.
+ */
+/* ARGSUSED */
+static void
+process_lclinode(
+	inodata_t		*id,
+	xfs_dinode_t		*dip,
+	dbm_t			type,
+	xfs_drfsbno_t		*totd,
+	xfs_drfsbno_t		*toti,
+	xfs_extnum_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork)
+{
+	xfs_dinode_core_t	*core = &dip->di_core;
+	xfs_fsblock_t		fsb = XFS_INO_TO_FSB(mp, id->ino);
+	xfs_attr_shortform_t	*sfattr;
+
+	if (whichfork == XFS_DATA_FORK &&
+	    core->di_size > XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT)) {
+		if (!sflag || id->ilist || CHECK_BLIST(fsb))
+			dbprintf("local inode %lld data is too large (size "
+				 "%lld)\n",
+				id->ino, core->di_size);
+		error++;
+		return;
+	}
+	if (whichfork != XFS_ATTR_FORK)
+		return;
+
+	sfattr = (xfs_attr_shortform_t *)XFS_DFORK_PTR_ARCH(dip, whichfork,
+		ARCH_NOCONVERT);
+	if (INT_GET(sfattr->hdr.totsize, ARCH_CONVERT) <=
+	    XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_NOCONVERT))
+		return;
+	if (!sflag || id->ilist || CHECK_BLIST(fsb))
+		dbprintf("local inode %lld attr is too large "
+			 "(size %d)\n",
+			id->ino, INT_GET(sfattr->hdr.totsize, ARCH_CONVERT));
+	error++;
+}
+
+/*
+ * Check a version 1 directory that occupies a single leaf block.
+ *
+ * Block 0 is looked up through blkmap, made the current buffer and handed
+ * to process_leaf_dir_v1_int().  dot and dotdot are passed through.
+ *
+ * Returns the value process_leaf_dir_v1_int() reports for the parent
+ * inode, or 0 when block 0 is not mapped or cannot be read (error is
+ * bumped in both of those cases).
+ */
+static xfs_ino_t
+process_leaf_dir_v1(
+	blkmap_t	*blkmap,
+	int		*dot,
+	int		*dotdot,
+	inodata_t	*id)
+{
+	xfs_fsblock_t	bno;
+	xfs_ino_t	parent;
+
+	bno = blkmap_get(blkmap, 0);
+	if (bno == NULLFSBLOCK) {
+		if (!sflag || id->ilist)
+			dbprintf("block 0 for directory inode %lld is "
+				 "missing\n",
+				id->ino);
+		error++;
+		return 0;
+	}
+	push_cur();
+	set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bno), blkbb, DB_RING_IGN,
+		NULL);
+	if (iocur_top->data == NULL) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("can't read block 0 for directory inode "
+				 "%lld\n",
+				id->ino);
+		error++;
+		/* balance the push_cur() above before bailing out */
+		pop_cur();
+		return 0;
+	}
+	parent = process_leaf_dir_v1_int(dot, dotdot, id);
+	pop_cur();
+	return parent;
+}
+
+/*
+ * Walk the entries of the version 1 directory leaf block held in
+ * iocur_top->data on behalf of directory inode id.
+ *
+ * Each entry's inode is looked up with find_inode() and linked with
+ * addlink_inode(); ordinary names are recorded with addname_inode() and
+ * the child's parent is set if it has none yet.  *dot and *dotdot are
+ * incremented for "." and ".." entries.
+ *
+ * Returns the inode number of the ".." entry (NULLFSINO if find_inode()
+ * did not return an inode for it), 0 if no ".." entry was seen, or
+ * NULLFSINO when the leaf magic is bad.
+ */
+static xfs_ino_t
+process_leaf_dir_v1_int(
+	int			*dot,
+	int			*dotdot,
+	inodata_t		*id)
+{
+	xfs_fsblock_t		bno;
+	inodata_t		*cid;
+	xfs_dir_leaf_entry_t	*entry;
+	int			i;
+	xfs_dir_leafblock_t	*leaf;
+	xfs_ino_t		lino;
+	xfs_dir_leaf_name_t	*namest;
+	xfs_ino_t		parent = 0;
+	int			v;
+
+	/* the block being examined, derived from the current buffer's address */
+	bno = XFS_DADDR_TO_FSB(mp, iocur_top->bb);
+	v = verbose || id->ilist || CHECK_BLIST(bno);
+	leaf = iocur_top->data;
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad directory leaf magic # %#x for dir ino "
+				 "%lld\n",
+				INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), id->ino);
+		error++;
+		return NULLFSINO;
+	}
+	entry = &leaf->entries[0];
+	/*
+	 * NOTE(review): hdr.count and each entry's nameidx come straight from
+	 * the block and are not range checked in this function -- confirm
+	 * that is acceptable for corrupt input.
+	 */
+	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+                lino=DIRINO_GET_ARCH(&namest->inumber, ARCH_CONVERT);
+		cid = find_inode(lino, 1);
+		if (v)
+			dbprintf("dir %lld entry %*.*s %lld\n", id->ino,
+				entry->namelen, entry->namelen, namest->name,
+				lino);
+		if (cid)
+			addlink_inode(cid);
+		else {
+			if (!sflag)
+				dbprintf("dir %lld entry %*.*s bad inode "
+					 "number %lld\n",
+					id->ino, entry->namelen, entry->namelen,
+					namest->name, lino);
+			error++;
+		}
+		/* classify the entry: "..", an ordinary name, or "." */
+		if (entry->namelen == 2 && namest->name[0] == '.' &&
+		    namest->name[1] == '.') {
+			if (parent) {
+				if (!sflag || id->ilist || CHECK_BLIST(bno))
+					dbprintf("multiple .. entries in dir "
+						 "%lld (%lld, %lld)\n",
+						id->ino, parent, lino);
+				error++;
+			} else
+				parent = cid ? lino : NULLFSINO;
+			(*dotdot)++;
+		} else if (entry->namelen != 1 || namest->name[0] != '.') {
+			if (cid != NULL) {
+				/* the first directory to name a child becomes its parent */
+				if (!cid->parent)
+					cid->parent = id;
+				addname_inode(cid, (char *)namest->name,
+					entry->namelen);
+			}
+		} else {
+			if (lino != id->ino) {
+				if (!sflag)
+					dbprintf("dir %lld entry . inode "
+						 "number mismatch (%lld)\n",
+						id->ino, lino);
+				error++;
+			}
+			(*dot)++;
+		}
+	}
+	return parent;
+}
+
+/*
+ * Check a multi-block (leaf or node format) version 2 directory for inode
+ * id.
+ *
+ * Every directory block mapped in blkmap is read in turn and dispatched
+ * by its offset: blocks below m_dirleafblk go to process_data_dir_v2(),
+ * blocks below m_dirfreeblk to process_leaf_node_dir_v2_int(), and the
+ * rest to process_leaf_node_dir_v2_free().  A free table sized from
+ * dirsize collects each data block's best free length; any slot still set
+ * after the walk is reported as a missing free index.
+ *
+ * Returns the parent inode number reported by the data blocks, or 0 if
+ * none was found or the free table could not be allocated.
+ */
+static xfs_ino_t
+process_leaf_node_dir_v2(
+	blkmap_t		*blkmap,
+	int			*dot,
+	int			*dotdot,
+	inodata_t		*id,
+	xfs_fsize_t		dirsize)
+{
+	xfs_fsblock_t		b;
+	bbmap_t			bbmap;
+	bmap_ext_t		*bmp;
+	xfs_fileoff_t		dbno;
+	freetab_t		*freetab;
+	int			i;
+	xfs_ino_t		lino;
+	int			nex;
+	xfs_ino_t		parent;
+	int			t;
+	int			v;
+	int			v2;
+	int			x;
+
+	v2 = verbose || id->ilist;
+	v = parent = 0;
+	dbno = NULLFILEOFF;
+	freetab = malloc(FREETAB_SIZE(dirsize / mp->m_dirblksize));
+	/* nothing has been pushed or initialised yet, so a plain return is safe */
+	if (freetab == NULL) {
+		dbprintf("out of memory checking directory inode %lld\n",
+			id->ino);
+		error++;
+		return 0;
+	}
+	freetab->naents = (int)(dirsize / mp->m_dirblksize);
+	freetab->nents = 0;
+	for (i = 0; i < freetab->naents; i++)
+		freetab->ents[i] = NULLDATAOFF;
+	dir_hash_init();
+	while ((dbno = blkmap_next_off(blkmap, dbno, &t)) != NULLFILEOFF) {
+		nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp);
+		ASSERT(nex > 0);
+		/* verbose for this block if requested or if any of its blocks passes CHECK_BLIST */
+		for (v = v2, x = 0; !v && x < nex; x++) {
+			for (b = bmp[x].startblock;
+			     !v && b < bmp[x].startblock + bmp[x].blockcount;
+			     b++)
+				v = CHECK_BLIST(b);
+		}
+		if (v)
+			dbprintf("dir inode %lld block %u=%llu\n", id->ino,
+				(__uint32_t)dbno,
+				(xfs_dfsbno_t)bmp->startblock);
+		push_cur();
+		/* a directory block made of several extents needs a bbmap */
+		if (nex > 1)
+			make_bbmap(&bbmap, nex, bmp);
+		set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bmp->startblock),
+			mp->m_dirblkfsbs * blkbb, DB_RING_IGN,
+			nex > 1 ? &bbmap : NULL);
+		free(bmp);
+		if (iocur_top->data == NULL) {
+			if (!sflag || v)
+				dbprintf("can't read block %u for directory "
+					 "inode %lld\n",
+					(__uint32_t)dbno, id->ino);
+			error++;
+			pop_cur();
+			/* skip the rest of this directory block */
+			dbno += mp->m_dirblkfsbs - 1;
+			continue;
+		}
+		if (dbno < mp->m_dirleafblk) {
+			/* data block; freetab may be reallocated by the callee */
+			lino = process_data_dir_v2(dot, dotdot, id, v,
+				(xfs_dablk_t)dbno, &freetab);
+			if (lino) {
+				if (parent) {
+					if (!sflag || v)
+						dbprintf("multiple .. entries "
+							 "in dir %lld\n",
+							id->ino);
+					error++;
+				} else
+					parent = lino;
+			}
+		} else if (dbno < mp->m_dirfreeblk) {
+			process_leaf_node_dir_v2_int(id, v, (xfs_dablk_t)dbno,
+				freetab);
+		} else {
+			process_leaf_node_dir_v2_free(id, v, (xfs_dablk_t)dbno,
+				freetab);
+		}
+		pop_cur();
+		/* skip the rest of this directory block */
+		dbno += mp->m_dirblkfsbs - 1;
+	}
+	dir_hash_check(id, v);
+	dir_hash_done();
+	/* any slot still set was not cleared by the free-block processing */
+	for (i = 0; i < freetab->nents; i++) {
+		if (freetab->ents[i] != NULLDATAOFF) {
+			if (!sflag || v)
+				dbprintf("missing free index for data block %d "
+					 "in dir ino %lld\n",
+					XFS_DIR2_DB_TO_DA(mp, i), id->ino);
+			error++;
+		}
+	}
+	free(freetab);
+	return parent;
+}
+
+/*
+ * Check the version 2 directory free-index block held in iocur_top->data
+ * for directory inode id.
+ *
+ *   v        non-zero forces messages even when sflag is set
+ *   dabno    block number, used in messages and to compute the expected
+ *            first data block covered by this free block
+ *   freetab  best-free lengths gathered from the data blocks; every slot
+ *            that this block covers and that holds a value is reset to
+ *            NULLDATAOFF once it has been compared
+ *
+ * The header's magic, firstdb, nvalid and nused are validated, each bests[]
+ * entry is compared with the matching freetab slot, and the number of
+ * in-use entries is compared with nused.  Each problem bumps error.
+ *
+ * Note: the local pointer is named "free", which hides the C library
+ * free() inside this function; the function does not call it.
+ */
+static void
+process_leaf_node_dir_v2_free(
+	inodata_t		*id,
+	int			v,
+	xfs_dablk_t		dabno,
+	freetab_t		*freetab)
+{
+	xfs_dir2_data_off_t	ent;
+	xfs_dir2_free_t		*free;
+	int			i;
+	int			maxent;
+	int			used;
+
+	free = iocur_top->data;
+	if (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC) {
+		if (!sflag || v)
+			dbprintf("bad free block magic # %#x for dir ino %lld "
+				 "block %d\n",
+				INT_GET(free->hdr.magic, ARCH_CONVERT), id->ino, dabno);
+		error++;
+		return;
+	}
+	maxent = XFS_DIR2_MAX_FREE_BESTS(mp);
+	/* firstdb must match this block's position among the free blocks */
+	if (INT_GET(free->hdr.firstdb, ARCH_CONVERT) !=
+	    XFS_DIR2_DA_TO_DB(mp, dabno - mp->m_dirfreeblk) * maxent) {
+		if (!sflag || v)
+			dbprintf("bad free block firstdb %d for dir ino %lld "
+				 "block %d\n",
+				INT_GET(free->hdr.firstdb, ARCH_CONVERT), id->ino, dabno);
+		error++;
+		return;
+	}
+	/* both counts must lie in [0, maxent] and nused may not exceed nvalid */
+	if (INT_GET(free->hdr.nvalid, ARCH_CONVERT) > maxent || INT_GET(free->hdr.nvalid, ARCH_CONVERT) < 0 ||
+	    INT_GET(free->hdr.nused, ARCH_CONVERT) > maxent || INT_GET(free->hdr.nused, ARCH_CONVERT) < 0 ||
+	    INT_GET(free->hdr.nused, ARCH_CONVERT) > INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("bad free block nvalid/nused %d/%d for dir "
+				 "ino %lld block %d\n",
+				INT_GET(free->hdr.nvalid, ARCH_CONVERT), INT_GET(free->hdr.nused, ARCH_CONVERT), id->ino,
+				dabno);
+		error++;
+		return;
+	}
+	for (used = i = 0; i < INT_GET(free->hdr.nvalid, ARCH_CONVERT); i++) {
+		/* a data block beyond the table is treated as having no entry */
+		if (freetab->nents <= INT_GET(free->hdr.firstdb, ARCH_CONVERT) + i)
+			ent = NULLDATAOFF;
+		else
+			ent = freetab->ents[INT_GET(free->hdr.firstdb, ARCH_CONVERT) + i];
+		if (ent != INT_GET(free->bests[i], ARCH_CONVERT)) {
+			if (!sflag || v)
+				dbprintf("bad free block ent %d is %d should "
+					 "be %d for dir ino %lld block %d\n",
+					i, INT_GET(free->bests[i], ARCH_CONVERT), ent, id->ino, dabno);
+			error++;
+		}
+		if (INT_GET(free->bests[i], ARCH_CONVERT) != NULLDATAOFF)
+			used++;
+		/* mark the slot as covered by a free index entry */
+		if (ent != NULLDATAOFF)
+			freetab->ents[INT_GET(free->hdr.firstdb, ARCH_CONVERT) + i] = NULLDATAOFF;
+	}
+	if (used != INT_GET(free->hdr.nused, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("bad free block nused %d should be %d for dir "
+				 "ino %lld block %d\n",
+				INT_GET(free->hdr.nused, ARCH_CONVERT), used, id->ino, dabno);
+		error++;
+	}
+}
+
+static void				/* checks one dir v2 leaf or node block */
+process_leaf_node_dir_v2_int(
+	inodata_t		*id,		/* directory inode (for messages) */
+	int			v,		/* nonzero: print even if sflag set */
+	xfs_dablk_t		dabno,		/* directory block being checked */
+	freetab_t		*freetab)	/* table compared with leaf bests */
+{
+	int			i;
+	xfs_dir2_data_off_t	*lbp;
+	xfs_dir2_leaf_t		*leaf;
+	xfs_dir2_leaf_entry_t	*lep;
+	xfs_dir2_leaf_tail_t	*ltp;
+	xfs_da_intnode_t	*node;
+	int			stale;		/* entries with a null address */
+
+	leaf = iocur_top->data;		/* block is the current cursor's buffer */
+	switch (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)) {
+	case XFS_DIR2_LEAF1_MAGIC:
+		if (INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) || INT_GET(leaf->hdr.info.back, ARCH_CONVERT)) {
+			if (!sflag || v)
+				dbprintf("bad leaf block forw/back pointers "
+					 "%d/%d for dir ino %lld block %d\n",
+					INT_GET(leaf->hdr.info.forw, ARCH_CONVERT),
+					INT_GET(leaf->hdr.info.back, ARCH_CONVERT), id->ino, dabno);
+			error++;
+		}
+		if (dabno != mp->m_dirleafblk) {
+			if (!sflag || v)
+				dbprintf("single leaf block for dir ino %lld "
+					 "block %d should be at block %d\n",
+					id->ino, dabno,
+					(xfs_dablk_t)mp->m_dirleafblk);
+			error++;
+		}
+		ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+		lbp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+		for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) {
+			if (freetab->nents <= i || freetab->ents[i] != INT_GET(lbp[i], ARCH_CONVERT)) {
+				if (!sflag || v)
+					dbprintf("bestfree %d for dir ino %lld "
+						 "block %d doesn't match table "
+						 "value %d\n",
+						freetab->nents <= i ?
+							NULLDATAOFF :
+							freetab->ents[i],
+						id->ino,
+						XFS_DIR2_DB_TO_DA(mp, i),
+						INT_GET(lbp[i], ARCH_CONVERT));
+				error++;	/* count it like every other mismatch */
+			}
+			if (freetab->nents > i)	/* reset the table slot once compared */
+				freetab->ents[i] = NULLDATAOFF;
+		}
+		break;
+	case XFS_DIR2_LEAFN_MAGIC:
+		/* XXX at the root location the pointers could be checked */
+		break;
+	case XFS_DA_NODE_MAGIC:
+		node = iocur_top->data;
+		if (INT_GET(node->hdr.level, ARCH_CONVERT) < 1 ||
+		    INT_GET(node->hdr.level, ARCH_CONVERT) > XFS_DA_NODE_MAXDEPTH) {
+			if (!sflag || v)
+				dbprintf("bad node block level %d for dir ino "
+					 "%lld block %d\n",
+					INT_GET(node->hdr.level, ARCH_CONVERT), id->ino, dabno);
+			error++;
+		}
+		return;			/* node blocks: leaf entry scan is skipped */
+	default:
+		if (!sflag || v)
+			dbprintf("bad directory data magic # %#x for dir ino "
+				 "%lld block %d\n",
+				INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), id->ino, dabno);
+		error++;
+		return;
+	}
+	lep = leaf->ents;
+	for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
+		if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			stale++;
+		else if (dir_hash_see(INT_GET(lep[i].hashval, ARCH_CONVERT), INT_GET(lep[i].address, ARCH_CONVERT))) {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d extra leaf entry "
+					 "%x %x\n",
+					id->ino, dabno, INT_GET(lep[i].hashval, ARCH_CONVERT),
+					INT_GET(lep[i].address, ARCH_CONVERT));
+			error++;
+		}
+	}
+	if (stale != INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d stale mismatch "
+				 "%d/%d\n",
+				 id->ino, dabno, stale,
+				 INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+		error++;
+	}
+}
+
+static xfs_ino_t			/* inode of "..", 0 if none was found */
+process_node_dir_v1(
+	blkmap_t		*blkmap,	/* directory's block mapping */
+	int			*dot,		/* passed to the leaf checker */
+	int			*dotdot,	/* passed to the leaf checker */
+	inodata_t		*id)		/* directory inode being checked */
+{
+	xfs_fsblock_t		bno;
+	xfs_fileoff_t		dbno;
+	xfs_ino_t		lino;
+	xfs_da_intnode_t	*node;
+	xfs_ino_t		parent;
+	int			t;
+	int			v;		/* verbose output for this inode */
+	int			v2;		/* CHECK_BLIST() hit for this block */
+
+	v = verbose || id->ilist;
+	parent = 0;
+	dbno = NULLFILEOFF;
+	while ((dbno = blkmap_next_off(blkmap, dbno, &t)) != NULLFILEOFF) {
+		bno = blkmap_get(blkmap, dbno);
+		v2 = bno != NULLFSBLOCK && CHECK_BLIST(bno);
+		if (bno == NULLFSBLOCK && dbno == 0) {
+			if (!sflag || v)
+				dbprintf("can't read root block for directory "
+					 "inode %lld\n", id->ino);
+			error++;
+		}
+		if (v || v2)
+			dbprintf("dir inode %lld block %u=%llu\n", id->ino,
+				(__uint32_t)dbno, (xfs_dfsbno_t)bno);
+		if (bno == NULLFSBLOCK)
+			continue;
+		push_cur();
+		set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bno), blkbb,
+			DB_RING_IGN, NULL);
+		if ((node = iocur_top->data) == NULL) {
+			if (!sflag || v || v2)
+				dbprintf("can't read block %u for directory "
+					 "inode %lld\n",
+					(__uint32_t)dbno, id->ino);
+			error++;
+			pop_cur();	/* balance push_cur() before moving on */
+			continue;
+		}
+#if VERS >= V_62
+		if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+#else
+		if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_NODE_MAGIC)
+#endif
+		{
+			pop_cur();	/* interior node: nothing checked here */
+			continue;
+		}
+		lino = process_leaf_dir_v1_int(dot, dotdot, id);
+		if (lino) {
+			if (parent) {
+				if (!sflag || v || v2)
+					dbprintf("multiple .. entries in dir "
+						 "%lld\n",
+						id->ino);
+				error++;
+			} else
+				parent = lino;
+		}
+		pop_cur();
+	}
+	return parent;
+}
+
+static void			/* checks every dquot record of a quota inode */
+process_quota(
+	int		isproj,		/* nonzero: project quota, else user */
+	inodata_t	*id,		/* quota inode; id->ilist forces output */
+	blkmap_t	*blkmap)	/* file offset to fs block mapping */
+{
+	xfs_fsblock_t	bno;
+	int		cb;		/* CHECK_BLIST() result for bno */
+	xfs_dqblk_t	*dqb;
+	xfs_dqid_t	dqid;		/* id expected in the current record */
+	u_int8_t	exp_flags;	/* d_flags value every record must have */
+	int		i;
+	int		perblock;	/* dquot records per filesystem block */
+	xfs_fileoff_t	qbno;
+	char		*s;		/* "project" or "user", for messages */
+	int		scicb;		/* nonzero: error messages are printed */
+	int		t;
+
+	perblock = (int)(mp->m_sb.sb_blocksize / sizeof(*dqb));
+	s = isproj ? "project" : "user";
+	exp_flags = isproj ? XFS_DQ_PROJ : XFS_DQ_USER;
+	dqid = 0;
+	qbno = NULLFILEOFF;
+	while ((qbno = blkmap_next_off(blkmap, qbno, &t)) !=
+	       NULLFILEOFF) {
+		/* NOTE(review): bno is not tested against NULLFSBLOCK - confirm */
+		bno = blkmap_get(blkmap, qbno);
+		dqid = (xfs_dqid_t)qbno * perblock;	/* first id of this block */
+		cb = CHECK_BLIST(bno);
+		scicb = !sflag || id->ilist || cb;
+		push_cur();
+		set_cur(&typtab[TYP_DQBLK], XFS_FSB_TO_DADDR(mp, bno), blkbb,
+			DB_RING_IGN, NULL);
+		if ((dqb = iocur_top->data) == NULL) {
+			pop_cur();
+			if (scicb)
+				dbprintf("can't read block %lld for %s quota "	
+					 "inode (fsblock %lld)\n",
+					(xfs_dfiloff_t)qbno, s,
+					(xfs_dfsbno_t)bno);
+			error++;
+			continue;
+		}
+		for (i = 0; i < perblock; i++, dqid++, dqb++) {
+			if (verbose || id->ilist || cb)
+				dbprintf("%s dqblk %lld entry %d id %d bc "
+					 "%lld ic %lld rc %lld\n",
+					s, (xfs_dfiloff_t)qbno, i, dqid,
+					INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT),
+					INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT),
+					INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT));
+			if (INT_GET(dqb->dd_diskdq.d_magic, ARCH_CONVERT) != XFS_DQUOT_MAGIC) {
+				if (scicb)
+					dbprintf("bad magic number %#x for %s "	
+						 "dqblk %lld entry %d id %d\n",
+						INT_GET(dqb->dd_diskdq.d_magic, ARCH_CONVERT), s,
+						(xfs_dfiloff_t)qbno, i, dqid);
+				error++;
+				continue;	/* bad record: not added to totals */
+			}
+			if (INT_GET(dqb->dd_diskdq.d_version, ARCH_CONVERT) != XFS_DQUOT_VERSION) {
+				if (scicb)
+					dbprintf("bad version number %#x for "
+						 "%s dqblk %lld entry %d id "
+						 "%d\n",
+						INT_GET(dqb->dd_diskdq.d_version, ARCH_CONVERT), s,
+						(xfs_dfiloff_t)qbno, i, dqid);
+				error++;
+				continue;
+			}
+			if (INT_GET(dqb->dd_diskdq.d_flags, ARCH_CONVERT) != exp_flags) {
+				if (scicb)
+					dbprintf("bad flags %#x for %s dqblk "
+						 "%lld entry %d id %d\n",
+						INT_GET(dqb->dd_diskdq.d_flags, ARCH_CONVERT), s,
+						(xfs_dfiloff_t)qbno, i, dqid);
+				error++;
+				continue;
+			}
+			if (INT_GET(dqb->dd_diskdq.d_id, ARCH_CONVERT) != dqid) {
+				if (scicb)
+					dbprintf("bad id %d for %s dqblk %lld "
+						 "entry %d id %d\n",
+						INT_GET(dqb->dd_diskdq.d_id, ARCH_CONVERT), s,
+						(xfs_dfiloff_t)qbno, i, dqid);
+				error++;
+				continue;
+			}
+			/* dq=1: quota_add1() adds these to the entry's dq totals */
+			quota_add(isproj ? dqid : -1, isproj ? -1 : dqid, 1,
+				  INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT),
+				  INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT),
+				  INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT));
+		}
+		pop_cur();
+	}
+}
+
+static void			/* walks the realtime bitmap inode's blocks */
+process_rtbitmap(
+	blkmap_t	*blkmap)	/* bitmap file offset to fs block mapping */
+{
+#define xfs_highbit64 libxfs_highbit64	/* for XFS_RTBLOCKLOG macro */
+	int		bit;
+	int		bitsperblock;
+	xfs_fileoff_t	bmbno;
+	xfs_fsblock_t	bno;
+	xfs_drtbno_t	extno;		/* extent number matching current bit */
+	int		len;		/* length in bits of a run of set bits */
+	int		log;
+	int		offs;
+	int		prevbit;	/* 1 while inside a run of set bits */
+	xfs_drfsbno_t	rtbno;
+	int		start_bmbno;	/* bitmap block where the run began */
+	int		start_bit;	/* bit within that block */
+	int		t;
+	xfs_rtword_t	*words;
+
+	bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+	bit = extno = prevbit = start_bmbno = start_bit = 0;
+	bmbno = NULLFILEOFF;
+	while ((bmbno = blkmap_next_off(blkmap, bmbno, &t)) !=
+	       NULLFILEOFF) {
+		bno = blkmap_get(blkmap, bmbno);
+		if (bno == NULLFSBLOCK) {
+			if (!sflag)
+				dbprintf("block %lld for rtbitmap inode is "
+					 "missing\n",
+					(xfs_dfiloff_t)bmbno);
+			error++;
+			continue;
+		}
+		push_cur();
+		set_cur(&typtab[TYP_RTBITMAP], XFS_FSB_TO_DADDR(mp, bno), blkbb,
+			DB_RING_IGN, NULL);
+		if ((words = iocur_top->data) == NULL) {
+			pop_cur();
+			if (!sflag)
+				dbprintf("can't read block %lld for rtbitmap "
+					 "inode\n",
+					(xfs_dfiloff_t)bmbno);
+			error++;
+			continue;
+		}
+		for (bit = 0;
+		     bit < bitsperblock && extno < mp->m_sb.sb_rextents;
+		     bit++, extno++) {
+			if (isset(words, bit)) {	/* set bit: extent marked free */
+				rtbno = extno * mp->m_sb.sb_rextsize;
+				set_rdbmap(rtbno, mp->m_sb.sb_rextsize,
+					DBM_RTFREE);
+				frextents++;
+				if (prevbit == 0) {	/* a new run starts here */
+					start_bmbno = (int)bmbno;
+					start_bit = bit;
+					prevbit = 1;
+				}
+			} else if (prevbit == 1) {	/* run ended: count it */
+				len = ((int)bmbno - start_bmbno) *
+					bitsperblock + (bit - start_bit);
+				log = XFS_RTBLOCKLOG(len);
+				offs = XFS_SUMOFFS(mp, log, start_bmbno);
+				sumcompute[offs]++;
+				prevbit = 0;
+			}
+		}
+		pop_cur();
+		if (extno == mp->m_sb.sb_rextents)
+			break;
+	}
+	/* NOTE(review): bmbno is NULLFILEOFF here unless the break ran - confirm */
+	if (prevbit == 1) {		/* a run was still open at the end */
+		len = ((int)bmbno - start_bmbno) * bitsperblock +
+			(bit - start_bit);
+		log = XFS_RTBLOCKLOG(len);
+		offs = XFS_SUMOFFS(mp, log, start_bmbno);
+		sumcompute[offs]++;
+	}
+}
+
+static void			/* copies the rt summary inode into sumfile */
+process_rtsummary(
+	blkmap_t	*blkmap)	/* summary file offset to fs block mapping */
+{
+	xfs_fsblock_t	bno;
+	char		*bytes;
+	xfs_fileoff_t	sumbno;
+	int		t;
+
+	sumbno = NULLFILEOFF;
+	while ((sumbno = blkmap_next_off(blkmap, sumbno, &t)) !=
+	       NULLFILEOFF) {
+		bno = blkmap_get(blkmap, sumbno);
+		if (bno == NULLFSBLOCK) {
+			if (!sflag)
+				dbprintf("block %lld for rtsummary inode is "
+					 "missing\n",
+					(xfs_dfiloff_t)sumbno);
+			error++;
+			continue;
+		}
+		push_cur();
+		set_cur(&typtab[TYP_RTSUMMARY], XFS_FSB_TO_DADDR(mp, bno),
+			blkbb, DB_RING_IGN, NULL);
+		if ((bytes = iocur_top->data) == NULL) {
+			if (!sflag)
+				dbprintf("can't read block %lld for rtsummary "
+					 "inode\n", (xfs_dfiloff_t)sumbno);
+			error++;
+			pop_cur();	/* balance push_cur() before moving on */
+			continue;
+		}
+		/* NOTE(review): assumes sumfile holds block sumbno - confirm */
+		memcpy((char *)sumfile + sumbno * mp->m_sb.sb_blocksize, bytes,
+			mp->m_sb.sb_blocksize);
+		pop_cur();
+	}
+}
+
+static xfs_ino_t			/* ".." inode, NULLFSINO if not found */
+process_sf_dir_v2(
+	xfs_dinode_t		*dip,		/* inode holding the inline dir */
+	int			*dot,		/* incremented once for "." */
+	int			*dotdot,	/* incremented once for ".." */
+	inodata_t		*id)		/* directory inode being checked */
+{
+	inodata_t		*cid;		/* inode data of a referenced inode */
+	int			i;
+	int			i8;		/* inode numbers above the short max */
+	xfs_ino_t		lino;
+	int			offset;		/* lowest offset the next entry may use */
+	xfs_dir2_sf_t		*sf;
+	xfs_dir2_sf_entry_t	*sfe;
+	int			v;		/* verbose output for this inode */
+
+	sf = &dip->di_u.di_dir2sf;
+	addlink_inode(id);
+	v = verbose || id->ilist;
+	if (v)
+		dbprintf("dir %lld entry . %lld\n", id->ino, id->ino);
+	(*dot)++;
+	sfe = XFS_DIR2_SF_FIRSTENTRY(sf);
+	offset = XFS_DIR2_DATA_FIRST_OFFSET;
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT) - 1, i8 = 0; i >= 0; i--) {
+		/* NOTE(review): di_size is read without INT_GET - confirm byte order */
+		if ((__psint_t)sfe + XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sfe) -
+		    (__psint_t)sf > dip->di_core.di_size) {
+			if (!sflag)
+				dbprintf("dir %llu bad size in entry at %d\n",
+					id->ino,
+					(int)((char *)sfe - (char *)sf));
+			error++;
+			break;		/* entry runs past di_size: stop walking */
+		}
+		lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, XFS_DIR2_SF_INUMBERP(sfe), ARCH_CONVERT);
+		if (lino > XFS_DIR2_MAX_SHORT_INUM)
+			i8++;
+		cid = find_inode(lino, 1);
+		if (cid == NULL) {
+			if (!sflag)
+				dbprintf("dir %lld entry %*.*s bad inode "
+					 "number %lld\n",
+					id->ino, sfe->namelen, sfe->namelen,
+					sfe->name, lino);
+			error++;
+		} else {
+			addlink_inode(cid);
+			if (!cid->parent)	/* first directory seen becomes parent */
+				cid->parent = id;
+			addname_inode(cid, (char *)sfe->name, sfe->namelen);
+		}
+		if (v)
+			dbprintf("dir %lld entry %*.*s offset %d %lld\n",
+				id->ino, sfe->namelen, sfe->namelen, sfe->name,
+				XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT), lino);
+		if (XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT) < offset) {
+			if (!sflag)
+				dbprintf("dir %lld entry %*.*s bad offset %d\n",
+					id->ino, sfe->namelen, sfe->namelen,
+					sfe->name, XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT));
+			error++;
+		}
+		offset =
+			XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT) +
+			XFS_DIR2_DATA_ENTSIZE(sfe->namelen);
+		sfe = XFS_DIR2_SF_NEXTENTRY(sf, sfe);
+	}
+	/* i < 0 only when the loop above finished without the break */
+	if (i < 0 && (__psint_t)sfe - (__psint_t)sf != dip->di_core.di_size) {
+		if (!sflag)
+			dbprintf("dir %llu size is %lld, should be %u\n",
+				id->ino, dip->di_core.di_size,
+				(uint)((char *)sfe - (char *)sf));
+		error++;
+	}
+	if (offset + (INT_GET(sf->hdr.count, ARCH_CONVERT) + 2) * sizeof(xfs_dir2_leaf_entry_t) +
+	    sizeof(xfs_dir2_block_tail_t) > mp->m_dirblksize) {
+		if (!sflag)
+			dbprintf("dir %llu offsets too high\n", id->ino);
+		error++;
+	}
+	lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, &sf->hdr.parent, ARCH_CONVERT);
+	if (lino > XFS_DIR2_MAX_SHORT_INUM)
+		i8++;
+	cid = find_inode(lino, 1);
+	if (cid)
+		addlink_inode(cid);
+	else {
+		if (!sflag)
+			dbprintf("dir %lld entry .. bad inode number %lld\n",
+				id->ino, lino);
+		error++;
+	}
+	if (v)
+		dbprintf("dir %lld entry .. %lld\n", id->ino, lino);
+	if (i8 != sf->hdr.i8count) {
+		if (!sflag)
+			dbprintf("dir %lld i8count mismatch is %d should be "
+				 "%d\n",
+				id->ino, sf->hdr.i8count, i8);
+		error++;
+	}
+	(*dotdot)++;
+	return cid ? lino : NULLFSINO;
+}
+
+static xfs_ino_t			/* ".." inode, NULLFSINO if not found */
+process_shortform_dir_v1(
+	xfs_dinode_t		*dip,		/* inode holding the inline dir */
+	int			*dot,		/* incremented once for "." */
+	int			*dotdot,	/* incremented once for ".." */
+	inodata_t		*id)		/* directory inode being checked */
+{
+	inodata_t		*cid;		/* inode data of a referenced inode */
+	int			i;
+	xfs_ino_t		lino;
+	xfs_dir_shortform_t	*sf;
+	xfs_dir_sf_entry_t	*sfe;
+	int			v;		/* verbose output for this inode */
+
+	sf = &dip->di_u.di_dirsf;
+	addlink_inode(id);
+	v = verbose || id->ilist;
+	if (v)
+		dbprintf("dir %lld entry . %lld\n", id->ino, id->ino);
+	(*dot)++;
+	sfe = &sf->list[0];
+	/* NOTE(review): entries are walked without a di_size bound - confirm */
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT) - 1; i >= 0; i--) {
+                lino = DIRINO_GET_ARCH(&sfe->inumber, ARCH_CONVERT);
+		cid = find_inode(lino, 1);
+		if (cid == NULL) {
+			if (!sflag)
+				dbprintf("dir %lld entry %*.*s bad inode "
+					 "number %lld\n",
+					id->ino, sfe->namelen, sfe->namelen,
+					sfe->name, lino);
+			error++;
+		} else {
+			addlink_inode(cid);
+			if (!cid->parent)	/* first directory seen becomes parent */
+				cid->parent = id;
+			addname_inode(cid, (char *)sfe->name, sfe->namelen);
+		}
+		if (v)
+			dbprintf("dir %lld entry %*.*s %lld\n", id->ino,
+				sfe->namelen, sfe->namelen, sfe->name, lino);
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	/* NOTE(review): printed regardless of sflag, error not bumped - intended? */
+	if ((__psint_t)sfe - (__psint_t)sf != dip->di_core.di_size)
+		dbprintf("dir %llu size is %lld, should be %d\n",
+			id->ino, dip->di_core.di_size,
+			(int)((char *)sfe - (char *)sf));
+        lino=DIRINO_GET_ARCH(&sf->hdr.parent, ARCH_CONVERT);
+	cid = find_inode(lino, 1);
+	if (cid)
+		addlink_inode(cid);
+	else {
+		if (!sflag)
+			dbprintf("dir %lld entry .. bad inode number %lld\n",
+				id->ino, lino);
+		error++;
+	}
+	if (v)
+		dbprintf("dir %lld entry .. %lld\n", id->ino, lino);
+	(*dotdot)++;
+	return cid ? lino : NULLFSINO;
+}
+
+static void			/* adds counts to the user and/or project table */
+quota_add(
+	xfs_dqid_t	projid,		/* project id, -1 to skip project table */
+	xfs_dqid_t	userid,		/* user id, -1 to skip user table */
+	int		dq,		/* passed through to quota_add1() */
+	xfs_qcnt_t	bc,
+	xfs_qcnt_t	ic,
+	xfs_qcnt_t	rc)
+{
+	if (qudo && userid != -1)	/* only when user quota is being checked */
+		quota_add1(qudata, userid, dq, bc, ic, rc);
+	if (qpdo && projid != -1)	/* only when project quota is being checked */
+		quota_add1(qpdata, projid, dq, bc, ic, rc);
+}
+
+static void			/* adds counts to one id's entry in a hash table */
+quota_add1(
+	qdata_t		**qt,		/* table of QDATA_HASH_SIZE chain heads */
+	xfs_dqid_t	id,		/* quota id to account against */
+	int		dq,		/* nonzero: update ->dq, else ->count */
+	xfs_qcnt_t	bc,
+	xfs_qcnt_t	ic,
+	xfs_qcnt_t	rc)
+{
+	qdata_t		*qe;
+	int		qh;		/* bucket index for id */
+	qinfo_t		*qi;
+
+	qh = (int)((__uint32_t)id % QDATA_HASH_SIZE);
+	qe = qt[qh];
+	while (qe) {			/* look for an existing entry */
+		if (qe->id == id) {
+			qi = dq ? &qe->dq : &qe->count;
+			qi->bc += bc;
+			qi->ic += ic;
+			qi->rc += rc;
+			return;
+		}
+		qe = qe->next;
+	}
+	qe = xmalloc(sizeof(*qe));	/* none found: create one */
+	qe->id = id;
+	qi = dq ? &qe->dq : &qe->count;
+	qi->bc = bc;
+	qi->ic = ic;
+	qi->rc = rc;
+	qi = dq ? &qe->count : &qe->dq;	/* the other side starts at zero */
+	qi->bc = qi->ic = qi->rc = 0;
+	qe->next = qt[qh];		/* insert at the head of the chain */
+	qt[qh] = qe;
+}
+
+static void			/* reports count/dq differences, frees the table */
+quota_check(
+	char	*s,		/* label printed in front of "quota id" */
+	qdata_t	**qt)		/* hash table to compare; freed before return */
+{
+	int	i;
+	qdata_t	*next;
+	qdata_t	*qp;
+
+	for (i = 0; i < QDATA_HASH_SIZE; i++) {
+		qp = qt[i];
+		while (qp) {
+			next = qp->next;	/* saved because qp is freed below */
+			if (qp->count.bc != qp->dq.bc ||
+			    qp->count.ic != qp->dq.ic ||
+			    qp->count.rc != qp->dq.rc) {
+				if (!sflag) {
+					dbprintf("%s quota id %d, have/exp",
+						s, qp->id);
+					if (qp->count.bc != qp->dq.bc)
+						dbprintf(" bc %lld/%lld",
+							qp->dq.bc,
+							qp->count.bc);
+					if (qp->count.ic != qp->dq.ic)
+						dbprintf(" ic %lld/%lld",
+							qp->dq.ic,
+							qp->count.ic);
+					if (qp->count.rc != qp->dq.rc)
+						dbprintf(" rc %lld/%lld",
+							qp->dq.rc,
+							qp->count.rc);
+					dbprintf("\n");
+				}
+				error++;	/* one error per mismatching id */
+			}
+			xfree(qp);
+			qp = next;
+		}
+	}
+	xfree(qt);
+}
+
+static void			/* decides which quota types to check */
+quota_init(void)
+{
+	qudo = mp->m_sb.sb_uquotino != 0 &&	/* user quota inode is set ... */
+	       mp->m_sb.sb_uquotino != NULLFSINO &&
+	       (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD);	/* ... and CHKD flag on */
+	qpdo = mp->m_sb.sb_pquotino != 0 &&	/* same test for project quota */
+	       mp->m_sb.sb_pquotino != NULLFSINO &&
+	       (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD);
+	if (qudo)			/* zeroed table of QDATA_HASH_SIZE heads */
+		qudata = xcalloc(QDATA_HASH_SIZE, sizeof(qdata_t *));
+	if (qpdo)
+		qpdata = xcalloc(QDATA_HASH_SIZE, sizeof(qdata_t *));
+}
+
+static void			/* checks the headers and btrees of one AG */
+scan_ag(
+	xfs_agnumber_t	agno)		/* allocation group to scan */
+{
+	xfs_agf_t	*agf;
+	xfs_agi_t	*agi;
+	int		i;
+	xfs_sb_t	tsb;		/* filled in by libxfs_xlate_sb() below */
+	xfs_sb_t	*sb=&tsb;
+
+	agffreeblks = agflongest = 0;	/* per-AG tallies, compared at the end */
+	agicount = agifreecount = 0;
+	push_cur();
+	set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1,
+		DB_RING_IGN, NULL);
+        
+	if (!iocur_top->data) {
+		dbprintf("can't read superblock for ag %u\n", agno);
+		pop_cur();
+		serious_error++;
+		return;
+	}
+ 
+	libxfs_xlate_sb(iocur_top->data, sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+ 
+	if (sb->sb_magicnum != XFS_SB_MAGIC) {
+		if (!sflag)
+			dbprintf("bad sb magic # %#x in ag %u\n",
+				sb->sb_magicnum, agno);
+		error++;
+	}
+	if (!XFS_SB_GOOD_VERSION(sb)) {
+		if (!sflag)
+			dbprintf("bad sb version # %#x in ag %u\n",
+				sb->sb_versionnum, agno);
+		error++;
+		sbver_err++;
+	}
+	if (agno == 0 && sb->sb_inprogress != 0) {
+		if (!sflag)
+			dbprintf("mkfs not completed successfully\n");
+		error++;
+	}
+	set_dbmap(agno, XFS_SB_BLOCK(mp), 1, DBM_SB, agno, XFS_SB_BLOCK(mp));
+	if (sb->sb_logstart && XFS_FSB_TO_AGNO(mp, sb->sb_logstart) == agno)
+		set_dbmap(agno, XFS_FSB_TO_AGBNO(mp, sb->sb_logstart),
+			sb->sb_logblocks, DBM_LOG, agno, XFS_SB_BLOCK(mp));
+	push_cur();
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+		DB_RING_IGN, NULL);
+	if ((agf = iocur_top->data) == NULL) {
+		dbprintf("can't read agf block for ag %u\n", agno);
+		pop_cur();		/* AGF cursor */
+		pop_cur();		/* superblock cursor */
+		serious_error++;
+		return;
+	}
+	if (INT_GET(agf->agf_magicnum, ARCH_CONVERT) != XFS_AGF_MAGIC) {
+		if (!sflag)
+			dbprintf("bad agf magic # %#x in ag %u\n",
+				INT_GET(agf->agf_magicnum, ARCH_CONVERT), agno);
+		error++;
+	}
+	if (!XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT))) {
+		if (!sflag)
+			dbprintf("bad agf version # %#x in ag %u\n",
+				INT_GET(agf->agf_versionnum, ARCH_CONVERT), agno);
+		error++;
+	}
+	if (XFS_SB_BLOCK(mp) != XFS_AGF_BLOCK(mp))
+		set_dbmap(agno, XFS_AGF_BLOCK(mp), 1, DBM_AGF, agno,
+			XFS_SB_BLOCK(mp));
+	if (sb->sb_agblocks > INT_GET(agf->agf_length, ARCH_CONVERT))
+		set_dbmap(agno, INT_GET(agf->agf_length, ARCH_CONVERT),
+			sb->sb_agblocks - INT_GET(agf->agf_length, ARCH_CONVERT),
+			DBM_MISSING, agno, XFS_SB_BLOCK(mp));
+	push_cur();
+	set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1,
+		DB_RING_IGN, NULL);
+	if ((agi = iocur_top->data) == NULL) {
+		dbprintf("can't read agi block for ag %u\n", agno);
+		serious_error++;
+		pop_cur();		/* AGI cursor */
+		pop_cur();		/* AGF cursor */
+		pop_cur();		/* superblock cursor */
+		return;
+	}
+	if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC) {
+		if (!sflag)
+			dbprintf("bad agi magic # %#x in ag %u\n",
+				INT_GET(agi->agi_magicnum, ARCH_CONVERT), agno);
+		error++;
+	}
+	if (!XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT))) {
+		if (!sflag)
+			dbprintf("bad agi version # %#x in ag %u\n",
+				INT_GET(agi->agi_versionnum, ARCH_CONVERT), agno);
+		error++;
+	}
+	if (XFS_SB_BLOCK(mp) != XFS_AGI_BLOCK(mp) &&
+	    XFS_AGF_BLOCK(mp) != XFS_AGI_BLOCK(mp))
+		set_dbmap(agno, XFS_AGI_BLOCK(mp), 1, DBM_AGI, agno,
+			XFS_SB_BLOCK(mp));
+	scan_freelist(agf);
+	fdblocks--;	/* presumably offsets scanfunc_bno's count of the root - confirm */
+	scan_sbtree(agf,
+		INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT),
+		INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT),
+		1, scanfunc_bno, TYP_BNOBT);
+	fdblocks--;	/* same adjustment before the by-count tree */
+	scan_sbtree(agf,
+		INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT),
+		INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT),
+		1, scanfunc_cnt, TYP_CNTBT);
+	scan_sbtree(agf,
+		INT_GET(agi->agi_root, ARCH_CONVERT),
+		INT_GET(agi->agi_level, ARCH_CONVERT),
+		1, scanfunc_ino, TYP_INOBT);
+	if (INT_GET(agf->agf_freeblks, ARCH_CONVERT) != agffreeblks) {
+		if (!sflag)
+			dbprintf("agf_freeblks %u, counted %u in ag %u\n",
+				INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+				agffreeblks, agno);
+		error++;
+	}
+	if (INT_GET(agf->agf_longest, ARCH_CONVERT) != agflongest) {
+		if (!sflag)
+			dbprintf("agf_longest %u, counted %u in ag %u\n",
+				INT_GET(agf->agf_longest, ARCH_CONVERT),
+				agflongest, agno);
+		error++;
+	}
+	if (INT_GET(agi->agi_count, ARCH_CONVERT) != agicount) {
+		if (!sflag)
+			dbprintf("agi_count %u, counted %u in ag %u\n",
+				INT_GET(agi->agi_count, ARCH_CONVERT),
+				agicount, agno);
+		error++;
+	}
+	if (INT_GET(agi->agi_freecount, ARCH_CONVERT) != agifreecount) {
+		if (!sflag)
+			dbprintf("agi_freecount %u, counted %u in ag %u\n",
+				INT_GET(agi->agi_freecount, ARCH_CONVERT),
+				agifreecount, agno);
+		error++;
+	}
+	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
+		if (INT_GET(agi->agi_unlinked[i], ARCH_CONVERT) != NULLAGINO) {
+			if (!sflag) {
+                                xfs_agino_t agino=INT_GET(agi->agi_unlinked[i], ARCH_CONVERT);
+				dbprintf("agi unlinked bucket %d is %u in ag "
+					 "%u (inode=%lld)\n", i, agino, agno,
+                                        XFS_AGINO_TO_INO(mp, agno, agino));
+                        }
+			error++;	/* any non-null bucket counts as an error */
+		}
+	}
+	pop_cur();			/* AGI cursor */
+	pop_cur();			/* AGF cursor */
+	pop_cur();			/* superblock cursor */
+}
+
+static void			/* marks the AGFL block and the blocks it lists */
+scan_freelist(
+	xfs_agf_t	*agf)		/* AG header giving flfirst/fllast/flcount */
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	xfs_agfl_t	*agfl;
+	uint		count = 0;	/* freelist slots actually visited */
+	int		i;
+
+	if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+	    XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+	    XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
+		set_dbmap(seqno, XFS_AGFL_BLOCK(mp), 1, DBM_AGFL, seqno,
+			XFS_SB_BLOCK(mp));
+	if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+		return;
+	push_cur();
+	set_cur(&typtab[TYP_AGFL],
+		XFS_AG_DADDR(mp, seqno, XFS_AGFL_DADDR), 1, DB_RING_IGN, NULL);
+	if ((agfl = iocur_top->data) == NULL) {
+		dbprintf("can't read agfl block for ag %u\n", seqno);
+		serious_error++;
+		pop_cur();		/* balance push_cur() before bailing out */
+		return;
+	}
+	/* A corrupt flfirst/fllast must neither index outside agfl_bno[] nor */
+	/* spin forever; a bad walk then shows up as the count mismatch below. */
+	i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+	while ((uint)i < XFS_AGFL_SIZE && count < XFS_AGFL_SIZE) {
+		set_dbmap(seqno, INT_GET(agfl->agfl_bno[i], ARCH_CONVERT), 1,
+			DBM_FREELIST, seqno, XFS_AGFL_BLOCK(mp));
+		count++;
+		if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+			break;
+		if (++i == XFS_AGFL_SIZE)	/* the list wraps around */
+			i = 0;
+	}
+	if (count != INT_GET(agf->agf_flcount, ARCH_CONVERT)) {
+		if (!sflag)
+			dbprintf("freeblk count %u != flcount %u in ag %u\n",
+				count, INT_GET(agf->agf_flcount, ARCH_CONVERT),
+				seqno);
+		error++;
+	}
+	fdblocks += count;
+	pop_cur();
+}
+
+static void			/* reads one long-form btree block, runs func */
+scan_lbtree(
+	xfs_fsblock_t	root,		/* filesystem block to read */
+	int		nlevels,	/* func is told level nlevels - 1 */
+	scan_lbtree_f_t	func,		/* per-block callback */
+	dbm_t		type,		/* passed through to func */
+	inodata_t	*id,		/* passed through to func */
+	xfs_drfsbno_t	*totd,		/* passed through to func */
+	xfs_drfsbno_t	*toti,		/* passed through to func */
+	xfs_extnum_t	*nex,		/* passed through to func */
+	blkmap_t	**blkmapp,	/* passed through to func */
+	int		isroot,		/* passed through to func */
+	typnm_t		btype)		/* typtab index used for the read */
+{
+	push_cur();
+	set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
+		NULL);
+	if (iocur_top->data == NULL) {
+		if (!sflag)
+			dbprintf("can't read btree block %u/%u\n",
+				XFS_FSB_TO_AGNO(mp, root),
+				XFS_FSB_TO_AGBNO(mp, root));
+		error++;
+	} else {
+		(*func)(iocur_top->data, nlevels - 1, type, root, id, totd, toti, nex,
+			blkmapp, isroot, btype);
+	}
+	pop_cur();			/* on both paths, to balance push_cur() */
+}
+
+static void			/* reads one short-form btree block, runs func */
+scan_sbtree(
+	xfs_agf_t	*agf,		/* AG header; supplies the AG number */
+	xfs_agblock_t	root,		/* AG-relative block to read */
+	int		nlevels,	/* func is told level nlevels - 1 */
+	int		isroot,		/* passed through to func */
+	scan_sbtree_f_t	func,		/* per-block callback */
+	typnm_t		btype)		/* typtab index used for the read */
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	push_cur();
+	set_cur(&typtab[btype],
+		XFS_AGB_TO_DADDR(mp, seqno, root), blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		if (!sflag)
+			dbprintf("can't read btree block %u/%u\n", seqno, root);
+		error++;
+	} else {
+		(*func)(iocur_top->data, nlevels - 1, agf, root, isroot);
+	}
+	pop_cur();			/* on both paths, to balance push_cur() */
+}
+
+static void			/* checks one bmap btree block, recurses below */
+scanfunc_bmap(
+	xfs_btree_lblock_t	*ablock,	/* block contents */
+	int			level,		/* level this block should have */
+	dbm_t			type,		/* map type recorded for the block */
+	xfs_fsblock_t		bno,		/* filesystem block of ablock */
+	inodata_t		*id,		/* owning inode */
+	xfs_drfsbno_t		*totd,		/* passed to process_bmbt_reclist */
+	xfs_drfsbno_t		*toti,		/* incremented once per block */
+	xfs_extnum_t		*nex,		/* grows by each leaf's numrecs */
+	blkmap_t		**blkmapp,	/* passed to process_bmbt_reclist */
+	int			isroot,		/* nonzero: minimum nrecs not enforced */
+	typnm_t			btype)		/* passed on to scan_lbtree */
+{
+	xfs_agblock_t		agbno;
+	xfs_agnumber_t		agno;
+	xfs_bmbt_block_t	*block = (xfs_bmbt_block_t *)ablock;
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+
+	agno = XFS_FSB_TO_AGNO(mp, bno);
+	agbno = XFS_FSB_TO_AGBNO(mp, bno);
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_BMAP_MAGIC) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad magic # %#x in inode %lld bmbt block "
+				 "%u/%u\n",
+				INT_GET(block->bb_magic, ARCH_CONVERT), id->ino, agno, agbno);
+		error++;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("expected level %d got %d in inode %lld bmbt "
+				 "block %u/%u\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), id->ino, agno, agbno);
+		error++;
+	}
+	set_dbmap(agno, agbno, 1, type, agno, agbno);
+	set_inomap(agno, agbno, 1, id);
+	(*toti)++;
+	if (level == 0) {		/* leaf: holds extent records */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0] ||
+		    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[0])  {
+			if (!sflag || id->ilist || CHECK_BLIST(bno))
+				dbprintf("bad btree nrecs (%u, min=%u, max=%u) "
+					 "in inode %lld bmap block %lld\n",
+					INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_bmap_dmnr[0],
+					mp->m_bmap_dmxr[0], id->ino,
+					(xfs_dfsbno_t)bno);
+			error++;
+			return;
+		}
+		rp = (xfs_bmbt_rec_32_t *)
+			XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+			block, 1, mp->m_bmap_dmxr[0]);
+		*nex += INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		process_bmbt_reclist(rp, INT_GET(block->bb_numrecs, ARCH_CONVERT), type, id, totd,
+			blkmapp);
+		return;
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[1] ||
+	    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[1])  {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+				 "inode %lld bmap block %lld\n",
+				INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_bmap_dmnr[1],
+				mp->m_bmap_dmxr[1], id->ino, (xfs_dfsbno_t)bno);
+		error++;
+		return;
+	}
+	/* NOTE(review): dmxr[0] here, but nrecs was checked with dmxr[1] - confirm */
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+		mp->m_bmap_dmxr[0]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap, type, id, totd, toti,
+			nex, blkmapp, 0, btype);
+}
+
+static void			/* checks one by-block free space btree block */
+scanfunc_bno(
+	xfs_btree_sblock_t	*ablock,	/* block contents */
+	int			level,		/* level this block should have */
+	xfs_agf_t		*agf,		/* AG header; supplies the AG number */
+	xfs_agblock_t		bno,		/* AG-relative block of ablock */
+	int			isroot)		/* nonzero: minimum nrecs not enforced */
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	int			i;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTB_MAGIC) {
+		dbprintf("bad magic # %#x in btbno block %u/%u\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+		serious_error++;
+		return;
+	}
+	fdblocks++;			/* counted once per block with good magic */
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		if (!sflag)
+			dbprintf("expected level %d got %d in btbno block "
+				 "%u/%u\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+		error++;
+	}
+	set_dbmap(seqno, bno, 1, DBM_BTBNO, seqno, bno);
+	if (level == 0) {		/* leaf: holds free extent records */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0] ||
+		    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0]) {
+			dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+				 "btbno block %u/%u\n",
+				INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[0],
+				mp->m_alloc_mxr[0], seqno, bno);
+			serious_error++;
+			return;
+		}
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			/* scanfunc_cnt later expects these blocks as DBM_FREE1 */
+			set_dbmap(seqno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+				INT_GET(rp[i].ar_blockcount, ARCH_CONVERT), DBM_FREE1,
+				seqno, bno);
+		}
+		return;
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1] ||
+	    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1]) {
+		dbprintf("bad btree nrecs (%u, min=%u, max=%u) in btbno block "
+			 "%u/%u\n",
+			INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[1],
+			mp->m_alloc_mxr[1], seqno, bno);
+		serious_error++;
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)	/* children */
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_bno, TYP_BNOBT);
+}
+
+static void			/* checks one by-count free space btree block */
+scanfunc_cnt(
+	xfs_btree_sblock_t	*ablock,	/* block contents */
+	int			level,		/* level this block should have */
+	xfs_agf_t		*agf,		/* AG header; supplies the AG number */
+	xfs_agblock_t		bno,		/* AG-relative block of ablock */
+	int			isroot)		/* nonzero: minimum nrecs not enforced */
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	int			i;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTC_MAGIC) {
+		dbprintf("bad magic # %#x in btcnt block %u/%u\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+		serious_error++;
+		return;
+	}
+	fdblocks++;			/* counted once per block with good magic */
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		if (!sflag)
+			dbprintf("expected level %d got %d in btcnt block "
+				 "%u/%u\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+		error++;
+	}
+	set_dbmap(seqno, bno, 1, DBM_BTCNT, seqno, bno);
+	if (level == 0) {		/* leaf: holds free extent records */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0] ||
+		    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0])  {
+			dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+				 "btcnt block %u/%u\n",
+				INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[0],
+				mp->m_alloc_mxr[0], seqno, bno);
+			serious_error++;
+			return;
+		}
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			/* blocks must already be DBM_FREE1; they become DBM_FREE2 */
+			check_set_dbmap(seqno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+				INT_GET(rp[i].ar_blockcount, ARCH_CONVERT), DBM_FREE1, DBM_FREE2,
+				seqno, bno);
+			fdblocks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+			agffreeblks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+			if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > agflongest)
+				agflongest = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+		}
+		return;
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1] ||
+	    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1])  {
+		dbprintf("bad btree nrecs (%u, min=%u, max=%u) in btcnt block "
+			 "%u/%u\n",
+			INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[1],
+			mp->m_alloc_mxr[1], seqno, bno);
+		serious_error++;
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)	/* children */
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_cnt, TYP_CNTBT);
+}
+
+/* Check one inode btree block: leaves account for every inode chunk record and
+ * run process_inode() on each inode; interior blocks scan each child. */
+static void
+scanfunc_ino(
+	xfs_btree_sblock_t	*ablock,
+	int			level,
+	xfs_agf_t		*agf,
+	xfs_agblock_t		bno,
+	int			isroot)
+{
+	xfs_agino_t		agino;
+	xfs_inobt_block_t	*block = (xfs_inobt_block_t *)ablock;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	int			i;
+	int			isfree;
+	int			j;
+	int			nfree;
+	int			off;
+	xfs_inobt_ptr_t		*pp;
+	xfs_inobt_rec_t		*rp;
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_IBT_MAGIC) {
+		dbprintf("bad magic # %#x in inobt block %u/%u\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+		serious_error++;
+		return;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		if (!sflag)
+			dbprintf("expected level %d got %d in inobt block %u/%u\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+		error++;
+	}
+	set_dbmap(seqno, bno, 1, DBM_BTINO, seqno, bno);
+	if (level == 0) {
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[0] ||
+		    (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[0])) {
+			dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+				 "inobt block %u/%u\n",
+				INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_inobt_mnr[0],
+				mp->m_inobt_mxr[0], seqno, bno);
+			serious_error++;
+			return;
+		}
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+			1, mp->m_inobt_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			agino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+			off = XFS_INO_TO_OFFSET(mp, agino);
+			if (off == 0) {
+				if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+				    mp->m_sb.sb_inoalignmt &&
+				    (XFS_INO_TO_AGBNO(mp, agino) %
+				     mp->m_sb.sb_inoalignmt))
+					sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
+				set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, agino),
+					(xfs_extlen_t)MAX(1,
+						XFS_INODES_PER_CHUNK >>
+						mp->m_sb.sb_inopblog),
+					DBM_INODE, seqno, bno);
+			}
+			icount += XFS_INODES_PER_CHUNK;
+			agicount += XFS_INODES_PER_CHUNK;
+			ifree += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+			agifreecount += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+			push_cur();
+			set_cur(&typtab[TYP_INODE],
+				XFS_AGB_TO_DADDR(mp, seqno,
+						 XFS_AGINO_TO_AGBNO(mp, agino)),
+				(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+				DB_RING_IGN, NULL);
+			if (iocur_top->data == NULL) {
+				if (!sflag)
+					dbprintf("can't read inode block %u/%u\n",
+						seqno, XFS_AGINO_TO_AGBNO(mp, agino));
+				error++;
+				pop_cur();	/* balance push_cur() before skipping */
+				continue;
+			}
+			for (j = 0, nfree = 0; j < XFS_INODES_PER_CHUNK; j++) {
+				if ((isfree = XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)))
+					nfree++;
+				process_inode(agf, agino + j,
+					(xfs_dinode_t *)((char *)iocur_top->data + ((off + j) << mp->m_sb.sb_inodelog)),
+						isfree);
+			}
+			if (nfree != INT_GET(rp[i].ir_freecount, ARCH_CONVERT)) {
+				if (!sflag)
+					dbprintf("ir_freecount/free mismatch, "
+						 "inode chunk %u/%u, freecount "
+						 "%d nfree %d\n",
+						seqno, agino,
+						INT_GET(rp[i].ir_freecount, ARCH_CONVERT), nfree);
+				error++;
+			}
+			pop_cur();
+		}
+		return;
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[1] ||
+	    (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[1])) {
+		dbprintf("bad btree nrecs (%u, min=%u, max=%u) in inobt block "
+			 "%u/%u\n",
+			INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_inobt_mnr[1],
+			mp->m_inobt_mxr[1], seqno, bno);
+		serious_error++;
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+		mp->m_inobt_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_ino, TYP_INOBT);
+}
+
+/*
+ * Forward to check_set_dbmap(), passing DBM_UNKNOWN ahead of "type".
+ */
+static void
+set_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len,
+	  dbm_t type, xfs_agnumber_t c_agno, xfs_agblock_t c_agbno)
+{
+	const dbm_t	from = DBM_UNKNOWN;	/* presumably the prior state */
+
+	check_set_dbmap(agno, agbno, len, from, type, c_agno, c_agbno);
+}
+
+/* Record "id" as the owner of each of the "len" blocks from agno/agbno. */
+static void
+set_inomap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	inodata_t	*id)
+{
+	inodata_t	**slot;
+	xfs_extlen_t	n;
+
+	if (!check_inomap(agno, agbno, len, id->ino))
+		return;
+	slot = &inomap[agno][agbno];
+	for (n = 0; n < len; n++) {
+		slot[n] = id;
+		if (verbose || id->ilist ||
+		    (blist_size && CHECK_BLISTA(agno, agbno + n)))
+			dbprintf("setting inode to %lld for block %u/%u\n",
+				id->ino, agno, agbno + n);
+	}
+}
+
+/* Forward to check_set_rdbmap(), passing DBM_UNKNOWN ahead of "type". */
+static void
+set_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len, dbm_t type)
+{
+	const dbm_t	from = DBM_UNKNOWN;	/* presumably the prior state */
+
+	check_set_rdbmap(bno, len, from, type);
+}
+
+/* Record "id" as the owner of "len" realtime blocks starting at "bno". */
+static void
+set_rinomap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	inodata_t	*id)
+{
+	inodata_t	**slot;
+	xfs_extlen_t	n;
+
+	if (!check_rinomap(bno, len, id->ino))
+		return;
+	/* realtime blocks use the inomap row indexed by sb_agcount */
+	slot = &inomap[mp->m_sb.sb_agcount][bno];
+	for (n = 0; n < len; n++) {
+		slot[n] = id;
+		if (verbose || id->ilist ||
+		    (blist_size && CHECK_BLIST(bno + n)))
+			dbprintf("setting inode to %lld for rtblock %llu\n",
+				id->ino, bno + n);
+	}
+}
+
+/* Record the link count and the dir/security flags in "id", printing the */
+/* result when verbose or id->ilist is set.                               */
+static void
+setlink_inode(inodata_t *id, nlink_t nlink, int isdir, int security)
+{
+	const char	*kind = isdir ? "is" : "not";
+
+	id->security = security;
+	id->isdir = isdir;
+	id->link_set = nlink;
+	if (!verbose && !id->ilist)
+		return;
+	dbprintf("inode %lld nlink %u %s dir\n", id->ino, nlink, kind);
+}
diff --git a/db/check.h b/db/check.h
new file mode 100644
index 000000000..71d557688
--- /dev/null
+++ b/db/check.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	check_init(void);	/* called once from init_commands() */
diff --git a/db/cntbt.c b/db/cntbt.c
new file mode 100644
index 000000000..5e4a6dafb
--- /dev/null
+++ b/db/cntbt.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "cntbt.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int	cntbt_key_count(void *obj, int startoff);	/* count/offset hooks */
+static int	cntbt_key_offset(void *obj, int startoff, int idx); /* used by */
+static int	cntbt_ptr_count(void *obj, int startoff);	/* cntbt_flds[] */
+static int	cntbt_ptr_offset(void *obj, int startoff, int idx);
+static int	cntbt_rec_count(void *obj, int startoff);
+static int	cntbt_rec_offset(void *obj, int startoff, int idx);
+
+const field_t	cntbt_hfld[] = {	/* one unnamed FLDT_CNTBT field */
+	{ "", FLDT_CNTBT, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_alloc_block_t, bb_ ## f))	/* header field bb_<f> */
+const field_t	cntbt_flds[] = {
+	{ "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_CNTBT },
+	{ "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_CNTBT },
+	{ "recs", FLDT_CNTBTREC, cntbt_rec_offset, cntbt_rec_count,	/* count is 0 above level 0 */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "keys", FLDT_CNTBTKEY, cntbt_key_offset, cntbt_key_count,	/* count is 0 at level 0 */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_CNTBTPTR, cntbt_ptr_offset, cntbt_ptr_count,	/* count is 0 at level 0 */
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_CNTBT },
+	{ NULL }
+};
+
+#define	KOFF(f)	bitize(offsetof(xfs_alloc_key_t, ar_ ## f))	/* key field ar_<f> */
+const field_t	cntbt_key_flds[] = {	/* blockcount is listed before startblock */
+	{ "blockcount", FLDT_EXTLEN, OI(KOFF(blockcount)), C1, 0, TYP_NONE },
+	{ "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+	{ NULL }
+};
+
+#define	ROFF(f)	bitize(offsetof(xfs_alloc_rec_t, ar_ ## f))	/* record field ar_<f> */
+const field_t	cntbt_rec_flds[] = {
+	{ "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+	{ "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/*ARGSUSED*/
+static int
+cntbt_key_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_alloc_block_t	*blk = obj;
+
+	/* a level-0 block reports no keys; other levels report bb_numrecs */
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+/*ARGSUSED*/
+static int
+cntbt_key_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_alloc_block_t	*blk = obj;
+	xfs_alloc_key_t		*key;
+
+	/* bitize()d distance from the block start to key "idx" (level > 0) */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+	return bitize((int)((char *)key - (char *)blk));
+}
+
+/*ARGSUSED*/
+static int
+cntbt_ptr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_alloc_block_t	*blk = obj;
+
+	/* a level-0 block reports no pointers; other levels report bb_numrecs */
+	ASSERT(startoff == 0);
+	if (INT_GET(blk->bb_level, ARCH_CONVERT) != 0)
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+/*ARGSUSED*/
+static int
+cntbt_ptr_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_alloc_block_t	*blk = obj;
+	xfs_alloc_ptr_t		*ptr;
+
+	/* bitize()d distance from the block start to pointer "idx" (level > 0) */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+	ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+	return bitize((int)((char *)ptr - (char *)blk));
+}
+
+/*ARGSUSED*/
+static int
+cntbt_rec_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_alloc_block_t	*blk = obj;
+
+	/* only a level-0 block reports records; any higher level reports none */
+	ASSERT(startoff == 0);
+	if (!(INT_GET(blk->bb_level, ARCH_CONVERT) > 0))
+		return INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+	return 0;
+}
+
+/*ARGSUSED*/
+static int
+cntbt_rec_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_alloc_block_t	*blk = obj;
+	xfs_alloc_rec_t		*rec;
+
+	/* bitize()d distance from the block start to record "idx" (level 0) */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+	rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
+	return bitize((int)((char *)rec - (char *)blk));
+}
+
+/*ARGSUSED*/
+int
+cntbt_size(void *obj, int startoff, int idx)
+{
+	/*
+	 * Always bitize(sb_blocksize); no argument is consulted.
+	 */
+	return bitize(mp->m_sb.sb_blocksize);
+}
diff --git a/db/cntbt.h b/db/cntbt.h
new file mode 100644
index 000000000..768150385
--- /dev/null
+++ b/db/cntbt.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+/* Field tables defined in cntbt.c. */
+extern const struct field	cntbt_flds[];
+extern const struct field	cntbt_hfld[];
+extern const struct field	cntbt_key_flds[];
+extern const struct field	cntbt_rec_flds[];
+/* Returns bitize() of the superblock block size; the arguments are unused. */
+extern int	cntbt_size(void *obj, int startoff, int idx);
diff --git a/db/command.c b/db/command.c
new file mode 100644
index 000000000..5c8153a66
--- /dev/null
+++ b/db/command.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "addr.h"
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "block.h"
+#include "bmap.h"
+#include "check.h"
+#include "command.h"
+#include "convert.h"
+#include "debug.h"
+#include "type.h"
+#include "echo.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "frag.h"
+#include "freesp.h"
+#include "help.h"
+#include "hash.h"
+#include "inode.h"
+#include "input.h"
+#include "io.h"
+#include "output.h"
+#include "print.h"
+#include "quit.h"
+#include "sb.h"
+#include "uuid.h"
+#include "write.h"
+#include "malloc.h"
+#include "dquot.h"
+
+cmdinfo_t	*cmdtab;	/* command table; add_command() keeps it sorted by name */
+int		ncmds;		/* number of entries in cmdtab */
+
+static int	cmd_compare(const void *a, const void *b);
+
+static int
+cmd_compare(const void *a, const void *b)
+{
+	const cmdinfo_t	*lhs = a, *rhs = b;	/* qsort() passes table entries */
+	return strcmp(lhs->name, rhs->name);	/* order by command name */
+}
+
+/* Append a copy of *ci to cmdtab, then re-sort the table by name. */
+void
+add_command(const cmdinfo_t *ci)
+{
+	cmdtab = xrealloc((void *)cmdtab, ++ncmds * sizeof(cmdtab[0]));
+	cmdtab[ncmds - 1] = *ci;
+	qsort(cmdtab, ncmds, sizeof(cmdtab[0]), cmd_compare);
+}
+
+/* Look up argv[0], validate the argument count and run the command. */
+int
+command(int argc, char **argv)
+{
+	const cmdinfo_t	*ct = find_command(argv[0]);
+	int		nargs = argc - 1;	/* argv[0] is the name */
+	int		unbounded;
+
+	if (ct == NULL) {
+		dbprintf("command %s not found\n", argv[0]);
+		return 0;
+	}
+	unbounded = ct->argmax == -1;
+	if (nargs >= ct->argmin && (unbounded || nargs <= ct->argmax)) {
+		/* optind is reset before every handler invocation */
+		optind = 0;
+		return ct->cfunc(argc, argv);
+	}
+	dbprintf("bad argument count %d to %s, expected ", nargs, argv[0]);
+	if (unbounded)
+		dbprintf("at least %d", ct->argmin);
+	else if (ct->argmin == ct->argmax)
+		dbprintf("%d", ct->argmin);
+	else
+		dbprintf("between %d and %d", ct->argmin, ct->argmax);
+	dbprintf(" arguments\n");
+	return 0;
+}
+
+/* Return the cmdtab entry whose name or altname equals cmd, else NULL. */
+const cmdinfo_t *
+find_command(const char *cmd)
+{
+	int	i;
+
+	for (i = 0; i < ncmds; i++) {
+		const char	*alt = cmdtab[i].altname;
+		if (!strcmp(cmdtab[i].name, cmd) || (alt && !strcmp(alt, cmd)))
+			return &cmdtab[i];
+	}
+	return NULL;
+}
+
+void
+init_commands(void)
+{
+	addr_init();	/* one *_init() call per command module, in this order */
+	agf_init();
+	agfl_init();
+	agi_init();
+	block_init();
+	bmap_init();
+	check_init();
+	convert_init();	/* add_command()s the "convert" command */
+	debug_init();
+	echo_init();
+	frag_init();
+	freesp_init();
+	help_init();
+	hash_init();
+	inode_init();
+	input_init();
+	io_init();
+	output_init();
+	print_init();
+	quit_init();
+	sb_init();
+	uuid_init();
+	type_init();
+	write_init();
+	dquot_init();
+}
diff --git a/db/command.h b/db/command.h
new file mode 100644
index 000000000..dd35ed627
--- /dev/null
+++ b/db/command.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+typedef int (*cfunc_t)(int argc, char **argv);	/* command handler */
+typedef void (*helpfunc_t)(void);		/* help callback */
+
+typedef struct cmdinfo
+{
+	const char	*name;		/* primary name matched by find_command() */
+	const char	*altname;	/* alternate name; may be NULL */
+	cfunc_t		cfunc;		/* handler run by command() */
+	int		argmin;		/* fewest arguments, not counting the name */
+	int		argmax;		/* most arguments; -1 means no upper limit */
+	int		canpush;	/* NOTE(review): meaning not visible here */
+	const char	*args;		/* presumably the argument synopsis */
+	const char	*oneline;	/* presumably a one-line description */
+	helpfunc_t      help;		/* may be NULL (convert_cmd passes NULL) */
+} cmdinfo_t;
+
+extern cmdinfo_t	*cmdtab;
+extern int		ncmds;
+
+extern void		add_command(const cmdinfo_t *ci);
+extern int		command(int argc, char **argv);
+extern const cmdinfo_t	*find_command(const char *cmd);
+extern void		init_commands(void);
diff --git a/db/convert.c b/db/convert.c
new file mode 100644
index 000000000..02a4d24fe
--- /dev/null
+++ b/db/convert.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "convert.h"
+#include "output.h"
+#include "mount.h"
+
+#define	M(A)	(1 << CT_ ## A)		/* mask bit for conversion type CT_<A> */
+#define	agblock_to_bytes(x)	\
+	((__uint64_t)(x) << mp->m_sb.sb_blocklog)	/* shift by sb_blocklog */
+#define	agino_to_bytes(x)	\
+	((__uint64_t)(x) << mp->m_sb.sb_inodelog)	/* shift by sb_inodelog */
+#define	agnumber_to_bytes(x)	\
+	agblock_to_bytes((__uint64_t)(x) * mp->m_sb.sb_agblocks) /* x * sb_agblocks blocks */
+#define	daddr_to_bytes(x)	\
+	((__uint64_t)(x) << BBSHIFT)
+#define	fsblock_to_bytes(x)	\
+	(agnumber_to_bytes(XFS_FSB_TO_AGNO(mp, (x))) + \
+	 agblock_to_bytes(XFS_FSB_TO_AGBNO(mp, (x))))	/* AG part + block-in-AG part */
+#define	ino_to_bytes(x)		\
+	(agnumber_to_bytes(XFS_INO_TO_AGNO(mp, (x))) + \
+	 agino_to_bytes(XFS_INO_TO_AGINO(mp, (x))))	/* AG part + inode-in-AG part */
+#define	inoidx_to_bytes(x)	\
+	((__uint64_t)(x) << mp->m_sb.sb_inodelog)	/* same shift as agino_to_bytes */
+
+typedef enum {
+	CT_NONE = -1,		/* returned by lookupcty() for an unknown name */
+	CT_AGBLOCK,		/* xfs_agblock_t */
+	CT_AGINO,		/* xfs_agino_t */
+	CT_AGNUMBER,		/* xfs_agnumber_t */
+	CT_BBOFF,		/* byte offset in daddr */
+	CT_BLKOFF,		/* byte offset in fsb/agb */
+	CT_BYTE,		/* byte in filesystem */
+	CT_DADDR,		/* xfs_daddr_t */
+	CT_FSBLOCK,		/* xfs_fsblock_t */
+	CT_INO,			/* xfs_ino_t */
+	CT_INOIDX,		/* index of inode in fsblock */
+	CT_INOOFF,		/* byte offset in inode */
+	NCTS			/* number of types; sizes ctydescs[] and cvals[] */
+} ctype_t;
+
+typedef struct ctydesc {
+	ctype_t		ctype;		/* the type this entry describes */
+	int		allowed;	/* M() bits of types that may accompany it */
+	const char	**names;	/* NULL-terminated names lookupcty() accepts */
+} ctydesc_t;
+
+typedef union {		/* one parsed value; getvalue() fills the member for its ctype_t */
+	xfs_agblock_t	agblock;
+	xfs_agino_t	agino;
+	xfs_agnumber_t	agnumber;
+	int		bboff;
+	int		blkoff;
+	__uint64_t	byte;
+	xfs_daddr_t	daddr;
+	xfs_fsblock_t	fsblock;
+	xfs_ino_t	ino;
+	int		inoidx;
+	int		inooff;
+} cval_t;
+
+static __uint64_t		bytevalue(ctype_t ctype, cval_t *val);
+static int		convert_f(int argc, char **argv);
+static int		getvalue(char *s, ctype_t ctype, cval_t *val);
+static ctype_t		lookupcty(char *ctyname);
+/* NULL-terminated lists of the names lookupcty() accepts for each type. */
+static const char	*agblock_names[] = { "agblock", "agbno", NULL };
+static const char	*agino_names[] = { "agino", "aginode", NULL };
+static const char	*agnumber_names[] = { "agnumber", "agno", NULL };
+static const char	*bboff_names[] = { "bboff", "daddroff", NULL };
+static const char	*blkoff_names[] = { "blkoff", "fsboff", "agboff",
+					    NULL };
+static const char	*byte_names[] = { "byte", "fsbyte", NULL };
+static const char	*daddr_names[] = { "daddr", "bb", NULL };
+static const char	*fsblock_names[] = { "fsblock", "fsb", "fsbno", NULL };
+static const char	*ino_names[] = { "ino", "inode", NULL };
+static const char	*inoidx_names[] = { "inoidx", "offset", NULL };
+static const char	*inooff_names[] = { "inooff", "inodeoff", NULL };
+
+static const ctydesc_t	ctydescs[NCTS] = {	/* indexed by ctype_t value */
+	{ CT_AGBLOCK, M(AGNUMBER)|M(BBOFF)|M(BLKOFF)|M(INOIDX)|M(INOOFF),
+	  agblock_names },
+	{ CT_AGINO, M(AGNUMBER)|M(INOOFF), agino_names },
+	{ CT_AGNUMBER,
+	  M(AGBLOCK)|M(AGINO)|M(BBOFF)|M(BLKOFF)|M(INOIDX)|M(INOOFF),
+	  agnumber_names },
+	{ CT_BBOFF, M(AGBLOCK)|M(AGNUMBER)|M(DADDR)|M(FSBLOCK), bboff_names },
+	{ CT_BLKOFF, M(AGBLOCK)|M(AGNUMBER)|M(FSBLOCK), blkoff_names },
+	{ CT_BYTE, 0, byte_names },	/* combines with nothing else */
+	{ CT_DADDR, M(BBOFF), daddr_names },
+	{ CT_FSBLOCK, M(BBOFF)|M(BLKOFF)|M(INOIDX), fsblock_names },
+	{ CT_INO, M(INOOFF), ino_names },
+	{ CT_INOIDX, M(AGBLOCK)|M(AGNUMBER)|M(FSBLOCK)|M(INOOFF),
+	  inoidx_names },
+	{ CT_INOOFF,
+	  M(AGBLOCK)|M(AGINO)|M(AGNUMBER)|M(FSBLOCK)|M(INO)|M(INOIDX),
+	  inooff_names },
+};
+
+static const cmdinfo_t	convert_cmd =	/* "convert": argmin 3, argmax 9 */
+	{ "convert", NULL, convert_f, 3, 9, 0, "type num [type num]... type",
+	  "convert from one address form to another", NULL };
+
+static __uint64_t	/* *val, read as conversion type "ctype", in bytes */
+bytevalue(ctype_t ctype, cval_t *val)
+{
+	switch (ctype) {
+	case CT_AGBLOCK:
+		return agblock_to_bytes(val->agblock);
+	case CT_AGINO:
+		return agino_to_bytes(val->agino);
+	case CT_AGNUMBER:
+		return agnumber_to_bytes(val->agnumber);
+	case CT_BBOFF:
+		return (__uint64_t)val->bboff;
+	case CT_BLKOFF:
+		return (__uint64_t)val->blkoff;
+	case CT_BYTE:
+		return val->byte;
+	case CT_DADDR:
+		return daddr_to_bytes(val->daddr);
+	case CT_FSBLOCK:
+		return fsblock_to_bytes(val->fsblock);
+	case CT_INO:
+		return ino_to_bytes(val->ino);
+	case CT_INOIDX:
+		return inoidx_to_bytes(val->inoidx);
+	case CT_INOOFF:
+		return (__uint64_t)val->inooff;
+	case CT_NONE:
+	case NCTS:
+		break;	/* NOTREACHED; a label must be followed by a statement */
+	}
+	return 0;
+}
+
+/* "convert" command: sum the typed inputs as bytes, print them as the last type. */
+static int
+convert_f(int argc, char **argv)
+{
+	ctype_t		c;
+	int		conmask;
+	cval_t		cvals[NCTS];
+	int		i;
+	int		mask;
+	__uint64_t	v;
+	ctype_t		wtype;
+
+	/* move past the "convert" command */
+	argc--;
+	argv++;
+
+	if ((argc % 2) != 1) {
+		dbprintf("bad argument count %d to convert, expected 3,5,7,9 "
+			 "arguments\n", argc);
+		return 0;
+	}
+	if ((wtype = lookupcty(argv[argc - 1])) == CT_NONE) {
+		dbprintf("unknown conversion type %s\n", argv[argc - 1]);
+		return 0;
+	}
+
+	for (i = mask = conmask = 0; i < (argc - 1) / 2; i++) {
+		c = lookupcty(argv[i * 2]);
+		if (c == CT_NONE) {
+			dbprintf("unknown conversion type %s\n", argv[i * 2]);
+			return 0;
+		}
+		if (c == wtype) {
+			dbprintf("result type same as argument\n");
+			return 0;
+		}
+		if (conmask & (1 << c)) {
+			dbprintf("conflicting conversion type %s\n", argv[i * 2]);
+			return 0;
+		}
+		if (!getvalue(argv[i * 2 + 1], c, &cvals[c]))
+			return 0;
+		mask |= 1 << c;
+		conmask |= ~ctydescs[c].allowed;	/* types now ruled out */
+	}
+	if (cur_agno != NULLAGNUMBER && (conmask & M(AGNUMBER)) == 0) {
+		cvals[CT_AGNUMBER].agnumber = cur_agno;
+		mask |= M(AGNUMBER);
+		conmask |= ~ctydescs[CT_AGNUMBER].allowed;
+	}
+	v = 0;
+	for (c = (ctype_t)0; c < NCTS; c++) {
+		if (!(mask & (1 << c)))
+			continue;
+		v += bytevalue(c, &cvals[c]);
+	}
+	switch (wtype) {
+	case CT_AGBLOCK:
+		v = XFS_DADDR_TO_AGBNO(mp, v >> BBSHIFT);
+		break;
+	case CT_AGINO:
+		v = (v >> mp->m_sb.sb_inodelog) %
+		    (mp->m_sb.sb_agblocks << mp->m_sb.sb_inopblog);
+		break;
+	case CT_AGNUMBER:
+		v = XFS_DADDR_TO_AGNO(mp, v >> BBSHIFT);
+		break;
+	case CT_BBOFF:
+		v &= BBMASK;
+		break;
+	case CT_BLKOFF:
+		v &= mp->m_blockmask;
+		break;
+	case CT_BYTE:
+		break;
+	case CT_DADDR:
+		v >>= BBSHIFT;
+		break;
+	case CT_FSBLOCK:
+		v = XFS_DADDR_TO_FSB(mp, v >> BBSHIFT);
+		break;
+	case CT_INO:
+		v = XFS_AGINO_TO_INO(mp, XFS_DADDR_TO_AGNO(mp, v >> BBSHIFT),
+			(v >> mp->m_sb.sb_inodelog) %
+			(mp->m_sb.sb_agblocks << mp->m_sb.sb_inopblog));
+		break;
+	case CT_INOIDX:
+		v = (v >> mp->m_sb.sb_inodelog) & (mp->m_sb.sb_inopblock - 1);
+		break;
+	case CT_INOOFF:
+		v &= mp->m_sb.sb_inodesize - 1;
+		break;
+	case CT_NONE:
+	case NCTS:
+		break;	/* NOTREACHED; a label must be followed by a statement */
+	}
+	dbprintf("0x%llx (%llu)\n", (unsigned long long)v, (unsigned long long)v);
+	return 0;
+}
+
+void
+convert_init(void)	/* registers convert_cmd; called from init_commands() */
+{
+	add_command(&convert_cmd);
+}
+
+static int	/* 1 on success; 0, after a message, when s is not a number */
+getvalue(char *s, ctype_t ctype, cval_t *val)
+{
+	char		*p;
+	__uint64_t	v;
+
+	v = strtoull(s, &p, 0);
+	if (p == s || *p != '\0') {	/* no digits at all, or trailing junk */
+		dbprintf("%s is not a number\n", s);
+		return 0;
+	}
+	switch (ctype) {
+	case CT_AGBLOCK:
+		val->agblock = (xfs_agblock_t)v;
+		break;
+	case CT_AGINO:
+		val->agino = (xfs_agino_t)v;
+		break;
+	case CT_AGNUMBER:
+		val->agnumber = (xfs_agnumber_t)v;
+		break;
+	case CT_BBOFF:
+		val->bboff = (int)v;
+		break;
+	case CT_BLKOFF:
+		val->blkoff = (int)v;
+		break;
+	case CT_BYTE:
+		val->byte = (__uint64_t)v;
+		break;
+	case CT_DADDR:
+		val->daddr = (xfs_daddr_t)v;
+		break;
+	case CT_FSBLOCK:
+		val->fsblock = (xfs_fsblock_t)v;
+		break;
+	case CT_INO:
+		val->ino = (xfs_ino_t)v;
+		break;
+	case CT_INOIDX:
+		val->inoidx = (int)v;
+		break;
+	case CT_INOOFF:
+		val->inooff = (int)v;
+		break;
+	case CT_NONE:
+	case NCTS:
+		break;	/* NOTREACHED; a label must be followed by a statement */
+	}
+	return 1;
+}
+
+/* Find the conversion type for which ctyname is one of the listed names; */
+/* CT_NONE is returned when no ctydescs[] entry mentions it.             */
+static ctype_t
+lookupcty(char *ctyname)
+{
+	const char	**alias;
+	int		idx;
+
+	for (idx = 0; idx < NCTS; idx++)
+		for (alias = ctydescs[idx].names; *alias != NULL; alias++)
+			if (!strcmp(ctyname, *alias))
+				return (ctype_t)idx;
+	return CT_NONE;
+}
diff --git a/db/convert.h b/db/convert.h
new file mode 100644
index 000000000..0ddbca4d1
--- /dev/null
+++ b/db/convert.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	convert_init(void);	/* defined in convert.c; calls add_command(&convert_cmd) */
diff --git a/db/data.c b/db/data.c
new file mode 100644
index 000000000..c53a5bbc4
--- /dev/null
+++ b/db/data.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+
+int			blkbb;	/* NOTE(review): presumably basic blocks per fs block — confirm where it is assigned */
+xfs_agnumber_t		cur_agno = NULLAGNUMBER;	/* starts out as NULLAGNUMBER */
+int			exitcode;	/* NOTE(review): presumably the status the program exits with — confirm in main */
+int                     flag_expert_mode = 0;	/* starts cleared */
+int                     flag_readonly = 0;	/* starts cleared */
+libxfs_init_t		xfsargs;	/* NOTE(review): presumably the arguments given to libxfs initialisation — confirm at the call site */
diff --git a/db/data.h b/db/data.h
new file mode 100644
index 000000000..77e51ffbc
--- /dev/null
+++ b/db/data.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern int		blkbb;	/* defined in data.c */
+extern xfs_agnumber_t	cur_agno;	/* defined in data.c, initialised to NULLAGNUMBER */
+extern int		exitcode;	/* defined in data.c */
+extern int              flag_expert_mode;	/* defined in data.c, initialised to 0 */
+extern int              flag_readonly;	/* defined in data.c, initialised to 0 */
+extern int              flag_arch;	/* NOTE(review): data.c in this patch has no definition for flag_arch — confirm it is defined elsewhere */
+extern libxfs_init_t	xfsargs;	/* defined in data.c */
diff --git a/db/dbread.c b/db/dbread.c
new file mode 100644
index 000000000..95032ba3b
--- /dev/null
+++ b/db/dbread.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bmap.h"
+#include "data.h"
+#include "dbread.h"
+#include "io.h"
+#include "mount.h"
+
+int
+dbread(void *buf, int nblocks, xfs_fileoff_t bno, int whichfork)
+{
+	bmap_ext_t	bm;
+	char		*bp;
+	xfs_dfiloff_t	eb;
+	xfs_dfiloff_t	end;
+	int		i;
+	int		nex;
+	/* Fill buf with nblocks fs blocks from file offset bno of fork whichfork, zero-filling unmapped ranges; returns 0 or the first nonzero read_bbs() result. */
+	nex = 1;	/* NOTE(review): presumably "at most one extent per bmap() call" — confirm bmap()'s nex contract */
+	end = bno + nblocks;
+	bp = buf;
+	while (bno < end) {
+		bmap(bno, end - bno, whichfork, &nex, &bm);
+		if (nex == 0) {	/* bmap reported no extent: fake one starting at end so the remainder is zero-filled below */
+			bm.startoff = end;
+			bm.blockcount = 1;
+		}
+		if (bm.startoff > bno) {	/* gap in front of the extent: zero up to its start, capped at end */
+			eb = end < bm.startoff ? end : bm.startoff;
+			i = (int)XFS_FSB_TO_B(mp, eb - bno);
+			memset(bp, 0, i);
+			bp += i;
+			bno = eb;
+		}
+		if (bno == end)
+			break;
+		if (bno > bm.startoff) {	/* extent begins before bno: drop its leading blocks */
+			bm.blockcount -= bno - bm.startoff;
+			bm.startblock += bno - bm.startoff;
+			bm.startoff = bno;
+		}
+		if (bm.startoff + bm.blockcount > end)	/* extent runs past end: clip it */
+			bm.blockcount = end - bm.startoff;
+		i = read_bbs(XFS_FSB_TO_DADDR(mp, bm.startblock),
+			     (int)XFS_FSB_TO_BB(mp, bm.blockcount),
+			     (void **)&bp, NULL);	/* NOTE(review): assumes read_bbs reads into *bufp and leaves bp itself unchanged — confirm in io.c */
+		if (i)
+			return i;
+		bp += XFS_FSB_TO_B(mp, bm.blockcount);
+		bno += bm.blockcount;
+	}
+	return 0;
+}
diff --git a/db/dbread.h b/db/dbread.h
new file mode 100644
index 000000000..cab33971f
--- /dev/null
+++ b/db/dbread.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern int	dbread(void *buf, int nblocks, xfs_fileoff_t bno,
+		       int whichfork);	/* defined in dbread.c; returns 0, or the nonzero read_bbs() result */
diff --git a/db/debug.c b/db/debug.c
new file mode 100644
index 000000000..093079d23
--- /dev/null
+++ b/db/debug.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "debug.h"
+#include "output.h"
+
+static int	debug_f(int argc, char **argv);
+
+static const cmdinfo_t	debug_cmd =	/* entry passed to add_command() by debug_init(); handler is debug_f */
+	{ "debug", NULL, debug_f, 0, 1, 0, "[flagbits]",	/* NOTE(review): 0, 1 are presumably min/max argument counts — confirm in command.h */
+	  "set debug option bits", NULL };
+
+long	debug_state;	/* written and printed by debug_f; exported through debug.h */
+
+static int		/* always returns 0 */
+debug_f(
+	int	argc,
+	char	**argv)
+{
+	char	*p;
+	long	v;	/* candidate value; debug_state is written only after it validates */
+	/* "debug [flagbits]": optionally set debug_state from argv[1], then print the current value. */
+	v = argc > 1 ? strtol(argv[1], &p, 0) : debug_state;
+	if (argc > 1 && (p == argv[1] || *p != '\0')) {	/* nothing converted, or trailing junk */
+		dbprintf("bad value for debug %s\n", argv[1]);
+		return 0;
+	}
+	debug_state = v;
+	dbprintf("debug = %ld\n", debug_state);
+	return 0;
+}
+
+void
+debug_init(void)
+{
+	add_command(&debug_cmd);	/* hands debug_cmd to add_command(); NOTE(review): presumably this is what makes "debug" available — confirm in command.c */
+}
diff --git a/db/debug.h b/db/debug.h
new file mode 100644
index 000000000..1224b5953
--- /dev/null
+++ b/db/debug.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#define	DEBUG_FLIST	0x1	/* NOTE(review): presumably a bit tested in debug_state for flist debugging — confirm at use sites */
+
+extern long	debug_state;	/* defined in debug.c; set and printed by the "debug" command */
+extern void	debug_init(void);	/* defined in debug.c; calls add_command(&debug_cmd) */
diff --git a/db/dir.c b/db/dir.c
new file mode 100644
index 000000000..32b03fa6c
--- /dev/null
+++ b/db/dir.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "dir.h"
+#include "io.h"
+#include "data.h"
+#include "mount.h"
+
+static int	dir_leaf_entries_count(void *obj, int startoff);
+static int	dir_leaf_hdr_count(void *obj, int startoff);
+static int	dir_leaf_name_count(void *obj, int startoff);
+static int	dir_leaf_namelist_count(void *obj, int startoff);
+static int	dir_leaf_namelist_offset(void *obj, int startoff, int idx);
+static int	dir_node_btree_count(void *obj, int startoff);
+static int	dir_node_hdr_count(void *obj, int startoff);
+
+const field_t	dir_hfld[] = {	/* one unnamed FLDT_DIR field; NOTE(review): presumably the top-level handle for a whole dir block — confirm in type.c */
+	{ "", FLDT_DIR, OI(0), C1, 0, TYP_NONE },
+	{ NULL }	/* list ends with a NULL-named entry */
+};
+
+#define	LOFF(f)	bitize(offsetof(xfs_dir_leafblock_t, f))	/* bitize()d offset of member f in a leaf block */
+#define	NOFF(f)	bitize(offsetof(xfs_da_intnode_t, f))	/* same, for a da interior node */
+const field_t	dir_flds[] = {	/* leaf and node layouts side by side; each count callback returns 0 when the block magic is not its own */
+	{ "lhdr", FLDT_DIR_LEAF_HDR, OI(LOFF(hdr)), dir_leaf_hdr_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "nhdr", FLDT_DIR_NODE_HDR, OI(NOFF(hdr)), dir_node_hdr_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "entries", FLDT_DIR_LEAF_ENTRY, OI(LOFF(entries)),
+	  dir_leaf_entries_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ "btree", FLDT_DIR_NODE_ENTRY, OI(NOFF(btree)),
+	  dir_node_btree_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ "namelist", FLDT_DIR_LEAF_NAME, dir_leaf_namelist_offset,	/* offset comes from a callback (FLD_OFFSET), not a fixed OI() */
+	  dir_leaf_namelist_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ NULL }
+};
+
+#define	BOFF(f)	bitize(offsetof(xfs_da_blkinfo_t, f))	/* bitize()d offset of member f in xfs_da_blkinfo_t */
+const field_t	dir_blkinfo_flds[] = {	/* members of xfs_da_blkinfo_t */
+	{ "forw", FLDT_DIRBLOCK, OI(BOFF(forw)), C1, 0, TYP_INODATA },
+	{ "back", FLDT_DIRBLOCK, OI(BOFF(back)), C1, 0, TYP_INODATA },
+	{ "magic", FLDT_UINT16X, OI(BOFF(magic)), C1, 0, TYP_NONE },
+	{ "pad", FLDT_UINT16X, OI(BOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },	/* NOTE(review): FLD_SKIPALL presumably hides padding from "print all" — confirm in field.h */
+	{ NULL }
+};
+
+#define	LEOFF(f)	bitize(offsetof(xfs_dir_leaf_entry_t, f))	/* bitize()d offset of member f in xfs_dir_leaf_entry_t */
+const field_t	dir_leaf_entry_flds[] = {	/* members of xfs_dir_leaf_entry_t */
+	{ "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
+	{ "nameidx", FLDT_UINT16D, OI(LEOFF(nameidx)), C1, 0, TYP_NONE },	/* compared against byte offsets by dir_leaf_name_count() */
+	{ "namelen", FLDT_UINT8D, OI(LEOFF(namelen)), C1, 0, TYP_NONE },
+	{ "pad2", FLDT_UINT8X, OI(LEOFF(pad2)), C1, FLD_SKIPALL, TYP_NONE },
+	{ NULL }
+};
+
+#define	LHOFF(f)	bitize(offsetof(xfs_dir_leaf_hdr_t, f))	/* bitize()d offset of member f in xfs_dir_leaf_hdr_t */
+const field_t	dir_leaf_hdr_flds[] = {	/* members of xfs_dir_leaf_hdr_t */
+	{ "info", FLDT_DIR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },
+	{ "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },	/* read by the leaf count callbacks in this file */
+	{ "namebytes", FLDT_UINT16D, OI(LHOFF(namebytes)), C1, 0, TYP_NONE },
+	{ "firstused", FLDT_UINT16D, OI(LHOFF(firstused)), C1, 0, TYP_NONE },
+	{ "holes", FLDT_UINT8D, OI(LHOFF(holes)), C1, 0, TYP_NONE },
+	{ "pad1", FLDT_UINT8X, OI(LHOFF(pad1)), C1, FLD_SKIPALL, TYP_NONE },
+	{ "freemap", FLDT_DIR_LEAF_MAP, OI(LHOFF(freemap)),
+	  CI(XFS_DIR_LEAF_MAPSIZE), FLD_ARRAY, TYP_NONE },	/* fixed-length array of XFS_DIR_LEAF_MAPSIZE elements */
+	{ NULL }
+};
+
+#define	LMOFF(f)	bitize(offsetof(xfs_dir_leaf_map_t, f))	/* bitize()d offset of member f in xfs_dir_leaf_map_t */
+const field_t	dir_leaf_map_flds[] = {	/* members of xfs_dir_leaf_map_t (element type of the leaf header's freemap) */
+	{ "base", FLDT_UINT16D, OI(LMOFF(base)), C1, 0, TYP_NONE },
+	{ "size", FLDT_UINT16D, OI(LMOFF(size)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	LNOFF(f)	bitize(offsetof(xfs_dir_leaf_name_t, f))	/* bitize()d offset of member f in xfs_dir_leaf_name_t */
+const field_t	dir_leaf_name_flds[] = {	/* members of xfs_dir_leaf_name_t */
+	{ "inumber", FLDT_DIR_INO, OI(LNOFF(inumber)), C1, 0, TYP_INODE },
+	{ "name", FLDT_CHARNS, OI(LNOFF(name)), dir_leaf_name_count, FLD_COUNT,	/* length supplied by dir_leaf_name_count() */
+	  TYP_NONE },
+	{ NULL }
+};
+
+#define	EOFF(f)	bitize(offsetof(xfs_da_node_entry_t, f))	/* bitize()d offset of member f in xfs_da_node_entry_t */
+const field_t	dir_node_entry_flds[] = {	/* members of xfs_da_node_entry_t */
+	{ "hashval", FLDT_UINT32X, OI(EOFF(hashval)), C1, 0, TYP_NONE },
+	{ "before", FLDT_DIRBLOCK, OI(EOFF(before)), C1, 0, TYP_INODATA },
+	{ NULL }
+};
+
+#define	HOFF(f)	bitize(offsetof(xfs_da_node_hdr_t, f))	/* bitize()d offset of member f in xfs_da_node_hdr_t */
+const field_t	dir_node_hdr_flds[] = {	/* members of xfs_da_node_hdr_t */
+	{ "info", FLDT_DIR_BLKINFO, OI(HOFF(info)), C1, 0, TYP_NONE },
+	{ "count", FLDT_UINT16D, OI(HOFF(count)), C1, 0, TYP_NONE },	/* read by dir_node_btree_count() */
+	{ "level", FLDT_UINT16D, OI(HOFF(level)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/*ARGSUSED*/
+static int
+dir_leaf_entries_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir_leafblock_t	*leaf = obj;
+	int			is_leaf;
+
+	/* entries[] length: hdr.count for a dir leaf block, 0 for any other magic. */
+	ASSERT(startoff == 0);
+	is_leaf = INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC;
+	return is_leaf ? INT_GET(leaf->hdr.count, ARCH_CONVERT) : 0;
+}
+
+/*ARGSUSED*/
+static int
+dir_leaf_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir_leafblock_t	*leaf = obj;
+
+	/* lhdr is present (count 1) only when the block carries XFS_DIR_LEAF_MAGIC. */
+	ASSERT(startoff == 0);
+	return INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC ? 0 : 1;
+}
+
+static int
+dir_leaf_name_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir_leafblock_t	*leaf = obj;
+	xfs_dir_leaf_entry_t	*ent;
+	int			nents;
+	int			byteoff;
+
+	/* namelen of the first leaf entry whose nameidx equals byteize(startoff). */
+	ASSERT(bitoffs(startoff) == 0);
+	byteoff = byteize(startoff);
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)
+		return 0;
+	nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	for (ent = leaf->entries; ent < leaf->entries + nents; ent++) {
+		if (INT_GET(ent->nameidx, ARCH_CONVERT) == byteoff)
+			return ent->namelen;
+	}
+	return 0;	/* no entry refers to this offset */
+}
+
+/*ARGSUSED*/
+int
+dir_leaf_name_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir_leafblock_t	*leaf = obj;
+	xfs_dir_leaf_entry_t	*ent;
+
+	/* bitize()d XFS_DIR_LEAF_ENTSIZE_BYENTRY of entries[idx]; 0 when the block is not a dir leaf. */
+	ASSERT(startoff == 0);
+	if (XFS_DIR_LEAF_MAGIC != INT_GET(leaf->hdr.info.magic, ARCH_CONVERT))
+		return 0;
+	ent = leaf->entries + idx;	/* idx is used as given; hdr.count is not consulted */
+	return bitize((int)XFS_DIR_LEAF_ENTSIZE_BYENTRY(ent));
+}
+
+/*ARGSUSED*/
+static int
+dir_leaf_namelist_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir_leafblock_t	*leaf = obj;
+
+	/* namelist[] has hdr.count elements in a dir leaf block and none otherwise. */
+	ASSERT(startoff == 0);
+	if (XFS_DIR_LEAF_MAGIC == INT_GET(leaf->hdr.info.magic, ARCH_CONVERT))
+		return INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	return 0;
+}
+
+/*ARGSUSED*/
+static int
+dir_leaf_namelist_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir_leafblock_t	*leaf = obj;
+	int			nameidx;
+
+	/* bitize()d nameidx of entries[idx]; the block magic is not checked here. */
+	ASSERT(startoff == 0);
+	nameidx = INT_GET(leaf->entries[idx].nameidx, ARCH_CONVERT);
+	return bitize(nameidx);
+}
+
+/*ARGSUSED*/
+static int
+dir_node_btree_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_da_intnode_t	*node = obj;
+
+	/* btree[] length: hdr.count for a da node block, 0 for any other magic. */
+	ASSERT(startoff == 0);		/* this is a base structure */
+	if (XFS_DA_NODE_MAGIC == INT_GET(node->hdr.info.magic, ARCH_CONVERT))
+		return INT_GET(node->hdr.count, ARCH_CONVERT);
+	return 0;
+}
+
+/*ARGSUSED*/
+static int
+dir_node_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_da_intnode_t	*node = obj;
+
+	/* nhdr is present (count 1) only when the block carries XFS_DA_NODE_MAGIC. */
+	ASSERT(startoff == 0);
+	return INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ? 0 : 1;
+}
+
+/*ARGSUSED*/
+int
+dir_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_blocksize);	/* always the superblock's sb_blocksize, bitize()d; none of the three arguments is read */
+}
diff --git a/db/dir.h b/db/dir.h
new file mode 100644
index 000000000..ee28b4477
--- /dev/null
+++ b/db/dir.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t	dir_flds[];	/* all tables below are defined in dir.c */
+extern const field_t	dir_hfld[];
+extern const field_t	dir_blkinfo_flds[];
+extern const field_t	dir_leaf_entry_flds[];
+extern const field_t	dir_leaf_hdr_flds[];
+extern const field_t	dir_leaf_map_flds[];
+extern const field_t	dir_leaf_name_flds[];
+extern const field_t	dir_node_entry_flds[];
+extern const field_t	dir_node_hdr_flds[];
+
+extern int	dir_leaf_name_size(void *obj, int startoff, int idx);	/* bitize()d entry size for entries[idx]; 0 if obj is not a dir leaf */
+extern int	dir_size(void *obj, int startoff, int idx);	/* bitize()d sb_blocksize; arguments unused */
diff --git a/db/dir2.c b/db/dir2.c
new file mode 100644
index 000000000..b8b440cc5
--- /dev/null
+++ b/db/dir2.c
@@ -0,0 +1,727 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "dir.h"
+#include "dir2.h"
+#include "mount.h"
+#include "data.h"
+
+static int	dir2_block_hdr_count(void *obj, int startoff);
+static int	dir2_block_leaf_count(void *obj, int startoff);
+static int	dir2_block_leaf_offset(void *obj, int startoff, int idx);
+static int	dir2_block_tail_count(void *obj, int startoff);
+static int	dir2_block_tail_offset(void *obj, int startoff, int idx);
+static int	dir2_block_u_count(void *obj, int startoff);
+static int	dir2_block_u_offset(void *obj, int startoff, int idx);
+static int	dir2_data_union_freetag_count(void *obj, int startoff);
+static int	dir2_data_union_inumber_count(void *obj, int startoff);
+static int	dir2_data_union_length_count(void *obj, int startoff);
+static int	dir2_data_union_name_count(void *obj, int startoff);
+static int	dir2_data_union_namelen_count(void *obj, int startoff);
+static int	dir2_data_union_tag_count(void *obj, int startoff);
+static int	dir2_data_union_tag_offset(void *obj, int startoff, int idx);
+static int	dir2_data_hdr_count(void *obj, int startoff);
+static int	dir2_data_u_count(void *obj, int startoff);
+static int	dir2_data_u_offset(void *obj, int startoff, int idx);
+static int	dir2_free_bests_count(void *obj, int startoff);
+static int	dir2_free_hdr_count(void *obj, int startoff);
+static int	dir2_leaf_bests_count(void *obj, int startoff);
+static int	dir2_leaf_bests_offset(void *obj, int startoff, int idx);
+static int	dir2_leaf_ents_count(void *obj, int startoff);
+static int	dir2_leaf_hdr_count(void *obj, int startoff);
+static int	dir2_leaf_tail_count(void *obj, int startoff);
+static int	dir2_leaf_tail_offset(void *obj, int startoff, int idx);
+static int	dir2_node_btree_count(void *obj, int startoff);
+static int	dir2_node_hdr_count(void *obj, int startoff);
+
+const field_t	dir2_hfld[] = {	/* one unnamed FLDT_DIR2 field; NOTE(review): presumably the top-level handle for a whole dir2 block — confirm in type.c */
+	{ "", FLDT_DIR2, OI(0), C1, 0, TYP_NONE },
+	{ NULL }	/* list ends with a NULL-named entry */
+};
+
+#define	BOFF(f)	bitize(offsetof(xfs_dir2_block_t, f))	/* bitize()d member offsets, one macro per on-disk layout */
+#define	DOFF(f)	bitize(offsetof(xfs_dir2_data_t, f))
+#define	FOFF(f)	bitize(offsetof(xfs_dir2_free_t, f))
+#define	LOFF(f)	bitize(offsetof(xfs_dir2_leaf_t, f))
+#define	NOFF(f)	bitize(offsetof(xfs_da_intnode_t, f))
+const field_t	dir2_flds[] = {	/* block (b*), data (d*), leaf (l*), node (n*) and free (f*) layouts listed together; every field has a count callback */
+	{ "bhdr", FLDT_DIR2_DATA_HDR, OI(BOFF(hdr)), dir2_block_hdr_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "bu", FLDT_DIR2_DATA_UNION, dir2_block_u_offset, dir2_block_u_count,
+	  FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ "bleaf", FLDT_DIR2_LEAF_ENTRY, dir2_block_leaf_offset,
+	  dir2_block_leaf_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ "btail", FLDT_DIR2_BLOCK_TAIL, dir2_block_tail_offset,
+	  dir2_block_tail_count, FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ "dhdr", FLDT_DIR2_DATA_HDR, OI(DOFF(hdr)), dir2_data_hdr_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "du", FLDT_DIR2_DATA_UNION, dir2_data_u_offset, dir2_data_u_count,
+	  FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ "lhdr", FLDT_DIR2_LEAF_HDR, OI(LOFF(hdr)), dir2_leaf_hdr_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "lbests", FLDT_DIR2_DATA_OFF, dir2_leaf_bests_offset,
+	  dir2_leaf_bests_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ "lents", FLDT_DIR2_LEAF_ENTRY, OI(LOFF(ents)), dir2_leaf_ents_count,
+	  FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ "ltail", FLDT_DIR2_LEAF_TAIL, dir2_leaf_tail_offset,
+	  dir2_leaf_tail_count, FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ "nhdr", FLDT_DIR_NODE_HDR, OI(NOFF(hdr)), dir2_node_hdr_count,	/* node fields reuse the FLDT_DIR_NODE_* types rather than DIR2-specific ones */
+	  FLD_COUNT, TYP_NONE },
+	{ "nbtree", FLDT_DIR_NODE_ENTRY, OI(NOFF(btree)), dir2_node_btree_count,
+	  FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ "fhdr", FLDT_DIR2_FREE_HDR, OI(FOFF(hdr)), dir2_free_hdr_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "fbests", FLDT_DIR2_DATA_OFFNZ, OI(FOFF(bests)),
+	  dir2_free_bests_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+	{ NULL }
+};
+
+#define	BTOFF(f)	bitize(offsetof(xfs_dir2_block_tail_t, f))	/* bitize()d offset of member f in xfs_dir2_block_tail_t */
+const field_t	dir2_block_tail_flds[] = {	/* members of xfs_dir2_block_tail_t */
+	{ "count", FLDT_UINT32D, OI(BTOFF(count)), C1, 0, TYP_NONE },	/* returned by dir2_block_leaf_count() */
+	{ "stale", FLDT_UINT32D, OI(BTOFF(stale)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	DFOFF(f)	bitize(offsetof(xfs_dir2_data_free_t, f))	/* bitize()d offset of member f in xfs_dir2_data_free_t */
+const field_t	dir2_data_free_flds[] = {	/* members of xfs_dir2_data_free_t */
+	{ "offset", FLDT_DIR2_DATA_OFF, OI(DFOFF(offset)), C1, 0, TYP_NONE },
+	{ "length", FLDT_DIR2_DATA_OFF, OI(DFOFF(length)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	DHOFF(f)	bitize(offsetof(xfs_dir2_data_hdr_t, f))	/* bitize()d offset of member f in xfs_dir2_data_hdr_t */
+const field_t	dir2_data_hdr_flds[] = {	/* members of xfs_dir2_data_hdr_t */
+	{ "magic", FLDT_UINT32X, OI(DHOFF(magic)), C1, 0, TYP_NONE },
+	{ "bestfree", FLDT_DIR2_DATA_FREE, OI(DHOFF(bestfree)),
+	  CI(XFS_DIR2_DATA_FD_COUNT), FLD_ARRAY, TYP_NONE },	/* fixed-length array of XFS_DIR2_DATA_FD_COUNT elements */
+	{ NULL }
+};
+
+#define	DEOFF(f)	bitize(offsetof(xfs_dir2_data_entry_t, f))	/* offsets within the in-use entry layout */
+#define	DUOFF(f)	bitize(offsetof(xfs_dir2_data_unused_t, f))	/* offsets within the unused (free) layout */
+const field_t	dir2_data_union_flds[] = {	/* entry and unused layouts of one record; NOTE(review): the count callbacks presumably select which members apply — their bodies are not in this view */
+	{ "freetag", FLDT_UINT16X, OI(DUOFF(freetag)),
+	  dir2_data_union_freetag_count, FLD_COUNT, TYP_NONE },
+	{ "inumber", FLDT_INO, OI(DEOFF(inumber)),
+	  dir2_data_union_inumber_count, FLD_COUNT, TYP_INODE },
+	{ "length", FLDT_DIR2_DATA_OFF, OI(DUOFF(length)),
+	  dir2_data_union_length_count, FLD_COUNT, TYP_NONE },
+	{ "namelen", FLDT_UINT8D, OI(DEOFF(namelen)),
+	  dir2_data_union_namelen_count, FLD_COUNT, TYP_NONE },
+	{ "name", FLDT_CHARNS, OI(DEOFF(name)), dir2_data_union_name_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "tag", FLDT_DIR2_DATA_OFF, dir2_data_union_tag_offset,	/* offset comes from a callback (FLD_OFFSET) */
+	  dir2_data_union_tag_count, FLD_OFFSET|FLD_COUNT, TYP_NONE },
+	{ NULL }
+};
+
+#define	LEOFF(f)	bitize(offsetof(xfs_dir2_leaf_entry_t, f))	/* bitize()d offset of member f in xfs_dir2_leaf_entry_t */
+const field_t	dir2_leaf_entry_flds[] = {	/* members of xfs_dir2_leaf_entry_t */
+	{ "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
+	{ "address", FLDT_UINT32X, OI(LEOFF(address)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	LHOFF(f)	bitize(offsetof(xfs_dir2_leaf_hdr_t, f))	/* bitize()d offset of member f in xfs_dir2_leaf_hdr_t */
+const field_t	dir2_leaf_hdr_flds[] = {	/* members of xfs_dir2_leaf_hdr_t */
+	{ "info", FLDT_DIR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },	/* same FLDT_DIR_BLKINFO type the dir.c headers use */
+	{ "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },
+	{ "stale", FLDT_UINT16D, OI(LHOFF(stale)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	LTOFF(f)	bitize(offsetof(xfs_dir2_leaf_tail_t, f))	/* bitize()d offset of member f in xfs_dir2_leaf_tail_t */
+const field_t	dir2_leaf_tail_flds[] = {	/* the single member of xfs_dir2_leaf_tail_t listed here */
+	{ "bestcount", FLDT_UINT32D, OI(LTOFF(bestcount)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	FHOFF(f)	bitize(offsetof(xfs_dir2_free_hdr_t, f))	/* bitize()d offset of member f in xfs_dir2_free_hdr_t */
+const field_t	dir2_free_hdr_flds[] = {	/* members of xfs_dir2_free_hdr_t */
+	{ "magic", FLDT_UINT32X, OI(FHOFF(magic)), C1, 0, TYP_NONE },
+	{ "firstdb", FLDT_INT32D, OI(FHOFF(firstdb)), C1, 0, TYP_NONE },
+	{ "nvalid", FLDT_INT32D, OI(FHOFF(nvalid)), C1, 0, TYP_NONE },
+	{ "nused", FLDT_INT32D, OI(FHOFF(nused)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/*ARGSUSED*/
+static int
+dir2_block_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_block_t	*blk = obj;
+
+	/* bhdr is present (count 1) only when hdr.magic is XFS_DIR2_BLOCK_MAGIC. */
+	ASSERT(startoff == 0);
+	return INT_GET(blk->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC ? 0 : 1;
+}
+
+/*ARGSUSED*/
+static int
+dir2_block_leaf_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_block_t	*blk = obj;
+	xfs_dir2_block_tail_t	*tail;
+
+	/* bleaf[] length: the block tail's count, or 0 when the magic is not XFS_DIR2_BLOCK_MAGIC. */
+	ASSERT(startoff == 0);
+	if (XFS_DIR2_BLOCK_MAGIC != INT_GET(blk->hdr.magic, ARCH_CONVERT))
+		return 0;
+	tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+	return INT_GET(tail->count, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+dir2_block_leaf_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_block_t	*blk = obj;
+	xfs_dir2_block_tail_t	*tail;
+	xfs_dir2_leaf_entry_t	*leaves;
+
+	/* bitize()d byte distance from the start of the block to leaf entry idx. */
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+	leaves = XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+	return bitize((int)((char *)&leaves[idx] - (char *)blk));
+}
+
+/* Block-tail count: 1 when hdr.magic is XFS_DIR2_BLOCK_MAGIC, else 0. */
+/*ARGSUSED*/
+static int
+dir2_block_tail_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_block_t	*blk = obj;
+
+	ASSERT(startoff == 0);
+	return INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC;
+}
+
+/* bitize()d byte offset of the xfs_dir2_block_tail_t within the block. */
+/*ARGSUSED*/
+static int
+dir2_block_tail_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_block_t	*blk = obj;
+	char			*tail;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	tail = (char *)XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+	return bitize((int)(tail - (char *)blk));
+}
+
+/* Count entries, used or free, from block->u up to the leaf array.  A free
+ * entry of length 0 (corrupt) is counted and ends the walk: ptr is stuck. */
+/*ARGSUSED*/
+static int
+dir2_block_u_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_block_t	*block = obj;
+	xfs_dir2_block_tail_t	*btp;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_unused_t	*dup;
+	char			*endptr;
+	int			i, len;
+	char			*ptr;
+
+	ASSERT(startoff == 0);
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)
+		return 0;
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	ptr = (char *)block->u;
+	endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	for (i = 0, len = 1; ptr < endptr && len > 0; i++, ptr += len) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG)
+			len = INT_GET(dup->length, ARCH_CONVERT);
+		else
+			len = XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+	return i;
+}
+
+/*
+ * bitize()d offset of entry idx: step over the idx entries ahead of it.
+ */
+/*ARGSUSED*/
+static int
+dir2_block_u_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_block_t	*blk = obj;
+	xfs_dir2_block_tail_t	*tail;
+	xfs_dir2_data_entry_t	*used;
+	xfs_dir2_data_unused_t	*unused;
+				/*REFERENCED*/
+	char			*limit;
+	char			*cur;
+	int			n;
+
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+	limit = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+	for (cur = (char *)blk->u, n = idx; n > 0; n--) {
+		ASSERT(cur < limit);
+		unused = (xfs_dir2_data_unused_t *)cur;
+		used = (xfs_dir2_data_entry_t *)cur;
+		if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+			cur += XFS_DIR2_DATA_ENTSIZE(used->namelen);
+		else
+			cur += INT_GET(unused->length, ARCH_CONVERT);
+	}
+	return bitize((int)(cur - (char *)blk));
+}
+
+/* 1 if the freetag field fits in the block and holds the free tag, else 0. */
+static int
+dir2_data_union_freetag_count(
+	void			*obj,
+	int			startoff)
+{
+	char			*limit = (char *)obj + mp->m_dirblksize;
+	xfs_dir2_data_unused_t	*unused;
+
+	ASSERT(bitoffs(startoff) == 0);
+	unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+	return (char *)&unused->freetag + sizeof(unused->freetag) <= limit &&
+	       INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG;
+}
+
+/* 1 if the inumber field fits in the block and the entry is not free. */
+static int
+dir2_data_union_inumber_count(
+	void			*obj,
+	int			startoff)
+{
+	char			*limit = (char *)obj + mp->m_dirblksize;
+	xfs_dir2_data_entry_t	*used;
+	xfs_dir2_data_unused_t	*unused;
+
+	ASSERT(bitoffs(startoff) == 0);
+	unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+	used = (xfs_dir2_data_entry_t *)unused;
+	return (char *)&used->inumber + sizeof(used->inumber) <= limit &&
+	       INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG;
+}
+
+/* 1 if the length field fits in the block and the entry is free, else 0. */
+static int
+dir2_data_union_length_count(
+	void			*obj,
+	int			startoff)
+{
+	char			*limit = (char *)obj + mp->m_dirblksize;
+	xfs_dir2_data_unused_t	*unused;
+
+	ASSERT(bitoffs(startoff) == 0);
+	unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+	return (char *)&unused->length + sizeof(unused->length) <= limit &&
+	       INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG;
+}
+
+/* namelen of an in-use entry, or 0 if it is free or does not fit the block. */
+static int
+dir2_data_union_name_count(
+	void			*obj,
+	int			startoff)
+{
+	char			*limit = (char *)obj + mp->m_dirblksize;
+	xfs_dir2_data_entry_t	*used;
+	xfs_dir2_data_unused_t	*unused;
+
+	ASSERT(bitoffs(startoff) == 0);
+	unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+	used = (xfs_dir2_data_entry_t *)unused;
+	if ((char *)&used->namelen + sizeof(used->namelen) >= limit)
+		return 0;
+	if (INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG)
+		return 0;
+	return (char *)&used->name[0] + used->namelen <= limit ? used->namelen : 0;
+}
+
+/* 1 if the namelen field fits in the block and the entry is not free. */
+static int
+dir2_data_union_namelen_count(
+	void			*obj,
+	int			startoff)
+{
+	char			*limit = (char *)obj + mp->m_dirblksize;
+	xfs_dir2_data_entry_t	*used;
+	xfs_dir2_data_unused_t	*unused;
+
+	ASSERT(bitoffs(startoff) == 0);
+	unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+	used = (xfs_dir2_data_entry_t *)unused;
+	return (char *)&used->namelen + sizeof(used->namelen) <= limit &&
+	       INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG;
+}
+
+/*
+ * 1 if the entry's tag lies inside the block, else 0.  The field used to
+ * find the tag is bounds-checked before the tag pointer is derived.
+ */
+static int
+dir2_data_union_tag_count(
+	void			*obj,
+	int			startoff)
+{
+	char			*limit = (char *)obj + mp->m_dirblksize;
+	xfs_dir2_data_entry_t	*used;
+	xfs_dir2_data_unused_t	*unused;
+	xfs_dir2_data_off_t	*tagp;
+
+	ASSERT(bitoffs(startoff) == 0);
+	unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+	used = (xfs_dir2_data_entry_t *)unused;
+	if ((char *)&unused->freetag + sizeof(unused->freetag) > limit)
+		return 0;
+	if (INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+		if ((char *)&unused->length + sizeof(unused->length) > limit)
+			return 0;
+		tagp = XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(unused, ARCH_CONVERT);
+	} else {
+		if ((char *)&used->namelen + sizeof(used->namelen) > limit)
+			return 0;
+		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(used);
+	}
+	return (char *)tagp + sizeof(*tagp) <= limit;
+}
+
+/* bitize()d byte offset of the entry's tag, relative to the entry start. */
+/*ARGSUSED*/
+static int
+dir2_data_union_tag_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_data_unused_t	*unused = (void *)((char *)obj + byteize(startoff));
+	xfs_dir2_data_entry_t	*used = (xfs_dir2_data_entry_t *)unused;
+	char			*tag;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	if (INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG)
+		tag = (char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(unused, ARCH_CONVERT);
+	else
+		tag = (char *)XFS_DIR2_DATA_ENTRY_TAG_P(used);
+	return bitize((int)(tag - (char *)unused));
+}
+
+/* Returns 1 when obj's hdr.magic is XFS_DIR2_DATA_MAGIC, else 0. */
+/*ARGSUSED*/
+static int
+dir2_data_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_data_t		*blk = obj;
+
+	ASSERT(startoff == 0);
+	return INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC;
+}
+
+/* Count entries, used or free, from data->u to the end of the block.  A free
+ * entry of length 0 (corrupt) is counted and ends the walk: ptr is stuck. */
+/*ARGSUSED*/
+static int
+dir2_data_u_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_data_t		*data = obj;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_unused_t	*dup;
+	char			*endptr;
+	int			i, len;
+	char			*ptr;
+
+	ASSERT(startoff == 0);
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC)
+		return 0;
+	ptr = (char *)data->u;
+	endptr = (char *)data + mp->m_dirblksize;
+	for (i = 0, len = 1; ptr < endptr && len > 0; i++, ptr += len) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG)
+			len = INT_GET(dup->length, ARCH_CONVERT);
+		else
+			len = XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+	return i;
+}
+
+/*
+ * bitize()d offset of entry idx: step over the idx entries ahead of it.
+ */
+/*ARGSUSED*/
+static int
+dir2_data_u_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_data_t		*blk = obj;
+	xfs_dir2_data_entry_t	*used;
+	xfs_dir2_data_unused_t	*unused;
+				/*REFERENCED*/
+	char			*limit;
+	char			*cur;
+	int			n;
+
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+	limit = (char *)blk + mp->m_dirblksize;
+	for (cur = (char *)blk->u, n = idx; n > 0; n--) {
+		ASSERT(cur < limit);
+		unused = (xfs_dir2_data_unused_t *)cur;
+		used = (xfs_dir2_data_entry_t *)cur;
+		if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+			cur += XFS_DIR2_DATA_ENTSIZE(used->namelen);
+		else
+			cur += INT_GET(unused->length, ARCH_CONVERT);
+	}
+	return bitize((int)(cur - (char *)blk));
+}
+
+/*
+ * bitize()d size of the entry at startoff: its length field when free,
+ * otherwise XFS_DIR2_DATA_ENTSIZE() of its namelen.
+ */
+/*ARGSUSED*/
+int
+dir2_data_union_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_data_unused_t	*unused = (void *)((char *)obj + byteize(startoff));
+	xfs_dir2_data_entry_t	*used = (xfs_dir2_data_entry_t *)unused;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+		return bitize(XFS_DIR2_DATA_ENTSIZE(used->namelen));
+	return bitize(INT_GET(unused->length, ARCH_CONVERT));
+}
+
+/* Value of hdr.nvalid, or 0 when obj lacks XFS_DIR2_FREE_MAGIC. */
+/*ARGSUSED*/
+static int
+dir2_free_bests_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_free_t		*fr = obj;
+
+	ASSERT(startoff == 0);
+	if (INT_GET(fr->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC)
+		return INT_GET(fr->hdr.nvalid, ARCH_CONVERT);
+	return 0;
+}
+
+/* Returns 1 when obj's hdr.magic is XFS_DIR2_FREE_MAGIC, else 0. */
+/*ARGSUSED*/
+static int
+dir2_free_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_free_t		*fr = obj;
+
+	ASSERT(startoff == 0);
+	return INT_GET(fr->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC;
+}
+
+/* Tail bestcount of a LEAF1 block; 0 for any other magic. */
+/*ARGSUSED*/
+static int
+dir2_leaf_bests_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_leaf_t		*lf = obj;
+	xfs_dir2_leaf_tail_t	*tail;
+
+	ASSERT(startoff == 0);
+	if (INT_GET(lf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC)
+		return 0;
+	tail = XFS_DIR2_LEAF_TAIL_P(mp, lf);
+	return INT_GET(tail->bestcount, ARCH_CONVERT);
+}
+
+/* bitize()d byte offset, from the leaf start, of bests[] slot idx. */
+/*ARGSUSED*/
+static int
+dir2_leaf_bests_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_leaf_t		*lf = obj;
+	xfs_dir2_leaf_tail_t	*tail;
+	char			*best;
+
+	ASSERT(startoff == 0);
+	ASSERT(INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	tail = XFS_DIR2_LEAF_TAIL_P(mp, lf);
+	best = (char *)(XFS_DIR2_LEAF_BESTS_P_ARCH(tail, ARCH_CONVERT) + idx);
+	return bitize((int)(best - (char *)lf));
+}
+
+/* hdr.count for a LEAF1 or LEAFN block; 0 for any other magic. */
+/*ARGSUSED*/
+static int
+dir2_leaf_ents_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_leaf_t		*lf = obj;
+
+	ASSERT(startoff == 0);
+	if (INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+	    INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC)
+		return INT_GET(lf->hdr.count, ARCH_CONVERT);
+	return 0;
+}
+
+/* Returns 1 when the magic is XFS_DIR2_LEAF1_MAGIC or _LEAFN_MAGIC, else 0. */
+/*ARGSUSED*/
+static int
+dir2_leaf_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_leaf_t		*lf = obj;
+
+	ASSERT(startoff == 0);
+	return INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+	       INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC;
+}
+
+/* Leaf-tail count: 1 when the magic is XFS_DIR2_LEAF1_MAGIC, else 0. */
+/*ARGSUSED*/
+static int
+dir2_leaf_tail_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_leaf_t		*lf = obj;
+
+	ASSERT(startoff == 0);
+	return INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC;
+}
+
+/* bitize()d byte offset of the xfs_dir2_leaf_tail_t within the leaf block. */
+/*ARGSUSED*/
+static int
+dir2_leaf_tail_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_leaf_t		*lf = obj;
+	char			*tail;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	ASSERT(INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	tail = (char *)XFS_DIR2_LEAF_TAIL_P(mp, lf);
+	return bitize((int)(tail - (char *)lf));
+}
+
+/* Value of hdr.count, or 0 when obj lacks XFS_DA_NODE_MAGIC. */
+/*ARGSUSED*/
+static int
+dir2_node_btree_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_da_intnode_t	*nd = obj;
+
+	ASSERT(startoff == 0);
+	if (INT_GET(nd->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+		return INT_GET(nd->hdr.count, ARCH_CONVERT);
+	return 0;
+}
+
+/* Returns 1 when obj's hdr.info.magic is XFS_DA_NODE_MAGIC, else 0. */
+/*ARGSUSED*/
+static int
+dir2_node_hdr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_da_intnode_t	*nd = obj;
+
+	ASSERT(startoff == 0);
+	return INT_GET(nd->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC;
+}
+
+/*ARGSUSED*/
+int
+dir2_size(
+	void	*obj,		/* not examined */
+	int	startoff,	/* not examined */
+	int	idx)		/* not examined */
+{
+	return bitize(mp->m_dirblksize);	/* same result for every argument */
+}
diff --git a/db/dir2.h b/db/dir2.h
new file mode 100644
index 000000000..a1516c99e
--- /dev/null
+++ b/db/dir2.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t	dir2_flds[];	/* field tables, presumably all in dir2.c */
+extern const field_t	dir2_hfld[];
+extern const field_t	dir2_block_tail_flds[];
+extern const field_t	dir2_data_free_flds[];
+extern const field_t	dir2_data_hdr_flds[];
+extern const field_t	dir2_data_union_flds[];
+extern const field_t	dir2_free_hdr_flds[];
+extern const field_t	dir2_leaf_entry_flds[];
+extern const field_t	dir2_leaf_hdr_flds[];
+extern const field_t	dir2_leaf_tail_flds[];
+/* Size functions defined in dir2.c; both return a bitize()d size. */
+extern int	dir2_data_union_size(void *obj, int startoff, int idx);
+extern int	dir2_size(void *obj, int startoff, int idx);
diff --git a/db/dir2sf.c b/db/dir2sf.c
new file mode 100644
index 000000000..9d8c35f48
--- /dev/null
+++ b/db/dir2sf.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bit.h"
+#include "dir2sf.h"
+
+static int	dir2_inou_i4_count(void *obj, int startoff);	/* used by the tables below */
+static int	dir2_inou_i8_count(void *obj, int startoff);
+static int	dir2_sf_entry_inumber_offset(void *obj, int startoff, int idx);
+static int	dir2_sf_entry_name_count(void *obj, int startoff);
+static int	dir2_sf_list_count(void *obj, int startoff);
+static int	dir2_sf_list_offset(void *obj, int startoff, int idx);
+
+#define	OFF(f)	bitize(offsetof(xfs_dir2_sf_t, f))
+const field_t	dir2sf_flds[] = {	/* xfs_dir2_sf_t: header, then entry list */
+	{ "hdr", FLDT_DIR2_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE },
+	{ "list", FLDT_DIR2_SF_ENTRY, dir2_sf_list_offset, dir2_sf_list_count,
+	  FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE },	/* count and offsets computed */
+	{ NULL }
+};
+
+#define UOFF(f)	bitize(offsetof(xfs_dir2_inou_t, f))
+const field_t	dir2_inou_flds[] = {	/* xfs_dir2_inou_t: i8 or i4, never both */
+	{ "i8", FLDT_DIR2_INO8, OI(UOFF(i8)), dir2_inou_i8_count, FLD_COUNT,
+	  TYP_INODE },	/* count is 1 only when hdr.i8count != 0 */
+	{ "i4", FLDT_DIR2_INO4, OI(UOFF(i4)), dir2_inou_i4_count, FLD_COUNT,
+	  TYP_INODE },	/* count is 1 only when hdr.i8count == 0 */
+	{ NULL }
+};
+
+#define	HOFF(f)	bitize(offsetof(xfs_dir2_sf_hdr_t, f))
+const field_t	dir2_sf_hdr_flds[] = {	/* xfs_dir2_sf_hdr_t, offsets via HOFF() */
+	{ "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE },
+	{ "i8count", FLDT_UINT8D, OI(HOFF(i8count)), C1, 0, TYP_NONE },
+	{ "parent", FLDT_DIR2_INOU, OI(HOFF(parent)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	EOFF(f)	bitize(offsetof(xfs_dir2_sf_entry_t, f))
+const field_t	dir2_sf_entry_flds[] = {	/* xfs_dir2_sf_entry_t */
+	{ "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE },
+	{ "offset", FLDT_DIR2_SF_OFF, OI(EOFF(offset)), C1, 0, TYP_NONE },
+	{ "name", FLDT_CHARNS, OI(EOFF(name)), dir2_sf_entry_name_count,
+	  FLD_COUNT, TYP_NONE },	/* count is the entry's namelen */
+	{ "inumber", FLDT_DIR2_INOU, dir2_sf_entry_inumber_offset, C1,
+	  FLD_OFFSET, TYP_NONE },	/* offset from XFS_DIR2_SF_INUMBERP() */
+	{ NULL }
+};
+
+/* 1 when i8count is zero in the dir2 shortform of the inode at obj, else 0. */
+/*ARGSUSED*/
+static int
+dir2_inou_i4_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dir2_sf_t	*sfp = &((xfs_dinode_t *)obj)->di_u.di_dir2sf;
+
+	ASSERT(bitoffs(startoff) == 0);
+	return sfp->hdr.i8count == 0;
+}
+
+/* 1 when i8count is nonzero in the dir2 shortform of the inode at obj. */
+/*ARGSUSED*/
+static int
+dir2_inou_i8_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dir2_sf_t	*sfp = &((xfs_dinode_t *)obj)->di_u.di_dir2sf;
+
+	ASSERT(bitoffs(startoff) == 0);
+	return sfp->hdr.i8count != 0;
+}
+
+/* bitize()d sizeof the ino8 form when hdr.i8count is nonzero, else ino4. */
+/*ARGSUSED*/
+int
+dir2_inou_size(
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	xfs_dir2_sf_t	*sfp = &((xfs_dinode_t *)obj)->di_u.di_dir2sf;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	if (sfp->hdr.i8count)
+		return bitize((uint)sizeof(xfs_dir2_ino8_t));
+	return bitize((uint)sizeof(xfs_dir2_ino4_t));
+}
+
+/* Number of name bytes: the namelen of the shortform entry at startoff. */
+static int
+dir2_sf_entry_name_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_sf_entry_t	*ent = (void *)((char *)obj + byteize(startoff));
+
+	ASSERT(bitoffs(startoff) == 0);
+	return ent->namelen;
+}
+
+/* bitize()d byte offset of the inode number within the entry at startoff. */
+/*ARGSUSED*/
+static int
+dir2_sf_entry_inumber_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_sf_entry_t	*ent = (void *)((char *)obj + byteize(startoff));
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	return bitize((int)((char *)XFS_DIR2_SF_INUMBERP(ent) - (char *)ent));
+}
+
+/* bitize()d size of shortform entry idx, reached by walking from the first. */
+int
+dir2_sf_entry_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_sf_t		*sfp = (void *)((char *)obj + byteize(startoff));
+	xfs_dir2_sf_entry_t	*ent;
+	int			left;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ent = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (left = idx; left > 0; left--)
+		ent = XFS_DIR2_SF_NEXTENTRY(sfp, ent);
+	return bitize((int)XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, ent));
+}
+
+/* bitize()d XFS_DIR2_SF_HDR_SIZE() for the header's own i8count. */
+/*ARGSUSED*/
+int
+dir2_sf_hdr_size(
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	xfs_dir2_sf_t	*sfp = (void *)((char *)obj + byteize(startoff));
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	return bitize(XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+}
+
+/* Number of list entries: hdr.count of the shortform dir at startoff. */
+static int
+dir2_sf_list_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir2_sf_t		*sfp = (void *)((char *)obj + byteize(startoff));
+
+	ASSERT(bitoffs(startoff) == 0);
+	return sfp->hdr.count;
+}
+
+/* bitize()d byte offset of entry idx from the start of the shortform dir. */
+static int
+dir2_sf_list_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_sf_t		*sfp = (void *)((char *)obj + byteize(startoff));
+	xfs_dir2_sf_entry_t	*ent;
+	int			left;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ent = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (left = idx; left > 0; left--)
+		ent = XFS_DIR2_SF_NEXTENTRY(sfp, ent);
+	return bitize((int)((char *)ent - (char *)sfp));
+}
+
+/* bitize()d size of the whole shortform dir: header plus hdr.count entries. */
+/*ARGSUSED*/
+int
+dir2sf_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir2_sf_t		*sfp = (void *)((char *)obj + byteize(startoff));
+	xfs_dir2_sf_entry_t	*ent;
+	int			left;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	ent = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (left = sfp->hdr.count; left > 0; left--)
+		ent = XFS_DIR2_SF_NEXTENTRY(sfp, ent);
+	return bitize((int)((char *)ent - (char *)sfp));
+}
diff --git a/db/dir2sf.h b/db/dir2sf.h
new file mode 100644
index 000000000..f720c8b83
--- /dev/null
+++ b/db/dir2sf.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t	dir2sf_flds[];		/* field tables from dir2sf.c */
+extern const field_t	dir2_inou_flds[];
+extern const field_t	dir2_sf_hdr_flds[];
+extern const field_t	dir2_sf_entry_flds[];
+/* Size functions defined in dir2sf.c; each returns a bitize()d size. */
+extern int	dir2sf_size(void *obj, int startoff, int idx);
+extern int	dir2_inou_size(void *obj, int startoff, int idx);
+extern int	dir2_sf_entry_size(void *obj, int startoff, int idx);
+extern int	dir2_sf_hdr_size(void *obj, int startoff, int idx);
diff --git a/db/dirshort.c b/db/dirshort.c
new file mode 100644
index 000000000..4a6f4f45f
--- /dev/null
+++ b/db/dirshort.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bit.h"
+#include "dirshort.h"
+
+static int	dir_sf_entry_name_count(void *obj, int startoff);	/* used by the tables below */
+static int	dir_shortform_list_count(void *obj, int startoff);
+static int	dir_shortform_list_offset(void *obj, int startoff, int idx);
+
+#define	OFF(f)	bitize(offsetof(xfs_dir_shortform_t, f))
+const field_t	dir_shortform_flds[] = {	/* xfs_dir_shortform_t: header, then list */
+	{ "hdr", FLDT_DIR_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE },
+	{ "list", FLDT_DIR_SF_ENTRY, dir_shortform_list_offset,	/* computed offsets */
+	  dir_shortform_list_count, FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ NULL }
+};
+
+#define	HOFF(f)	bitize(offsetof(xfs_dir_sf_hdr_t, f))
+const field_t	dir_sf_hdr_flds[] = {	/* xfs_dir_sf_hdr_t, offsets via HOFF() */
+	{ "parent", FLDT_DIR_INO, OI(HOFF(parent)), C1, 0, TYP_INODE },
+	{ "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	EOFF(f)	bitize(offsetof(xfs_dir_sf_entry_t, f))
+const field_t	dir_sf_entry_flds[] = {	/* xfs_dir_sf_entry_t, offsets via EOFF() */
+	{ "inumber", FLDT_DIR_INO, OI(EOFF(inumber)), C1, 0, TYP_INODE },
+	{ "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE },
+	{ "name", FLDT_CHARNS, OI(EOFF(name)), dir_sf_entry_name_count,
+	  FLD_COUNT, TYP_NONE },	/* count is the entry's namelen */
+	{ NULL }
+};
+
+/* Number of name bytes: the namelen of the shortform entry at startoff. */
+static int
+dir_sf_entry_name_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir_sf_entry_t	*ent = (void *)((char *)obj + byteize(startoff));
+
+	ASSERT(bitoffs(startoff) == 0);
+	return ent->namelen;
+}
+
+/* bitize()d size of shortform entry idx, reached by walking from list[0]. */
+int
+dir_sf_entry_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir_shortform_t	*sfp = (void *)((char *)obj + byteize(startoff));
+	xfs_dir_sf_entry_t	*ent;
+	int			left;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ent = &sfp->list[0];
+	for (left = idx; left > 0; left--)
+		ent = XFS_DIR_SF_NEXTENTRY(ent);
+	return bitize((int)XFS_DIR_SF_ENTSIZE_BYENTRY(ent));
+}
+
+/* Number of list entries: hdr.count of the shortform dir at startoff. */
+static int
+dir_shortform_list_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dir_shortform_t	*sfp = (void *)((char *)obj + byteize(startoff));
+
+	ASSERT(bitoffs(startoff) == 0);
+	return sfp->hdr.count;
+}
+
+/* bitize()d byte offset of entry idx from the start of the shortform dir. */
+static int
+dir_shortform_list_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir_shortform_t	*sfp = (void *)((char *)obj + byteize(startoff));
+	xfs_dir_sf_entry_t	*ent;
+	int			left;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ent = &sfp->list[0];
+	for (left = idx; left > 0; left--)
+		ent = XFS_DIR_SF_NEXTENTRY(ent);
+	return bitize((int)((char *)ent - (char *)sfp));
+}
+
+/* bitize()d size of the whole shortform dir: header plus hdr.count entries. */
+int
+dirshort_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_dir_shortform_t	*sfp = (void *)((char *)obj + byteize(startoff));
+	xfs_dir_sf_entry_t	*ent;
+	int			left;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(idx == 0);
+	ent = &sfp->list[0];
+	for (left = sfp->hdr.count; left > 0; left--)
+		ent = XFS_DIR_SF_NEXTENTRY(ent);
+	return bitize((int)((char *)ent - (char *)sfp));
+}
diff --git a/db/dirshort.h b/db/dirshort.h
new file mode 100644
index 000000000..2d50efbf6
--- /dev/null
+++ b/db/dirshort.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t	dir_sf_entry_flds[];	/* field tables from dirshort.c */
+extern const field_t	dir_sf_hdr_flds[];
+extern const field_t	dir_shortform_flds[];
+extern const field_t	dirshort_hfld[];	/* NOTE(review): no definition in dirshort.c */
+/* Size functions defined in dirshort.c; each returns a bitize()d size. */
+extern int	dir_sf_entry_size(void *obj, int startoff, int idx);
+extern int	dirshort_size(void *obj, int startoff, int idx);
diff --git a/db/dquot.c b/db/dquot.c
new file mode 100644
index 000000000..be22d81e1
--- /dev/null
+++ b/db/dquot.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "bit.h"
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "dquot.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "mount.h"
+#include "output.h"
+
+static int	dquot_f(int argc, char **argv);	/* both referenced by dquot_cmd */
+static void	dquot_help(void);
+
+static const cmdinfo_t	dquot_cmd =	/* handed to add_command() by dquot_init() */
+	{ "dquot", NULL, dquot_f, 1, 2, 1, "[project|user id]",
+	  "set current address to project or user quota block", dquot_help };
+
+const field_t	dqblk_hfld[] = {	/* one unnamed FLDT_DQBLK field at offset 0 */
+	{ "", FLDT_DQBLK, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	DDOFF(f)	bitize(offsetof(xfs_dqblk_t, dd_ ## f))
+#define	DDSZC(f)	szcount(xfs_dqblk_t, dd_ ## f)
+const field_t	dqblk_flds[] = {	/* xfs_dqblk_t; macros prepend the dd_ prefix */
+	{ "diskdq", FLDT_DISK_DQUOT, OI(DDOFF(diskdq)), C1, 0, TYP_NONE },
+	{ "fill", FLDT_CHARS, OI(DDOFF(fill)), CI(DDSZC(fill)), FLD_SKIPALL,
+	  TYP_NONE },
+	{ NULL }
+};
+
+#define	DOFF(f)		bitize(offsetof(xfs_disk_dquot_t, d_ ## f))
+const field_t	disk_dquot_flds[] = {	/* xfs_disk_dquot_t; DOFF() prepends d_ */
+	{ "magic", FLDT_UINT16X, OI(DOFF(magic)), C1, 0, TYP_NONE },
+	{ "version", FLDT_UINT8X, OI(DOFF(version)), C1, 0, TYP_NONE },
+	{ "flags", FLDT_UINT8X, OI(DOFF(flags)), C1, 0, TYP_NONE },
+	{ "id", FLDT_DQID, OI(DOFF(id)), C1, 0, TYP_NONE },
+	{ "blk_hardlimit", FLDT_QCNT, OI(DOFF(blk_hardlimit)), C1, 0,
+	  TYP_NONE },
+	{ "blk_softlimit", FLDT_QCNT, OI(DOFF(blk_softlimit)), C1, 0,
+	  TYP_NONE },
+	{ "ino_hardlimit", FLDT_QCNT, OI(DOFF(ino_hardlimit)), C1, 0,
+	  TYP_NONE },
+	{ "ino_softlimit", FLDT_QCNT, OI(DOFF(ino_softlimit)), C1, 0,
+	  TYP_NONE },
+	{ "bcount", FLDT_QCNT, OI(DOFF(bcount)), C1, 0, TYP_NONE },
+	{ "icount", FLDT_QCNT, OI(DOFF(icount)), C1, 0, TYP_NONE },
+	{ "itimer", FLDT_INT32D, OI(DOFF(itimer)), C1, 0, TYP_NONE },
+	{ "btimer", FLDT_INT32D, OI(DOFF(btimer)), C1, 0, TYP_NONE },
+	{ "iwarns", FLDT_QWARNCNT, OI(DOFF(iwarns)), C1, 0, TYP_NONE },
+	{ "bwarns", FLDT_QWARNCNT, OI(DOFF(bwarns)), C1, 0, TYP_NONE },
+	{ "pad0", FLDT_INT32D, OI(DOFF(pad0)), C1, FLD_SKIPALL, TYP_NONE },
+	{ "rtb_hardlimit", FLDT_QCNT, OI(DOFF(rtb_hardlimit)), C1, 0,
+	  TYP_NONE },
+	{ "rtb_softlimit", FLDT_QCNT, OI(DOFF(rtb_softlimit)), C1, 0,
+	  TYP_NONE },
+	{ "rtbcount", FLDT_QCNT, OI(DOFF(rtbcount)), C1, 0, TYP_NONE },
+	{ "rtbtimer", FLDT_INT32D, OI(DOFF(rtbtimer)), C1, 0, TYP_NONE },
+	{ "rtbwarns", FLDT_QWARNCNT, OI(DOFF(rtbwarns)), C1, 0, TYP_NONE },
+	{ "pad", FLDT_UINT16X, OI(DOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },
+	{ NULL }	/* presumably the end-of-table marker */
+};
+
+static void
+dquot_help(void)
+{	/* NOTE(review): prints nothing, yet dquot_cmd names it as its last member */
+}
+
+/*
+ * dquot [-p|-u] id: set the current address to the dquot record for id.
+ */
+static int
+dquot_f(
+	int		argc,
+	char		**argv)
+{
+	bmap_ext_t	bm;
+	int		c;
+	int		doproj;
+	xfs_dqid_t	id;
+	xfs_ino_t	ino;
+	int		nex = 1;
+	char		*p;
+	int		perblock;
+	int		qoff;
+	char		*s;
+
+	doproj = optind = 0;	/* optind 0: glibc form of a full getopt reset */
+	while ((c = getopt(argc, argv, "pu")) != -1) {
+		switch (c) {
+		case 'p':
+			doproj = 1;
+			break;
+		case 'u':
+			doproj = 0;
+			break;
+		default:
+			dbprintf("bad option for dquot command\n");
+			return 0;
+		}
+	}
+	s = doproj ? "project" : "user";
+	if (optind != argc - 1) {
+		dbprintf("dquot command requires one %s id argument\n", s);
+		return 0;
+	}
+	ino = doproj ? mp->m_sb.sb_pquotino : mp->m_sb.sb_uquotino;
+	if (ino == 0 || ino == NULLFSINO) {
+		dbprintf("no %s quota inode present\n", s);
+		return 0;
+	}
+	id = (xfs_dqid_t)strtoul(argv[optind], &p, 0);	/* ids above LONG_MAX too */
+	if (p == argv[optind] || *p != '\0') {	/* no digits, or trailing junk */
+		dbprintf("bad %s id for dquot %s\n", s, argv[optind]);
+		return 0;
+	}
+	perblock = (int)(mp->m_sb.sb_blocksize / sizeof(xfs_dqblk_t));
+	qoff = (int)(id % perblock);
+	push_cur();
+	set_cur_inode(ino);
+	bmap((xfs_fileoff_t)(id / perblock), 1, XFS_DATA_FORK, &nex, &bm);
+	pop_cur();
+	if (nex == 0) {
+		dbprintf("no %s quota data for id %u\n", s, (unsigned int)id);
+		return 0;
+	}
+	set_cur(&typtab[TYP_DQBLK], XFS_FSB_TO_DADDR(mp, bm.startblock), blkbb,
+		DB_RING_IGN, NULL);
+	off_cur(qoff * (int)sizeof(xfs_dqblk_t), sizeof(xfs_dqblk_t));
+	ring_add();
+	return 0;	/* every path returns 0; failures are only reported */
+}
+
+void
+dquot_init(void)
+{
+	add_command(&dquot_cmd);	/* presumably registers the "dquot" command */
+}
diff --git a/db/dquot.h b/db/dquot.h
new file mode 100644
index 000000000..ce231976d
--- /dev/null
+++ b/db/dquot.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+/* Field tables for the dquot structures; field.c's ftattrtab uses the first two. */
+extern const struct field	disk_dquot_flds[];
+extern const struct field	dqblk_flds[];
+extern const struct field	dqblk_hfld[];
+/* Passes the dquot command descriptor to add_command(). */
+extern void	dquot_init(void);
diff --git a/db/echo.c b/db/echo.c
new file mode 100644
index 000000000..7027870dc
--- /dev/null
+++ b/db/echo.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "echo.h"
+#include "output.h"
+
+static int	echo_f(int argc, char **argv);
+/* Descriptor for "echo" (it references echo_f); echo_init() passes it to add_command(). */
+static const cmdinfo_t	echo_cmd =
+	{ "echo", NULL, echo_f, 0, -1, 0, "[args]...",
+	  "echo arguments", NULL };
+
+/*ARGSUSED*/
+static int
+echo_f(
+	int	argc,
+	char	**argv)
+{
+	char	**word = argv + 1;	/* argv[0] itself is never printed */
+	/* Print each remaining word followed by one space, then a newline. */
+	while (*word != NULL)
+		dbprintf("%s ", *word++);
+	dbprintf("\n");
+	return 0;
+}
+
+void
+echo_init(void)
+{
+	add_command(&echo_cmd);	/* pass the echo descriptor to add_command() */
+}
diff --git a/db/echo.h b/db/echo.h
new file mode 100644
index 000000000..a2ddeb664
--- /dev/null
+++ b/db/echo.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	echo_init(void);	/* in echo.c: passes echo_cmd to add_command() */
diff --git a/db/faddr.c b/db/faddr.c
new file mode 100644
index 000000000..ee58936fe
--- /dev/null
+++ b/db/faddr.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "inode.h"
+#include "io.h"
+#include "bit.h"
+#include "bmap.h"
+#include "output.h"
+#include "mount.h"
+
+/* Go to the AG-relative block number stored in obj at bit offset "bit". */
+void
+fa_agblock(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_agblock_t	agbno;
+
+	if (cur_agno == NULLAGNUMBER) {
+		dbprintf("no current allocation group, cannot set new addr\n");
+		return;
+	}
+	agbno = (xfs_agblock_t)getbitval(obj, bit, bitsz(agbno), BVUNSIGNED);
+	if (agbno != NULLAGBLOCK) {
+		ASSERT(typtab[next].typnm == next);
+		set_cur(&typtab[next], XFS_AGB_TO_DADDR(mp, cur_agno, agbno),
+			blkbb, DB_RING_ADD, NULL);
+	} else
+		dbprintf("null block number, cannot set new addr\n");
+}
+
+/* Go to the inode whose AG-relative number is stored in obj at "bit". */
+/*ARGSUSED*/
+void
+fa_agino(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_agino_t	agino;
+
+	if (cur_agno == NULLAGNUMBER) {
+		dbprintf("no current allocation group, cannot set new addr\n");
+		return;
+	}
+	agino = (xfs_agino_t)getbitval(obj, bit, bitsz(agino), BVUNSIGNED);
+	if (agino != NULLAGINO)
+		set_cur_inode(XFS_AGINO_TO_INO(mp, cur_agno, agino));
+	else
+		dbprintf("null inode number, cannot set new addr\n");
+}
+
+/* Map an attribute-fork block number field with bmap() and go there. */
+/*ARGSUSED*/
+void
+fa_attrblock(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	bmap_ext_t	ext;
+	__uint32_t	ablk;
+	xfs_dfsbno_t	fsbno;
+	int		nmaps = 1;	/* bmap() rewrites it; 0 = unmapped */
+
+	ablk = (__uint32_t)getbitval(obj, bit, bitsz(ablk), BVUNSIGNED);
+	if (ablk == 0) {
+		dbprintf("null attribute block number, cannot set new addr\n");
+		return;
+	}
+	bmap(ablk, 1, XFS_ATTR_FORK, &nmaps, &ext);
+	if (nmaps == 0) {
+		dbprintf("attribute block is unmapped\n");
+		return;
+	}
+	fsbno = ext.startblock + (ablk - ext.startoff);
+	ASSERT(typtab[next].typnm == next);
+	set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_DADDR(mp, fsbno), blkbb,
+		DB_RING_ADD, NULL);
+}
+
+/* Map a BMBT_STARTOFF_BITLEN-bit attr fork file offset and go there. */
+void
+fa_cfileoffa(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	bmap_ext_t	ext;
+	xfs_dfiloff_t	foff;
+	xfs_dfsbno_t	fsbno;
+	int		nmaps = 1;	/* bmap() rewrites it; 0 = unmapped */
+
+	foff = (xfs_dfiloff_t)getbitval(obj, bit, BMBT_STARTOFF_BITLEN,
+		BVUNSIGNED);
+	if (foff == NULLDFILOFF) {
+		dbprintf("null block number, cannot set new addr\n");
+		return;
+	}
+	bmap(foff, 1, XFS_ATTR_FORK, &nmaps, &ext);
+	if (nmaps == 0) {
+		dbprintf("file block is unmapped\n");
+		return;
+	}
+	fsbno = ext.startblock + (foff - ext.startoff);
+	ASSERT(typtab[next].typnm == next);
+	set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, fsbno), blkbb, DB_RING_ADD,
+		NULL);
+}
+
+void
+fa_cfileoffd(
+	void		*obj,		/* object containing the field */
+	int		bit,		/* where the field starts in obj */
+	typnm_t		next)		/* type to give the target block */
+{
+	bbmap_t		bbmap;
+	bmap_ext_t	*bmp;
+	xfs_dfiloff_t	bno;
+	int		nb = next == TYP_DIR2 ? mp->m_dirblkfsbs : 1;
+	int		nex = nb;
+	/* Follow a BMBT_STARTOFF_BITLEN-bit data fork offset to its block(s). */
+	bno = (xfs_dfiloff_t)getbitval(obj, bit, BMBT_STARTOFF_BITLEN,
+		BVUNSIGNED);
+	if (bno == NULLDFILOFF) {
+		dbprintf("null block number, cannot set new addr\n");
+		return;
+	}
+	bmp = malloc(nb * sizeof(*bmp));
+	if (bmp == NULL) {		/* never hand bmap() a NULL array */
+		dbprintf("out of memory, cannot set new addr\n");
+		return;
+	}
+	bmap(bno, nb, XFS_DATA_FORK, &nex, bmp);
+	if (nex != 0) {
+		xfs_dfsbno_t dfsbno = bmp->startblock + (bno - bmp->startoff);
+		ASSERT(typtab[next].typnm == next);
+		if (nex > 1)		/* several extents: pass a block map */
+			make_bbmap(&bbmap, nex, bmp);
+		set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno),
+			nb * blkbb, DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+	} else
+		dbprintf("file block is unmapped\n");
+	free(bmp);
+}
+
+/* Go to the BMBT_STARTBLOCK_BITLEN-bit filesystem block number in obj. */
+void
+fa_cfsblock(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_dfsbno_t	fsbno;
+
+	fsbno = (xfs_dfsbno_t)getbitval(obj, bit, BMBT_STARTBLOCK_BITLEN,
+		BVUNSIGNED);
+	if (fsbno != NULLDFSBNO) {
+		ASSERT(typtab[next].typnm == next);
+		set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, fsbno), blkbb,
+			DB_RING_ADD, NULL);
+	} else
+		dbprintf("null block number, cannot set new addr\n");
+}
+
+/* Map a full-width attr fork file offset field with bmap() and go there. */
+void
+fa_dfiloffa(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	bmap_ext_t	ext;
+	xfs_dfiloff_t	foff;
+	xfs_dfsbno_t	fsbno;
+	int		nmaps = 1;	/* bmap() rewrites it; 0 = unmapped */
+
+	foff = (xfs_dfiloff_t)getbitval(obj, bit, bitsz(foff), BVUNSIGNED);
+	if (foff == NULLDFILOFF) {
+		dbprintf("null block number, cannot set new addr\n");
+		return;
+	}
+	bmap(foff, 1, XFS_ATTR_FORK, &nmaps, &ext);
+	if (nmaps == 0) {
+		dbprintf("file block is unmapped\n");
+		return;
+	}
+	fsbno = ext.startblock + (foff - ext.startoff);
+	ASSERT(typtab[next].typnm == next);
+	set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, fsbno), blkbb, DB_RING_ADD,
+		NULL);
+}
+
+void
+fa_dfiloffd(
+	void		*obj,		/* object containing the field */
+	int		bit,		/* where the field starts in obj */
+	typnm_t		next)		/* type to give the target block */
+{
+	bbmap_t		bbmap;
+	bmap_ext_t	*bmp;
+	xfs_dfiloff_t	bno;
+	int		nb = next == TYP_DIR2 ? mp->m_dirblkfsbs : 1;
+	int		nex = nb;
+	/* Follow a full-width data fork file offset field to its block(s). */
+	bno = (xfs_dfiloff_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED);
+	if (bno == NULLDFILOFF) {
+		dbprintf("null block number, cannot set new addr\n");
+		return;
+	}
+	bmp = malloc(nb * sizeof(*bmp));
+	if (bmp == NULL) {		/* never hand bmap() a NULL array */
+		dbprintf("out of memory, cannot set new addr\n");
+		return;
+	}
+	bmap(bno, nb, XFS_DATA_FORK, &nex, bmp);
+	if (nex != 0) {
+		xfs_dfsbno_t dfsbno = bmp->startblock + (bno - bmp->startoff);
+		ASSERT(typtab[next].typnm == next);
+		if (nex > 1)		/* several extents: pass a block map */
+			make_bbmap(&bbmap, nex, bmp);
+		set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno),
+			nb * blkbb, DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+	} else
+		dbprintf("file block is unmapped\n");
+	free(bmp);
+}
+
+/* Go to the filesystem block whose number is stored in obj at "bit". */
+void
+fa_dfsbno(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_dfsbno_t	fsbno;
+
+	fsbno = (xfs_dfsbno_t)getbitval(obj, bit, bitsz(fsbno), BVUNSIGNED);
+	if (fsbno != NULLDFSBNO) {
+		ASSERT(typtab[next].typnm == next);
+		set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, fsbno), blkbb,
+			DB_RING_ADD, NULL);
+	} else
+		dbprintf("null block number, cannot set new addr\n");
+}
+
+/*ARGSUSED*/
+void
+fa_dirblock(
+	void		*obj,		/* object containing the field */
+	int		bit,		/* where the field starts in obj */
+	typnm_t		next)		/* type to give the target block */
+{
+	bbmap_t		bbmap;
+	bmap_ext_t	*bmp;
+	__uint32_t	bno;
+	int		nex = mp->m_dirblkfsbs;
+	/* Follow a directory block number (0 = null) through the data fork. */
+	bno = (__uint32_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED);
+	if (bno == 0) {
+		dbprintf("null directory block number, cannot set new addr\n");
+		return;
+	}
+	bmp = malloc(nex * sizeof(*bmp));
+	if (bmp == NULL) {		/* never hand bmap() a NULL array */
+		dbprintf("out of memory, cannot set new addr\n");
+		return;
+	}
+	bmap(bno, mp->m_dirblkfsbs, XFS_DATA_FORK, &nex, bmp);
+	if (nex != 0) {
+		xfs_dfsbno_t dfsbno = bmp->startblock + (bno - bmp->startoff);
+		ASSERT(typtab[next].typnm == next);
+		if (nex > 1)		/* several extents: pass a block map */
+			make_bbmap(&bbmap, nex, bmp);
+		set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno),
+			(int)XFS_FSB_TO_DADDR(mp, mp->m_dirblkfsbs),
+			DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+	} else
+		dbprintf("directory block is unmapped\n");
+	free(bmp);
+}
+
+/* Go to an xfs_drfsbno_t block; it is converted with XFS_FSB_TO_BB(). */
+void
+fa_drfsbno(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_drfsbno_t	rfsbno;
+
+	rfsbno = (xfs_drfsbno_t)getbitval(obj, bit, bitsz(rfsbno), BVUNSIGNED);
+	if (rfsbno != NULLDRFSBNO) {
+		ASSERT(typtab[next].typnm == next);
+		set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_BB(mp, rfsbno),
+			blkbb, DB_RING_ADD, NULL);
+	} else
+		dbprintf("null block number, cannot set new addr\n");
+}
+
+/* xfs_drtbno_t field: only reports a null value, never changes position. */
+/*ARGSUSED*/
+void
+fa_drtbno(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_drtbno_t	rtbno;
+
+	rtbno = (xfs_drtbno_t)getbitval(obj, bit, bitsz(rtbno), BVUNSIGNED);
+	if (rtbno == NULLDRTBNO)
+		dbprintf("null block number, cannot set new addr\n");
+	/* need set_cur to understand rt subvolume */
+	/* until then a non-null value is read and then ignored */
+}
+
+/* Go to the inode whose xfs_ino_t number is stored in obj at "bit". */
+/*ARGSUSED*/
+void
+fa_ino(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_ino_t	inum;
+
+	ASSERT(next == TYP_INODE);
+	inum = (xfs_ino_t)getbitval(obj, bit, bitsz(inum), BVUNSIGNED);
+	if (inum != NULLFSINO)
+		set_cur_inode(inum);
+	else
+		dbprintf("null inode number, cannot set new addr\n");
+}
+
+/* Go to the inode named by an xfs_dir2_ino4_t sized field in obj. */
+void
+fa_ino4(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_ino_t	inum;
+
+	ASSERT(next == TYP_INODE);
+	inum = (xfs_ino_t)getbitval(obj, bit, bitsz(xfs_dir2_ino4_t),
+		BVUNSIGNED);
+	if (inum != NULLFSINO)
+		set_cur_inode(inum);
+	else
+		dbprintf("null inode number, cannot set new addr\n");
+}
+
+/* Go to the inode named by an xfs_dir2_ino8_t sized field in obj. */
+void
+fa_ino8(
+	void		*obj,
+	int		bit,
+	typnm_t		next)
+{
+	xfs_ino_t	inum;
+
+	ASSERT(next == TYP_INODE);
+	inum = (xfs_ino_t)getbitval(obj, bit, bitsz(xfs_dir2_ino8_t),
+		BVUNSIGNED);
+	if (inum != NULLFSINO)
+		set_cur_inode(inum);
+	else
+		dbprintf("null inode number, cannot set new addr\n");
+}
diff --git a/db/faddr.h b/db/faddr.h
new file mode 100644
index 000000000..25c471e55
--- /dev/null
+++ b/db/faddr.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+typedef void (*adfnc_t)(void *obj, int bit, typnm_t next);
+/* Address functions; each prototype below has the adfnc_t signature. */
+extern void	fa_agblock(void *obj, int bit, typnm_t next);
+extern void	fa_agino(void *obj, int bit, typnm_t next);
+extern void	fa_attrblock(void *obj, int bit, typnm_t next);
+extern void	fa_cfileoffd(void *obj, int bit, typnm_t next);
+extern void	fa_cfsblock(void *obj, int bit, typnm_t next);
+extern void	fa_dfiloffd(void *obj, int bit, typnm_t next);
+extern void	fa_dfsbno(void *obj, int bit, typnm_t next);
+extern void	fa_dinode_union(void *obj, int bit, typnm_t next);	/* NOTE(review): faddr.c has no definition for this -- confirm one exists */
+extern void	fa_dirblock(void *obj, int bit, typnm_t next);
+extern void	fa_drfsbno(void *obj, int bit, typnm_t next);
+extern void	fa_drtbno(void *obj, int bit, typnm_t next);
+extern void	fa_ino(void *obj, int bit, typnm_t next);
+extern void	fa_cfileoffa(void *obj, int bit, typnm_t next);
+extern void	fa_dfiloffa(void *obj, int bit, typnm_t next);
+extern void	fa_ino4(void *obj, int bit, typnm_t next);
+extern void	fa_ino8(void *obj, int bit, typnm_t next);
diff --git a/db/field.c b/db/field.c
new file mode 100644
index 000000000..399c47266
--- /dev/null
+++ b/db/field.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "bnobt.h"
+#include "cntbt.h"
+#include "inobt.h"
+#include "bmapbt.h"
+#include "bmroot.h"
+#include "bit.h"
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "sb.h"
+#include "dir.h"
+#include "dirshort.h"
+#include "attr.h"
+#include "attrshort.h"
+#include "dquot.h"
+#include "dir2.h"
+#include "dir2sf.h"
+
+const ftattr_t	ftattrtab[] = {	/* fsize() indexes this by field type, so entries follow enum fldt order */
+	{ FLDT_AEXTNUM, "aextnum", fp_num, "%d", SI(bitsz(xfs_aextnum_t)),
+	  FTARG_SIGNED, NULL, NULL },
+	{ FLDT_AGBLOCK, "agblock", fp_num, "%u", SI(bitsz(xfs_agblock_t)),
+	  FTARG_DONULL, fa_agblock, NULL },
+	{ FLDT_AGBLOCKNZ, "agblocknz", fp_num, "%u", SI(bitsz(xfs_agblock_t)),
+	  FTARG_SKIPZERO|FTARG_DONULL, fa_agblock, NULL },
+	{ FLDT_AGF, "agf", NULL, (char *)agf_flds, agf_size, FTARG_SIZE, NULL,
+	  agf_flds },
+	{ FLDT_AGFL, "agfl", NULL, (char *)agfl_flds, agfl_size, FTARG_SIZE,
+	  NULL, agfl_flds },
+	{ FLDT_AGI, "agi", NULL, (char *)agi_flds, agi_size, FTARG_SIZE, NULL,
+	  agi_flds },
+	{ FLDT_AGINO, "agino", fp_num, "%u", SI(bitsz(xfs_agino_t)),
+	  FTARG_DONULL, fa_agino, NULL },
+	{ FLDT_AGINONN, "aginonn", fp_num, "%u", SI(bitsz(xfs_agino_t)),
+	  FTARG_SKIPNULL, fa_agino, NULL },
+	{ FLDT_AGNUMBER, "agnumber", fp_num, "%u", SI(bitsz(xfs_agnumber_t)),
+	  FTARG_DONULL, NULL, NULL },
+	{ FLDT_ATTR, "attr", NULL, (char *)attr_flds, attr_size, FTARG_SIZE,
+	  NULL, attr_flds },
+	{ FLDT_ATTR_BLKINFO, "attr_blkinfo", NULL, (char *)attr_blkinfo_flds,
+	  SI(bitsz(struct xfs_da_blkinfo)), 0, NULL, attr_blkinfo_flds },
+	{ FLDT_ATTR_LEAF_ENTRY, "attr_leaf_entry", fp_sarray,
+	  (char *)attr_leaf_entry_flds, SI(bitsz(struct xfs_attr_leaf_entry)),
+	  0, NULL, attr_leaf_entry_flds },
+	{ FLDT_ATTR_LEAF_HDR, "attr_leaf_hdr", NULL, (char *)attr_leaf_hdr_flds,
+	  SI(bitsz(struct xfs_attr_leaf_hdr)), 0, NULL, attr_leaf_hdr_flds },
+	{ FLDT_ATTR_LEAF_MAP, "attr_leaf_map", fp_sarray,
+	  (char *)attr_leaf_map_flds, SI(bitsz(struct xfs_attr_leaf_map)), 0,
+	  NULL, attr_leaf_map_flds },
+	{ FLDT_ATTR_LEAF_NAME, "attr_leaf_name", NULL,
+	  (char *)attr_leaf_name_flds, attr_leaf_name_size, FTARG_SIZE, NULL,
+	  attr_leaf_name_flds },
+	{ FLDT_ATTR_NODE_ENTRY, "attr_node_entry", fp_sarray,
+	  (char *)attr_node_entry_flds, SI(bitsz(struct xfs_da_node_entry)), 0,
+	  NULL, attr_node_entry_flds },
+	{ FLDT_ATTR_NODE_HDR, "attr_node_hdr", NULL, (char *)attr_node_hdr_flds,
+	  SI(bitsz(struct xfs_da_node_hdr)), 0, NULL, attr_node_hdr_flds },
+	{ FLDT_ATTR_SF_ENTRY, "attr_sf_entry", NULL, (char *)attr_sf_entry_flds,
+	  attr_sf_entry_size, FTARG_SIZE, NULL, attr_sf_entry_flds },
+	{ FLDT_ATTR_SF_HDR, "attr_sf_hdr", NULL, (char *)attr_sf_hdr_flds,
+	  SI(bitsz(struct xfs_attr_sf_hdr)), 0, NULL, attr_sf_hdr_flds },
+	{ FLDT_ATTRBLOCK, "attrblock", fp_num, "%u", SI(bitsz(__uint32_t)), 0,
+	  fa_attrblock, NULL },
+	{ FLDT_ATTRSHORT, "attrshort", NULL, (char *)attr_shortform_flds,
+	  attrshort_size, FTARG_SIZE, NULL, attr_shortform_flds },
+	{ FLDT_BMAPBTA, "bmapbta", NULL, (char *)bmapbta_flds, bmapbta_size,
+	  FTARG_SIZE, NULL, bmapbta_flds },
+	{ FLDT_BMAPBTAKEY, "bmapbtakey", fp_sarray, (char *)bmapbta_key_flds,
+	  SI(bitsz(xfs_bmbt_key_t)), 0, NULL, bmapbta_key_flds },
+	{ FLDT_BMAPBTAPTR, "bmapbtaptr", fp_num, "%llu",
+	  SI(bitsz(xfs_bmbt_ptr_t)), 0, fa_dfsbno, NULL },
+	{ FLDT_BMAPBTAREC, "bmapbtarec", fp_sarray, (char *)bmapbta_rec_flds,
+	  SI(bitsz(xfs_bmbt_rec_t)), 0, NULL, bmapbta_rec_flds },
+	{ FLDT_BMAPBTD, "bmapbtd", NULL, (char *)bmapbtd_flds, bmapbtd_size,
+	  FTARG_SIZE, NULL, bmapbtd_flds },
+	{ FLDT_BMAPBTDKEY, "bmapbtdkey", fp_sarray, (char *)bmapbtd_key_flds,
+	  SI(bitsz(xfs_bmbt_key_t)), 0, NULL, bmapbtd_key_flds },
+	{ FLDT_BMAPBTDPTR, "bmapbtdptr", fp_num, "%llu",
+	  SI(bitsz(xfs_bmbt_ptr_t)), 0, fa_dfsbno, NULL },
+	{ FLDT_BMAPBTDREC, "bmapbtdrec", fp_sarray, (char *)bmapbtd_rec_flds,
+	  SI(bitsz(xfs_bmbt_rec_t)), 0, NULL, bmapbtd_rec_flds },
+	{ FLDT_BMROOTA, "bmroota", NULL, (char *)bmroota_flds, bmroota_size,
+	  FTARG_SIZE, NULL, bmroota_flds },
+	{ FLDT_BMROOTAKEY, "bmrootakey", fp_sarray, (char *)bmroota_key_flds,
+	  SI(bitsz(xfs_bmdr_key_t)), 0, NULL, bmroota_key_flds },
+	{ FLDT_BMROOTAPTR, "bmrootaptr", fp_num, "%llu",
+	  SI(bitsz(xfs_bmdr_ptr_t)), 0, fa_dfsbno, NULL },
+	{ FLDT_BMROOTD, "bmrootd", NULL, (char *)bmrootd_flds, bmrootd_size,
+	  FTARG_SIZE, NULL, bmrootd_flds },
+	{ FLDT_BMROOTDKEY, "bmrootdkey", fp_sarray, (char *)bmrootd_key_flds,
+	  SI(bitsz(xfs_bmdr_key_t)), 0, NULL, bmrootd_key_flds },
+	{ FLDT_BMROOTDPTR, "bmrootdptr", fp_num, "%llu",
+	  SI(bitsz(xfs_bmdr_ptr_t)), 0, fa_dfsbno, NULL },
+	{ FLDT_BNOBT, "bnobt", NULL, (char *)bnobt_flds, bnobt_size, FTARG_SIZE,
+	  NULL, bnobt_flds },
+	{ FLDT_BNOBTKEY, "bnobtkey", fp_sarray, (char *)bnobt_key_flds,
+	  SI(bitsz(xfs_alloc_key_t)), 0, NULL, bnobt_key_flds },
+	{ FLDT_BNOBTPTR, "bnobtptr", fp_num, "%u", SI(bitsz(xfs_alloc_ptr_t)),
+	  0, fa_agblock, NULL },
+	{ FLDT_BNOBTREC, "bnobtrec", fp_sarray, (char *)bnobt_rec_flds,
+	  SI(bitsz(xfs_alloc_rec_t)), 0, NULL, bnobt_rec_flds },
+	{ FLDT_CEXTFLG, "cextflag", fp_num, "%u", SI(BMBT_EXNTFLAG_BITLEN), 0,
+	  NULL, NULL },
+	{ FLDT_CEXTLEN, "cextlen", fp_num, "%u", SI(BMBT_BLOCKCOUNT_BITLEN), 0,
+	  NULL, NULL },
+	{ FLDT_CFILEOFFA, "cfileoffa", fp_num, "%llu", SI(BMBT_STARTOFF_BITLEN),
+	  0, fa_cfileoffa, NULL },
+	{ FLDT_CFILEOFFD, "cfileoffd", fp_num, "%llu", SI(BMBT_STARTOFF_BITLEN),
+	  0, fa_cfileoffd, NULL },
+	{ FLDT_CFSBLOCK, "cfsblock", fp_num, "%llu", SI(BMBT_STARTBLOCK_BITLEN),
+	  0, fa_cfsblock, NULL },
+	{ FLDT_CHARNS, "charns", fp_charns, NULL, SI(bitsz(char)), 0, NULL,
+	  NULL },
+	{ FLDT_CHARS, "chars", fp_num, "%c", SI(bitsz(char)), 0, NULL, NULL },
+	{ FLDT_CNTBT, "cntbt", NULL, (char *)cntbt_flds, cntbt_size, FTARG_SIZE,
+	  NULL, cntbt_flds },
+	{ FLDT_CNTBTKEY, "cntbtkey", fp_sarray, (char *)cntbt_key_flds,
+	  SI(bitsz(xfs_alloc_key_t)), 0, NULL, cntbt_key_flds },
+	{ FLDT_CNTBTPTR, "cntbtptr", fp_num, "%u", SI(bitsz(xfs_alloc_ptr_t)),
+	  0, fa_agblock, NULL },
+	{ FLDT_CNTBTREC, "cntbtrec", fp_sarray, (char *)cntbt_rec_flds,
+	  SI(bitsz(xfs_alloc_rec_t)), 0, NULL, cntbt_rec_flds },
+	{ FLDT_DEV, "dev", fp_num, "%#x", SI(bitsz(xfs_dev_t)), 0, NULL, NULL },
+	{ FLDT_DFILOFFA, "dfiloffa", fp_num, "%llu", SI(bitsz(xfs_dfiloff_t)),
+	  0, fa_dfiloffa, NULL },
+	{ FLDT_DFILOFFD, "dfiloffd", fp_num, "%llu", SI(bitsz(xfs_dfiloff_t)),
+	  0, fa_dfiloffd, NULL },
+	{ FLDT_DFSBNO, "dfsbno", fp_num, "%llu", SI(bitsz(xfs_dfsbno_t)),
+	  FTARG_DONULL, fa_dfsbno, NULL },
+	{ FLDT_DINODE_A, "dinode_a", NULL, (char *)inode_a_flds, inode_a_size,
+	  FTARG_SIZE|FTARG_OKEMPTY, NULL, inode_a_flds },
+	{ FLDT_DINODE_CORE, "dinode_core", NULL, (char *)inode_core_flds,
+	  SI(bitsz(xfs_dinode_core_t)), 0, NULL, inode_core_flds },
+	{ FLDT_DINODE_FMT, "dinode_fmt", fp_dinode_fmt, NULL,
+	  SI(bitsz(__int8_t)), 0, NULL, NULL },
+	{ FLDT_DINODE_U, "dinode_u", NULL, (char *)inode_u_flds, inode_u_size,
+	  FTARG_SIZE|FTARG_OKEMPTY, NULL, inode_u_flds },
+	{ FLDT_DIR, "dir", NULL, (char *)dir_flds, dir_size, FTARG_SIZE, NULL,
+	  dir_flds },
+	{ FLDT_DIR2, "dir2", NULL, (char *)dir2_flds, dir2_size, FTARG_SIZE,
+	  NULL, dir2_flds },
+	{ FLDT_DIR2_BLOCK_TAIL, "dir2_block_tail", NULL,
+	  (char *)dir2_block_tail_flds, SI(bitsz(xfs_dir2_block_tail_t)), 0,
+	  NULL, dir2_block_tail_flds },
+	{ FLDT_DIR2_DATA_FREE, "dir2_data_free", NULL,
+	  (char *)dir2_data_free_flds, SI(bitsz(xfs_dir2_data_free_t)), 0, NULL,
+	  dir2_data_free_flds },
+	{ FLDT_DIR2_DATA_HDR, "dir2_data_hdr", NULL, (char *)dir2_data_hdr_flds,
+	  SI(bitsz(xfs_dir2_data_hdr_t)), 0, NULL, dir2_data_hdr_flds },
+	{ FLDT_DIR2_DATA_OFF, "dir2_data_off", fp_num, "%#x",
+	  SI(bitsz(xfs_dir2_data_off_t)), 0, NULL, NULL },
+	{ FLDT_DIR2_DATA_OFFNZ, "dir2_data_offnz", fp_num, "%#x",
+	  SI(bitsz(xfs_dir2_data_off_t)), FTARG_SKIPZERO, NULL, NULL },
+	{ FLDT_DIR2_DATA_UNION, "dir2_data_union", NULL,
+	  (char *)dir2_data_union_flds, dir2_data_union_size, FTARG_SIZE, NULL,
+	  dir2_data_union_flds },
+	{ FLDT_DIR2_FREE_HDR, "dir2_free_hdr", NULL, (char *)dir2_free_hdr_flds,
+	  SI(bitsz(xfs_dir2_free_hdr_t)), 0, NULL, dir2_free_hdr_flds },
+	{ FLDT_DIR2_INO4, "dir2_ino4", fp_num, "%u", SI(bitsz(xfs_dir2_ino4_t)),
+	  0, fa_ino4, NULL },
+	{ FLDT_DIR2_INO8, "dir2_ino8", fp_num, "%llu",
+	  SI(bitsz(xfs_dir2_ino8_t)), 0, fa_ino8, NULL },
+	{ FLDT_DIR2_INOU, "dir2_inou", NULL, (char *)dir2_inou_flds,
+	  dir2_inou_size, FTARG_SIZE, NULL, dir2_inou_flds },
+	{ FLDT_DIR2_LEAF_ENTRY, "dir2_leaf_entry", NULL,
+	  (char *)dir2_leaf_entry_flds, SI(bitsz(xfs_dir2_leaf_entry_t)), 0,
+	  NULL, dir2_leaf_entry_flds },
+	{ FLDT_DIR2_LEAF_HDR, "dir2_leaf_hdr", NULL, (char *)dir2_leaf_hdr_flds,
+	  SI(bitsz(xfs_dir2_leaf_hdr_t)), 0, NULL, dir2_leaf_hdr_flds },
+	{ FLDT_DIR2_LEAF_TAIL, "dir2_leaf_tail", NULL,
+	  (char *)dir2_leaf_tail_flds, SI(bitsz(xfs_dir2_leaf_tail_t)), 0, NULL,
+	  dir2_leaf_tail_flds },
+	{ FLDT_DIR2_SF_ENTRY, "dir2_sf_entry", NULL, (char *)dir2_sf_entry_flds,
+	  dir2_sf_entry_size, FTARG_SIZE, NULL, dir2_sf_entry_flds },
+	{ FLDT_DIR2_SF_HDR, "dir2_sf_hdr", NULL, (char *)dir2_sf_hdr_flds,
+	  dir2_sf_hdr_size, FTARG_SIZE, NULL, dir2_sf_hdr_flds },
+	{ FLDT_DIR2_SF_OFF, "dir2_sf_off", fp_num, "%#x",
+	  SI(bitsz(xfs_dir2_sf_off_t)), 0, NULL, NULL },
+	{ FLDT_DIR2SF, "dir2sf", NULL, (char *)dir2sf_flds, dir2sf_size,
+	  FTARG_SIZE, NULL, dir2sf_flds },
+	{ FLDT_DIR_BLKINFO, "dir_blkinfo", NULL, (char *)dir_blkinfo_flds,
+	  SI(bitsz(struct xfs_da_blkinfo)), 0, NULL, dir_blkinfo_flds },
+	{ FLDT_DIR_INO, "dir_ino", fp_num, "%llu", SI(bitsz(xfs_dir_ino_t)), 0,
+	  fa_ino, NULL },
+	{ FLDT_DIR_LEAF_ENTRY, "dir_leaf_entry", fp_sarray,
+	  (char *)dir_leaf_entry_flds, SI(bitsz(struct xfs_dir_leaf_entry)), 0,
+	  NULL, dir_leaf_entry_flds },
+	{ FLDT_DIR_LEAF_HDR, "dir_leaf_hdr", NULL, (char *)dir_leaf_hdr_flds,
+	  SI(bitsz(struct xfs_dir_leaf_hdr)), 0, NULL, dir_leaf_hdr_flds },
+	{ FLDT_DIR_LEAF_MAP, "dir_leaf_map", fp_sarray,
+	  (char *)dir_leaf_map_flds, SI(bitsz(struct xfs_dir_leaf_map)), 0,
+	  NULL, dir_leaf_map_flds },
+	{ FLDT_DIR_LEAF_NAME, "dir_leaf_name", NULL, (char *)dir_leaf_name_flds,
+	  dir_leaf_name_size, FTARG_SIZE, NULL, dir_leaf_name_flds },
+	{ FLDT_DIR_NODE_ENTRY, "dir_node_entry", fp_sarray,
+	  (char *)dir_node_entry_flds, SI(bitsz(struct xfs_da_node_entry)), 0,
+	  NULL, dir_node_entry_flds },
+	{ FLDT_DIR_NODE_HDR, "dir_node_hdr", NULL, (char *)dir_node_hdr_flds,
+	  SI(bitsz(struct xfs_da_node_hdr)), 0, NULL, dir_node_hdr_flds },
+	{ FLDT_DIR_SF_ENTRY, "dir_sf_entry", NULL, (char *)dir_sf_entry_flds,
+	  dir_sf_entry_size, FTARG_SIZE, NULL, dir_sf_entry_flds },
+	{ FLDT_DIR_SF_HDR, "dir_sf_hdr", NULL, (char *)dir_sf_hdr_flds,
+	  SI(bitsz(struct xfs_dir_sf_hdr)), 0, NULL, dir_sf_hdr_flds },
+	{ FLDT_DIRBLOCK, "dirblock", fp_num, "%u", SI(bitsz(__uint32_t)), 0,
+	  fa_dirblock, NULL },
+	{ FLDT_DIRSHORT, "dirshort", NULL, (char *)dir_shortform_flds,
+	  dirshort_size, FTARG_SIZE, NULL, dir_shortform_flds },
+	{ FLDT_DISK_DQUOT, "disk_dquot", NULL, (char *)disk_dquot_flds,
+	  SI(bitsz(xfs_disk_dquot_t)), 0, NULL, disk_dquot_flds },
+	{ FLDT_DQBLK, "dqblk", NULL, (char *)dqblk_flds, SI(bitsz(xfs_dqblk_t)),
+	  0, NULL, dqblk_flds },
+	{ FLDT_DQID, "dqid", fp_num, "%d", SI(bitsz(xfs_dqid_t)), 0, NULL,
+	  NULL },
+	{ FLDT_DRFSBNO, "drfsbno", fp_num, "%llu", SI(bitsz(xfs_drfsbno_t)),
+	  FTARG_DONULL, fa_drfsbno, NULL },
+	{ FLDT_DRTBNO, "drtbno", fp_num, "%llu", SI(bitsz(xfs_drtbno_t)),
+	  FTARG_DONULL, fa_drtbno, NULL },
+	{ FLDT_EXTLEN, "extlen", fp_num, "%u", SI(bitsz(xfs_extlen_t)), 0, NULL,
+	  NULL },
+	{ FLDT_EXTNUM, "extnum", fp_num, "%d", SI(bitsz(xfs_extnum_t)),
+	  FTARG_SIGNED, NULL, NULL },
+	{ FLDT_FSIZE, "fsize", fp_num, "%lld", SI(bitsz(xfs_fsize_t)),
+	  FTARG_SIGNED, NULL, NULL },
+	{ FLDT_INO, "ino", fp_num, "%llu", SI(bitsz(xfs_ino_t)), FTARG_DONULL,
+	  fa_ino, NULL },
+	{ FLDT_INOBT, "inobt",  NULL, (char *)inobt_flds, inobt_size,
+	  FTARG_SIZE, NULL, inobt_flds },
+	{ FLDT_INOBTKEY, "inobtkey", fp_sarray, (char *)inobt_key_flds,
+	  SI(bitsz(xfs_inobt_key_t)), 0, NULL, inobt_key_flds },
+	{ FLDT_INOBTPTR, "inobtptr", fp_num, "%u", SI(bitsz(xfs_inobt_ptr_t)),
+	  0, fa_agblock, NULL },
+	{ FLDT_INOBTREC, "inobtrec", fp_sarray, (char *)inobt_rec_flds,
+	  SI(bitsz(xfs_inobt_rec_t)), 0, NULL, inobt_rec_flds },
+	{ FLDT_INODE, "inode", NULL, (char *)inode_flds, inode_size, FTARG_SIZE,
+	  NULL, inode_flds },
+	{ FLDT_INOFREE, "inofree", fp_num, "%#llx", SI(bitsz(xfs_inofree_t)), 0,
+	  NULL, NULL },
+	{ FLDT_INT16D, "int16d", fp_num, "%d", SI(bitsz(__int16_t)),
+	  FTARG_SIGNED, NULL, NULL },
+	{ FLDT_INT32D, "int32d", fp_num, "%d", SI(bitsz(__int32_t)),
+	  FTARG_SIGNED, NULL, NULL },
+	{ FLDT_INT64D, "int64d", fp_num, "%lld", SI(bitsz(__int64_t)),
+	  FTARG_SIGNED, NULL, NULL },
+	{ FLDT_INT8D, "int8d", fp_num, "%d", SI(bitsz(__int8_t)), FTARG_SIGNED,
+	  NULL, NULL },
+	{ FLDT_NSEC, "nsec", fp_num, "%09d", SI(bitsz(__int32_t)), FTARG_SIGNED,
+	  NULL, NULL },
+	{ FLDT_QCNT, "qcnt", fp_num, "%llu", SI(bitsz(xfs_qcnt_t)), 0, NULL,
+	  NULL },
+	{ FLDT_QWARNCNT, "qwarncnt", fp_num, "%u", SI(bitsz(xfs_qwarncnt_t)), 0,
+	  NULL, NULL },
+	{ FLDT_SB, "sb", NULL, (char *)sb_flds, sb_size, FTARG_SIZE, NULL,
+	  sb_flds },
+	{ FLDT_TIME, "time", fp_time, NULL, SI(bitsz(__int32_t)), FTARG_SIGNED,
+	  NULL, NULL },
+	{ FLDT_TIMESTAMP, "timestamp", NULL, (char *)timestamp_flds,
+	  SI(bitsz(xfs_timestamp_t)), 0, NULL, timestamp_flds },
+	{ FLDT_UINT1, "uint1", fp_num, "%u", SI(1), 0, NULL, NULL },
+	{ FLDT_UINT16D, "uint16d", fp_num, "%u", SI(bitsz(__uint16_t)), 0, NULL,
+	  NULL },
+	{ FLDT_UINT16O, "uint16o", fp_num, "%#o", SI(bitsz(__uint16_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT16X, "uint16x", fp_num, "%#x", SI(bitsz(__uint16_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT32D, "uint32d", fp_num, "%u", SI(bitsz(__uint32_t)), 0, NULL,
+	  NULL },
+	{ FLDT_UINT32O, "uint32o", fp_num, "%#o", SI(bitsz(__uint32_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT32X, "uint32x", fp_num, "%#x", SI(bitsz(__uint32_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT64D, "uint64d", fp_num, "%llu", SI(bitsz(__uint64_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT64O, "uint64o", fp_num, "%#llo", SI(bitsz(__uint64_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT64X, "uint64x", fp_num, "%#llx", SI(bitsz(__uint64_t)), 0,
+	  NULL, NULL },
+	{ FLDT_UINT8D, "uint8d", fp_num, "%u", SI(bitsz(__uint8_t)), 0, NULL,
+	  NULL },
+	{ FLDT_UINT8O, "uint8o", fp_num, "%#o", SI(bitsz(__uint8_t)), 0, NULL,
+	  NULL },
+	{ FLDT_UINT8X, "uint8x", fp_num, "%#x", SI(bitsz(__uint8_t)), 0, NULL,
+	  NULL },
+	{ FLDT_UUID, "uuid", fp_uuid, NULL, SI(bitsz(uuid_t)), 0, NULL, NULL },
+	{ FLDT_ZZZ, NULL }	/* final entry: its name is NULL */
+};
+
+/* Return the bit offset of field f (of element idx when f is an array). */
+int
+bitoffset(
+	const field_t	*f,
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	int		abase;
+
+	/* FLD_OFFSET: f->offset is a function that computes the offset. */
+	if (f->flags & FLD_OFFSET)
+		return (*f->offset)(obj, startoff, idx);
+	/* Otherwise f->offset carries the offset value itself. */
+	if (!(f->flags & FLD_ARRAY))
+		return (int)(__psint_t)f->offset;
+	/*
+	 * Arrays: add (idx - abase) elements of fsize() bits each, where
+	 * abase is 1 when FLD_ABASE1 is set and 0 otherwise.
+	 */
+	abase = (f->flags & FLD_ABASE1) ? 1 : 0;
+	ASSERT(ftattrtab[f->ftyp].ftyp == f->ftyp);
+	ASSERT((ftattrtab[f->ftyp].arg & FTARG_SIZE) == 0);
+	return (int)(__psint_t)f->offset +
+		(idx - abase) * fsize(f, obj, startoff, idx);
+}
+
+int
+fcount(
+	const field_t	*f,
+	void		*obj,
+	int		startoff)
+{
+	/* FLD_COUNT: count is a callback; otherwise it is the value itself. */
+	if (f->flags & FLD_COUNT)
+		return (*f->count)(obj, startoff);
+	return (int)(__psint_t)f->count;
+}
+
+const field_t *
+findfield(
+	char		*name,
+	const field_t	*fields,
+	void		*obj,
+	int		startoff)
+{
+	const field_t	*fp = fields;
+
+	/* walk to the NULL-name end entry; a zero fcount() rules a match out */
+	for (; fp->name != NULL; fp++)
+		if (!strcmp(fp->name, name) && fcount(fp, obj, startoff) != 0)
+			return fp;
+	return NULL;
+}
+
+int
+fsize(
+	const field_t	*f,
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	const ftattr_t	*attr = &ftattrtab[f->ftyp];
+
+	/* ftattrtab is indexed by type: the entry must carry the same ftyp */
+	ASSERT(attr->ftyp == f->ftyp);
+	/* FTARG_SIZE: size is a callback; otherwise it is the value itself */
+	if (attr->arg & FTARG_SIZE)
+		return (*attr->size)(obj, startoff, idx);
+	return (int)(__psint_t)attr->size;
+}
diff --git a/db/field.h b/db/field.h
new file mode 100644
index 000000000..c5249f7da
--- /dev/null
+++ b/db/field.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+typedef enum fldt	{	/* field type ids; used as the index into ftattrtab[] */
+	FLDT_AEXTNUM,
+	FLDT_AGBLOCK,
+	FLDT_AGBLOCKNZ,
+	FLDT_AGF,
+	FLDT_AGFL,
+	FLDT_AGI,
+	FLDT_AGINO,
+	FLDT_AGINONN,
+	FLDT_AGNUMBER,
+	FLDT_ATTR,
+	FLDT_ATTR_BLKINFO,
+	FLDT_ATTR_LEAF_ENTRY,
+	FLDT_ATTR_LEAF_HDR,
+	FLDT_ATTR_LEAF_MAP,
+	FLDT_ATTR_LEAF_NAME,
+	FLDT_ATTR_NODE_ENTRY,
+	FLDT_ATTR_NODE_HDR,
+	FLDT_ATTR_SF_ENTRY,
+	FLDT_ATTR_SF_HDR,
+	FLDT_ATTRBLOCK,
+	FLDT_ATTRSHORT,
+	FLDT_BMAPBTA,
+	FLDT_BMAPBTAKEY,
+	FLDT_BMAPBTAPTR,
+	FLDT_BMAPBTAREC,
+	FLDT_BMAPBTD,
+	FLDT_BMAPBTDKEY,
+	FLDT_BMAPBTDPTR,
+	FLDT_BMAPBTDREC,
+	FLDT_BMROOTA,
+	FLDT_BMROOTAKEY,
+	FLDT_BMROOTAPTR,
+	FLDT_BMROOTD,
+	FLDT_BMROOTDKEY,
+	FLDT_BMROOTDPTR,
+	FLDT_BNOBT,
+	FLDT_BNOBTKEY,
+	FLDT_BNOBTPTR,
+	FLDT_BNOBTREC,
+	FLDT_CEXTFLG,
+	FLDT_CEXTLEN,
+	FLDT_CFILEOFFA,
+	FLDT_CFILEOFFD,
+	FLDT_CFSBLOCK,
+	FLDT_CHARNS,
+	FLDT_CHARS,
+	FLDT_CNTBT,
+	FLDT_CNTBTKEY,
+	FLDT_CNTBTPTR,
+	FLDT_CNTBTREC,
+	FLDT_DEV,
+	FLDT_DFILOFFA,
+	FLDT_DFILOFFD,
+	FLDT_DFSBNO,
+	FLDT_DINODE_A,
+	FLDT_DINODE_CORE,
+	FLDT_DINODE_FMT,
+	FLDT_DINODE_U,
+	FLDT_DIR,
+	FLDT_DIR2,
+	FLDT_DIR2_BLOCK_TAIL,
+	FLDT_DIR2_DATA_FREE,
+	FLDT_DIR2_DATA_HDR,
+	FLDT_DIR2_DATA_OFF,
+	FLDT_DIR2_DATA_OFFNZ,
+	FLDT_DIR2_DATA_UNION,
+	FLDT_DIR2_FREE_HDR,
+	FLDT_DIR2_INO4,
+	FLDT_DIR2_INO8,
+	FLDT_DIR2_INOU,
+	FLDT_DIR2_LEAF_ENTRY,
+	FLDT_DIR2_LEAF_HDR,
+	FLDT_DIR2_LEAF_TAIL,
+	FLDT_DIR2_SF_ENTRY,
+	FLDT_DIR2_SF_HDR,
+	FLDT_DIR2_SF_OFF,
+	FLDT_DIR2SF,
+	FLDT_DIR_BLKINFO,
+	FLDT_DIR_INO,
+	FLDT_DIR_LEAF_ENTRY,
+	FLDT_DIR_LEAF_HDR,
+	FLDT_DIR_LEAF_MAP,
+	FLDT_DIR_LEAF_NAME,
+	FLDT_DIR_NODE_ENTRY,
+	FLDT_DIR_NODE_HDR,
+	FLDT_DIR_SF_ENTRY,
+	FLDT_DIR_SF_HDR,
+	FLDT_DIRBLOCK,
+	FLDT_DIRSHORT,
+	FLDT_DISK_DQUOT,
+	FLDT_DQBLK,
+	FLDT_DQID,
+	FLDT_DRFSBNO,
+	FLDT_DRTBNO,
+	FLDT_EXTLEN,
+	FLDT_EXTNUM,
+	FLDT_FSIZE,
+	FLDT_INO,
+	FLDT_INOBT,
+	FLDT_INOBTKEY,
+	FLDT_INOBTPTR,
+	FLDT_INOBTREC,
+	FLDT_INODE,
+	FLDT_INOFREE,
+	FLDT_INT16D,
+	FLDT_INT32D,
+	FLDT_INT64D,
+	FLDT_INT8D,
+	FLDT_NSEC,
+	FLDT_QCNT,
+	FLDT_QWARNCNT,
+	FLDT_SB,
+	FLDT_TIME,
+	FLDT_TIMESTAMP,
+	FLDT_UINT1,
+	FLDT_UINT16D,
+	FLDT_UINT16O,
+	FLDT_UINT16X,
+	FLDT_UINT32D,
+	FLDT_UINT32O,
+	FLDT_UINT32X,
+	FLDT_UINT64D,
+	FLDT_UINT64O,
+	FLDT_UINT64X,
+	FLDT_UINT8D,
+	FLDT_UINT8O,
+	FLDT_UINT8X,
+	FLDT_UUID,
+	FLDT_ZZZ			/* mark last entry; ends ftattrtab[] with a NULL name */
+} fldt_t;
+
+typedef int (*offset_fnc_t)(void *obj, int startoff, int idx);
+#define	OI(o)	((offset_fnc_t)(__psint_t)(o))	/* carry a constant offset in an offset_fnc_t slot */
+/* count callbacks take no idx; CI() carries a constant count the same way */
+typedef int (*count_fnc_t)(void *obj, int startoff);
+#define	CI(c)	((count_fnc_t)(__psint_t)(c))
+#define	C1	CI(1)				/* constant count of one */
+
+typedef struct field	/* one named member of a structure; tables end with a NULL name */
+{
+	char		*name;		/* name matched by findfield() */
+	fldt_t		ftyp;		/* type id, index into ftattrtab[] */
+	offset_fnc_t	offset;		/* constant (cast to int) unless FLD_OFFSET is set */
+	count_fnc_t	count;		/* constant (cast to int) unless FLD_COUNT is set */
+	int		flags;		/* FLD_* bits below */
+	typnm_t		next;		/* NOTE(review): use not visible in this header - confirm */
+} field_t;
+
+/*
+ * flag values for the flags member of field_t
+ */
+#define	FLD_ABASE1	1	/* field array base is 1 not 0 */
+#define	FLD_SKIPALL	2	/* skip this field in an all-fields print */
+#define	FLD_ARRAY	4	/* this field is an array */
+#define	FLD_OFFSET	8	/* offset value is a function pointer */
+#define	FLD_COUNT	16	/* count value is a function pointer */
+
+typedef int (*size_fnc_t)(void *obj, int startoff, int idx);
+#define	SI(s)	((size_fnc_t)(__psint_t)(s))	/* carry a constant size in a size_fnc_t slot */
+/* per-type attributes: one ftattr_t per fldt_t value */
+typedef struct ftattr
+{
+	fldt_t		ftyp;		/* own type id; users ASSERT it equals the table index */
+	char		*name;		/* type name, e.g. "uint64x" */
+	prfnc_t		prfunc;		/* print function; NULL makes flist_parse expand subfields */
+	char		*fmtstr;	/* format handed to prfunc, e.g. "%#llx" */
+	size_fnc_t	size;		/* constant (cast to int) unless FTARG_SIZE is set in arg */
+	int		arg;		/* FTARG_* bits below */
+	adfnc_t		adfunc;		/* NOTE(review): use not visible in this header - confirm */
+	const field_t	*subfld;	/* subfield table for structured types, else NULL */
+} ftattr_t;
+extern const ftattr_t	ftattrtab[];	/* indexed by fldt_t */
+
+/*
+ * arg values (ftattr_t.arg; the fp_* print functions test these in "arg")
+ */
+#define	FTARG_SKIPZERO	1	/* skip 0 words */
+#define	FTARG_DONULL	2	/* make -1 words be "null" */
+#define	FTARG_SKIPNULL	4	/* skip -1 words */
+#define	FTARG_SIGNED	8	/* field value is signed */
+#define	FTARG_SIZE	16	/* size field is a function */
+#define	FTARG_SKIPNMS	32	/* skip printing names this time */
+#define	FTARG_OKEMPTY	64	/* ok if this (union type) is empty */
+
+extern int		bitoffset(const field_t *f, void *obj, int startoff,
+				  int idx);	/* bit offset of element idx of f */
+extern int		fcount(const field_t *f, void *obj, int startoff);	/* element count of f */
+extern const field_t	*findfield(char *name, const field_t *fields,
+                                  void *obj, int startoff);	/* NULL if no match with non-zero count */
+extern int		fsize(const field_t *f, void *obj, int startoff,
+			          int idx);	/* size of one element of f */
diff --git a/db/flist.c b/db/flist.c
new file mode 100644
index 000000000..fd5e19108
--- /dev/null
+++ b/db/flist.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "flist.h"
+#include "debug.h"
+#include "output.h"
+#include "malloc.h"
+
+static void	flist_expand_arrays(flist_t *fl);	/* one entry per array index */
+static void	flist_expand_structs(flist_t *fl, void *obj);	/* one child per subfield */
+static flist_t	*flist_replicate(flist_t *fl);	/* deep copy */
+static ftok_t	*flist_split(char *s);		/* tokenize a field spec */
+static void	ftok_free(ftok_t *ft);		/* free a token array */
+
+/*
+ * Split an array entry covering low..high into one list entry per index.
+ * fl keeps index low; entries for low+1..high, each with its own copy of
+ * fl's child list, are linked in between fl and fl's original sibling.
+ */
+static void
+flist_expand_arrays(
+	flist_t		*fl)
+{
+	const field_t	*fld = fl->fld;
+#ifdef DEBUG
+	const ftattr_t	*fa = &ftattrtab[fld->ftyp];
+#endif
+	flist_t		*elem;
+	int		i;
+	int		last = fl->high;
+	flist_t		**linkp = &fl->sibling;
+	flist_t		*rest = fl->sibling;
+
+	ASSERT(fa->ftyp == fld->ftyp);
+	ASSERT(fld->flags & FLD_ARRAY);
+
+	/* fl itself now stands for the single element at index low */
+	fl->high = fl->low;
+	for (i = fl->low + 1; i <= last; i++) {
+		elem = flist_make(fld->name);
+		elem->fld = fld;
+		elem->low = i;
+		elem->high = i;
+		elem->flags |= FL_OKLOW | FL_OKHIGH;
+		elem->child = flist_replicate(fl->child);
+		*linkp = elem;
+		linkp = &elem->sibling;
+	}
+	*linkp = rest;		/* reattach whatever followed fl originally */
+}
+
+/*
+ * Give a structure-typed entry that has no children one child per
+ * subfield of its type, skipping subfields whose count is zero and
+ * those flagged FLD_SKIPALL.
+ */
+static void
+flist_expand_structs(
+	flist_t		*fl,
+	void		*obj)
+{
+	const field_t	*fld = fl->fld;
+	const ftattr_t	*fa = &ftattrtab[fld->ftyp];
+	flist_t		**linkp = &fl->child;
+	flist_t		*memb;
+	const field_t	*sub;
+
+	ASSERT(fa->ftyp == fld->ftyp);
+	ASSERT(fa->subfld != NULL);
+	ASSERT(fl->child == NULL);
+
+	for (sub = fa->subfld; sub->name != NULL; sub++) {
+		if (fcount(sub, obj, fl->offset) == 0 ||
+		    (sub->flags & FLD_SKIPALL))
+			continue;
+		memb = flist_make(sub->name);
+		memb->fld = sub;
+		*linkp = memb;		/* append, preserving table order */
+		linkp = &memb->sibling;
+	}
+}
+
+/*
+ * Free fl, every sibling after it and all of their children.  Siblings are
+ * released in a loop so that only the child depth, not the length of the
+ * sibling chain (which array expansion can make long), consumes stack.
+ * A NULL list is accepted and ignored.
+ */
+void
+flist_free(
+	flist_t	*fl)
+{
+	flist_t	*next;
+
+	for (; fl != NULL; fl = next) {
+		next = fl->sibling;	/* fetch before fl is released */
+		flist_free(fl->child);
+		if (fl->name)
+			xfree(fl->name);
+		xfree(fl);
+	}
+}
+
+/*
+ * Allocate a list entry whose name is an xstrdup() copy of name; every
+ * other member starts out as zero or NULL.
+ */
+flist_t *
+flist_make(
+	char	*name)
+{
+	flist_t	*ent = xmalloc(sizeof(*ent));
+
+	ent->name = xstrdup(name);
+	ent->fld = NULL;
+	ent->child = ent->sibling = NULL;
+	ent->low = ent->high = 0;
+	ent->flags = 0;
+	ent->offset = 0;
+	return ent;
+}
+
+int			/* 1 if every entry resolved, 0 after printing an error */
+flist_parse(
+	const field_t	*fields,	/* field table the names are looked up in */
+	flist_t		*fl,		/* list to resolve; entries are updated in place */
+	void		*obj,		/* object handed to count/offset callbacks */
+	int		startoff)	/* offset added to each field's bit offset */
+{
+	const field_t	*f;
+	const ftattr_t	*fa;
+	int		high;
+	int		low;
+	/* resolve each entry at this level, then recurse into its children */
+	while (fl) {
+		f = findfield(fl->name, fields, obj, startoff);
+		if (f == NULL) {
+			dbprintf("field %s not found\n", fl->name);
+			return 0;
+		}
+		fl->fld = f;
+		fa = &ftattrtab[f->ftyp];
+		ASSERT(fa->ftyp == f->ftyp);
+		if (f->flags & FLD_ARRAY) {
+			low = (f->flags & FLD_ABASE1) != 0;	/* first valid index */
+			high = fcount(f, obj, startoff) + low - 1;	/* last valid index */
+			if (low > high) {
+				dbprintf("no elements in %s\n", fl->name);
+				return 0;
+			}
+			if (fl->flags & FL_OKHIGH) {	/* both ends of a range were given */
+				if (fl->low < low || fl->low > high ||
+				    fl->high < low || fl->high > high) {
+					dbprintf("indices %d-%d for field %s "
+						 "out of range %d-%d\n",
+						fl->low, fl->high, fl->name,
+						low, high);
+					return 0;
+				}
+			} else if (fl->flags & FL_OKLOW) {	/* a single index was given */
+				if (fl->low < low || fl->low > high) {
+					dbprintf("index %d for field %s out of "
+						 "range %d-%d\n",
+						fl->low, fl->name, low, high);
+					return 0;
+				}
+				fl->high = fl->low;
+				fl->flags |= FL_OKHIGH;
+			} else {			/* no index: take the whole array */
+				fl->low = low;
+				fl->high = high;
+				fl->flags |= FL_OKLOW | FL_OKHIGH;
+			}
+		} else {
+			if (fl->flags & FL_OKLOW) {	/* index given on a non-array */
+				dbprintf("field %s is not an array\n",
+					fl->name);
+				return 0;
+			}
+		}
+		fl->offset = startoff + bitoffset(f, obj, startoff, fl->low);
+		if ((fl->child != NULL || fa->prfunc == NULL) &&
+		    (f->flags & FLD_ARRAY) && fl->low != fl->high)
+			flist_expand_arrays(fl);	/* new siblings are parsed by later iterations */
+		if (fa->prfunc == NULL && fl->child == NULL)
+			flist_expand_structs(fl, obj);	/* no printer: descend into all subfields */
+		if (fl->child) {
+			if (fa->subfld == NULL) {
+				dbprintf("field %s has no subfields\n",
+					fl->name);
+				return 0;
+			}
+			if (!flist_parse(fa->subfld, fl->child, obj,
+					fl->offset))
+				return 0;
+		}
+		fl = fl->sibling;
+	}
+	return 1;
+}
+
+/*
+ * Debugging aid: when DEBUG_FLIST is set in debug_state, dump every entry
+ * of the list (recursing into children) through dbprintf().
+ */
+void
+flist_print(
+	flist_t	*fl)
+{
+	int			bits;
+	const struct field	*fld;
+
+	if ((debug_state & DEBUG_FLIST) == 0)
+		return;
+	for (; fl != NULL; fl = fl->sibling) {
+		fld = fl->fld;
+		dbprintf("fl@%p:\n", fl);
+		dbprintf("\tname=%s, fld=%p, child=%p, sibling=%p\n",
+			fl->name, fld, fl->child, fl->sibling);
+		bits = fl->flags;
+		dbprintf("\tlow=%d, high=%d, flags=%d (%s%s), offset=%d\n",
+			fl->low, fl->high, bits,
+			(bits & FL_OKLOW) ? "oklow " : "",
+			(bits & FL_OKHIGH) ? "okhigh" : "", fl->offset);
+		dbprintf("\tfld->name=%s, fld->ftyp=%d (%s)\n",
+			fld->name, fld->ftyp, ftattrtab[fld->ftyp].name);
+		bits = fld->flags;
+		dbprintf("\tfld->flags=%d (%s%s%s%s%s)\n", bits,
+			(bits & FLD_ABASE1) ? "abase1 " : "",
+			(bits & FLD_SKIPALL) ? "skipall " : "",
+			(bits & FLD_ARRAY) ? "array " : "",
+			(bits & FLD_OFFSET) ? "offset " : "",
+			(bits & FLD_COUNT) ? "count " : "");
+		if (fl->child != NULL)
+			flist_print(fl->child);
+	}
+}
+
+/* Deep-copy the list f: the entry, its children and its later siblings. */
+static flist_t *
+flist_replicate(
+	flist_t	*f)
+{
+	flist_t	*copy;
+
+	if (!f)
+		return NULL;
+	copy = flist_make(f->name);
+	copy->fld = f->fld;
+	copy->low = f->low;
+	copy->high = f->high;
+	copy->flags = f->flags;
+	copy->offset = f->offset;
+	copy->child = flist_replicate(f->child);
+	copy->sibling = flist_replicate(f->sibling);
+	return copy;
+}
+
+/*
+ * Convert a TT_NUM token to an int with strtoul() (base auto-detected).
+ * Returns 1 and stores the value on success, 0 if the token is not a
+ * number or has trailing characters strtoul() did not consume.
+ */
+static int
+flist_scan_num(
+	ftok_t	*tok,
+	int	*valp)
+{
+	char	*end;
+
+	if (tok->tokty != TT_NUM)
+		return 0;
+	*valp = (int)strtoul(tok->tok, &end, 0);
+	return *end == '\0';
+}
+
+/*
+ * Parse a field specification of the form name[low-high].name[idx]...
+ * into a chain of flist entries linked through their child pointers.
+ * Returns the head of the chain, or NULL (after a message) on bad syntax.
+ */
+flist_t *
+flist_scan(
+	char	*name)
+{
+	flist_t	*ent;
+	flist_t	*head = NULL;
+	flist_t	*tail = NULL;
+	ftok_t	*t;
+	ftok_t	*toks;
+	int	val;
+
+	toks = flist_split(name);
+	if (toks == NULL)
+		return NULL;
+	for (t = toks; t->tokty != TT_END; ) {
+		/* every component starts with a name */
+		if (t->tokty != TT_NAME)
+			goto bad;
+		ent = flist_make(t->tok);
+		if (tail != NULL)
+			tail->child = ent;
+		else
+			head = ent;
+		tail = ent;
+		t++;
+		/* optional "[low]" or "[low-high]" */
+		if (t->tokty == TT_LB) {
+			if (!flist_scan_num(++t, &val))
+				goto bad;
+			ent->flags |= FL_OKLOW;
+			ent->low = val;
+			t++;
+			if (t->tokty == TT_DASH) {
+				if (!flist_scan_num(++t, &val))
+					goto bad;
+				ent->flags |= FL_OKHIGH;
+				ent->high = val;
+				t++;
+			}
+			if (t->tokty != TT_RB)
+				goto bad;
+			t++;
+		}
+		/* a '.' must be followed by another component */
+		if (t->tokty == TT_DOT) {
+			t++;
+			if (t->tokty == TT_END)
+				goto bad;
+		}
+	}
+	ftok_free(toks);
+	return head;
+bad:
+	dbprintf("bad syntax in field name %s\n", name);
+	ftok_free(toks);
+	if (head != NULL)
+		flist_free(head);
+	return NULL;
+}
+
+/*
+ * Split the field specification s into an xmalloc'ed array of tokens.
+ * The array always ends with an entry whose tok is NULL and whose type is
+ * TT_END; the caller releases it with ftok_free().
+ *
+ * Tokens are: a double-quoted string (running to the LAST quote in s), a
+ * name (lower-case letter followed by lower-case letters, digits or '_'),
+ * a number (digit followed by digits, 'x' or 'X'), or one of "[-].".
+ * NOTE(review): hex digits a-f are not part of a number token, so "0x1f"
+ * splits into "0x1" and the name "f" - confirm whether that is intended.
+ *
+ * Returns NULL after printing a message if a quote is left open or a
+ * character starts no token.
+ */
+static ftok_t *
+flist_split(
+	char		*s)
+{
+	static const char	initidchar[] = "abcdefghijklmnopqrstuvwxyz";
+	static const char	idchars[] =
+				"abcdefghijklmnopqrstuvwxyz0123456789_";
+	static const char	numchars[] = "0123456789";
+	static const char	xnumchars[] = "0123456789xX";
+	static char		punctchars[] = "[-].";
+	static tokty_t		puncttypes[] = { TT_LB, TT_DASH, TT_RB, TT_DOT };
+	char		*a;
+	int		l;
+	int		nv = 0;
+	int		tailskip;
+	tokty_t		t;
+	ftok_t		*v;
+
+	v = xmalloc(sizeof(*v));
+	v[0].tok = NULL;
+	v[0].tokty = TT_END;
+	while (*s) {
+		tailskip = 0;
+		if (*s == '\"') {
+			s++; /* skip first quote */
+			if ((a = strrchr(s, '\"')) == NULL) {
+				dbprintf("missing closing quote %s\n", s);
+				ftok_free(v);
+				return NULL;
+			}
+			tailskip = 1; /* skip remaining quote */
+			l = (int)(a - s);
+			t = TT_STRING;
+		} else if (strchr(initidchar, *s)) {
+			l = (int)strspn(s, idchars);
+			t = TT_NAME;
+		} else if (strchr(numchars, *s)) {
+			l = (int)strspn(s, xnumchars);
+			t = TT_NUM;
+		} else if ((a = strchr(punctchars, *s)) != NULL) {
+			l = 1;
+			t = puncttypes[a - punctchars];
+		} else {
+			dbprintf("bad character in field %s\n", s);
+			ftok_free(v);
+			return NULL;
+		}
+		a = xmalloc(l + 1);
+		memcpy(a, s, l);
+		a[l] = '\0';
+		v = xrealloc(v, (nv + 2) * sizeof(*v));
+		v[nv].tok = a;
+		v[nv].tokty = t;
+		nv++;
+		/*
+		 * Re-terminate after every append: the error paths above call
+		 * ftok_free(v), which walks the array until it finds a NULL
+		 * tok, so the slot after the last token must never be left
+		 * uninitialized.
+		 */
+		v[nv].tok = NULL;
+		v[nv].tokty = TT_END;
+		s += l + tailskip;
+	}
+	return v;
+}
+
+/* Free every token string up to the NULL tok terminator, then the array. */
+static void
+ftok_free(
+	ftok_t	*ft)
+{
+	int	i;
+
+	for (i = 0; ft[i].tok != NULL; i++)
+		xfree(ft[i].tok);
+	xfree(ft);
+}
diff --git a/db/flist.h b/db/flist.h
new file mode 100644
index 000000000..eab154d7b
--- /dev/null
+++ b/db/flist.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+typedef struct flist {			/* one component of a parsed field path */
+	char			*name;		/* xstrdup'ed name; freed by flist_free() */
+	const struct field 	*fld;		/* matching field, set by flist_parse() */
+	struct flist		*child;		/* next path component / subfields */
+	struct flist		*sibling;	/* next entry at the same level */
+	int			low;		/* first array index */
+	int			high;		/* last array index */
+	int			flags;		/* FL_* bits below */
+	int			offset;		/* startoff + bitoffset(), set by flist_parse() */
+} flist_t;
+
+/*
+ * Flags for flist
+ */
+#define	FL_OKLOW	1	/* low holds a valid index */
+#define	FL_OKHIGH	2	/* high holds a valid index */
+
+typedef enum tokty {	/* token kinds produced by flist_split(); TT_END ends the array */
+	TT_NAME, TT_NUM, TT_STRING, TT_LB, TT_RB, TT_DASH, TT_DOT, TT_END
+} tokty_t;
+
+typedef struct ftok {
+	char	*tok;		/* allocated token text; NULL in the final entry */
+	tokty_t	tokty;		/* kind of token */
+} ftok_t;
+
+extern void	flist_free(flist_t *fl);	/* free a list built by flist_make/flist_scan */
+extern flist_t	*flist_make(char *name);	/* new zeroed entry with a copy of name */
+extern int	flist_parse(const struct field *fields, flist_t *fl, void *obj,
+			    int startoff);	/* 1 on success, 0 after an error message */
+extern void	flist_print(flist_t *fl);	/* debug dump, only with DEBUG_FLIST */
+extern flist_t	*flist_scan(char *name);	/* parse a field path; NULL on bad syntax */
diff --git a/db/fprint.c b/db/fprint.c
new file mode 100644
index 000000000..3e4a9bf80
--- /dev/null
+++ b/db/fprint.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <time.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "inobt.h"
+#include "bit.h"
+#include "print.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+
+/*
+ * Map a control character to the letter of its C escape sequence
+ * ('\n' -> 'n'), or return 0 if it has no single-letter escape.
+ */
+static int
+fp_charns_escape(
+	int	c)
+{
+	switch (c) {
+	case '\a':
+		return 'a';
+	case '\b':
+		return 'b';
+	case '\f':
+		return 'f';
+	case '\n':
+		return 'n';
+	case '\r':
+		return 'r';
+	case '\t':
+		return 't';
+	case '\v':
+		return 'v';
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Print count chars starting at bit offset bit of obj as one double-quoted
+ * string.  Quote characters, backslash and '?' are backslash-escaped,
+ * printable characters are printed as-is, control characters with a C
+ * escape are printed as that escape, and everything else as \ooo octal.
+ * Stops early if seenint() reports an interrupt.  Always returns 1.
+ */
+int
+fp_charns(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	int		c;
+	int		esc;
+	int		i;
+	unsigned char	*p;
+
+	ASSERT(bitoffs(bit) == 0);
+	ASSERT(size == bitsz(char));
+	dbprintf("\"");
+	for (i = 0, p = (unsigned char *)obj + byteize(bit);
+	     i < count && !seenint();
+	     i++, p++) {
+		/* unsigned value: the ctype functions reject negative chars */
+		c = *p;
+		if (c == '\\' || c == '\'' || c == '"' || c == '\?')
+			dbprintf("\\%c", c);
+		else if (isgraph(c) || c == ' ')
+			dbprintf("%c", c);
+		else if ((esc = fp_charns_escape(c)) != 0)
+			dbprintf("\\%c", esc);
+		else
+			dbprintf("\\%03o", c);
+	}
+	dbprintf("\"");
+	return 1;
+}
+
+/*
+ * Print count integers of size bits each, starting at bit offset bit of
+ * obj, formatting each with fmtstr.  arg carries FTARG_* bits: SIGNED
+ * selects a signed fetch, SKIPZERO and SKIPNULL suppress zero and all-ones
+ * values, and DONULL prints "null" for all-ones.  With array set, each
+ * value is prefixed with "index:".  Stops early if seenint() reports an
+ * interrupt.  Always returns 1.
+ */
+int
+fp_num(
+	void		*obj,
+	int		bit,
+	int		count,
+	char		*fmtstr,
+	int		size,
+	int		arg,
+	int		base,
+	int		array)
+{
+	int		fetch;
+	int		i;
+	int		isnull;
+	int		pos = bit;
+	__int64_t	val;
+
+	fetch = (arg & FTARG_SIGNED) ? BVSIGNED : BVUNSIGNED;
+	for (i = 0; i < count && !seenint(); i++, pos += size) {
+		val = getbitval(obj, pos, size, fetch);
+		if (val == 0 && (arg & FTARG_SKIPZERO))
+			continue;
+		/* "null" means all bits set within the field's width */
+		if ((arg & FTARG_SIGNED) || size == 64)
+			isnull = (val == -1LL);
+		else
+			isnull = (val == ((1LL << size) - 1LL));
+		if (isnull && (arg & FTARG_SKIPNULL))
+			continue;
+		if (array)
+			dbprintf("%d:", i + base);
+		if (isnull && (arg & FTARG_DONULL))
+			dbprintf("null");
+		else if (size <= 32)
+			dbprintf(fmtstr, (__int32_t)val);
+		else
+			dbprintf(fmtstr, val);
+		if (i + 1 < count)
+			dbprintf(" ");
+	}
+	return 1;
+}
+
+/*
+ * Print an array of structures by handing off to print_sarray().  For this
+ * print function the fmtstr slot carries the subfield table, not a format.
+ * Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_sarray(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	const field_t	*subfields = (const field_t *)fmtstr;
+	int		skipnames = (arg & FTARG_SKIPNMS) ? 1 : 0;
+
+	print_sarray(obj, bit, count, size, base, array, subfields, skipnames);
+	return 1;
+}
+
+/*
+ * Print count time values in ctime() form (first 24 characters, without
+ * the newline).  Each element is size bits wide and byte aligned; with
+ * array set, each value is prefixed with "index:".  Stops early if
+ * seenint() reports an interrupt.  Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_time(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	int	bitpos;
+	char	*c;
+	int	i;
+	time_t	t;
+
+	ASSERT(bitoffs(bit) == 0);
+	for (i = 0, bitpos = bit;
+	     i < count && !seenint();
+	     i++, bitpos += size) {
+		if (array)
+			dbprintf("%d:", i + base);
+		/*
+		 * Fetch exactly one element (size bits).  Using the width of
+		 * the host's time_t here would read past the element and into
+		 * whatever follows it whenever time_t is wider than size.
+		 */
+		t = (time_t)getbitval((char *)obj + byteize(bitpos), 0,
+				      size, 0);
+		c = ctime(&t);
+		dbprintf("%24.24s", c);
+		if (i < count - 1)
+			dbprintf(" ");
+	}
+	return 1;
+}
+
+/*
+ * Print count UUIDs, stored back to back at bit offset bit of obj, in the
+ * text form produced by uuid_unparse().  With array set, each value is
+ * prefixed with "index:".  Stops early if seenint() reports an interrupt.
+ * Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_uuid(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	int	i;
+	uuid_t	*ids;
+	char	text[40];	/* UUID string is 36 chars + trailing '\0' */
+
+	ASSERT(bitoffs(bit) == 0);
+	ids = (uuid_t *)((char *)obj + byteize(bit));
+	for (i = 0; i < count && !seenint(); i++) {
+		if (array)
+			dbprintf("%d:", i + base);
+		uuid_unparse(ids[i], text);
+		dbprintf("%s", text);
+		if (i + 1 < count)
+			dbprintf(" ");
+	}
+	return 1;
+}
diff --git a/db/fprint.h b/db/fprint.h
new file mode 100644
index 000000000..01c9385b8
--- /dev/null
+++ b/db/fprint.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+typedef int (*prfnc_t)(void *obj, int bit, int count, char *fmtstr, int size,
+		       int arg, int base, int array);	/* print count items of size bits from bit offset bit */
+
+extern int	fp_charns(void *obj, int bit, int count, char *fmtstr, int size,
+			  int arg, int base, int array);	/* chars as one quoted, escaped string */
+extern int	fp_num(void *obj, int bit, int count, char *fmtstr, int size,
+		       int arg, int base, int array);	/* integers, formatted with fmtstr */
+extern int	fp_sarray(void *obj, int bit, int count, char *fmtstr, int size,
+			  int arg, int base, int array);	/* array of structures; fmtstr is a field table */
+extern int	fp_time(void *obj, int bit, int count, char *fmtstr, int size,
+			int arg, int base, int array);	/* times in ctime() form */
+extern int	fp_uuid(void *obj, int bit, int count, char *fmtstr, int size,
+			int arg, int base, int array);	/* UUIDs via uuid_unparse() */
diff --git a/db/frag.c b/db/frag.c
new file mode 100644
index 000000000..100d11aa8
--- /dev/null
+++ b/db/frag.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "frag.h"
+#include "io.h"
+#include "output.h"
+#include "type.h"
+#include "mount.h"
+#include "malloc.h"
+
+typedef struct extent {			/* one file extent as recorded by extmap_set_ext() */
+	xfs_fileoff_t	startoff;	/* starting file offset */
+	xfs_filblks_t	blockcount;	/* length of the extent */
+} extent_t;
+
+typedef	struct extmap {			/* growable list of one fork's extents */
+	int		naents;		/* entries allocated */
+	int		nents;		/* entries in use */
+	extent_t	ents[1];	/* really naents entries; see EXTMAP_SIZE */
+} extmap_t;
+#define	EXTMAP_SIZE(n)	\
+	(offsetof(extmap_t, ents) + (sizeof(extent_t) * (n)))	/* bytes for a map of n entries */
+
+static int		aflag;			/* -a; NOTE(review): read outside this view, presumably attribute forks */
+static int		dflag;			/* -d: include directories */
+static __uint64_t	extcount_actual;	/* total extents found (summed in process_fork) */
+static __uint64_t	extcount_ideal;		/* total of extmap_ideal() over all forks */
+static int		fflag;			/* -f: include regular files */
+static int		lflag;			/* -l: include symlinks */
+static int		qflag;			/* -q: include the quota inodes */
+static int		Rflag;			/* -R: include the realtime bitmap/summary inodes */
+static int		rflag;			/* -r: include realtime files */
+static int		vflag;			/* -v: set by init(); not read in the visible code */
+
+typedef void	(*scan_lbtree_f_t)(xfs_btree_lblock_t	*block,	/* callback type for scan_lbtree() */
+				   int			level,
+				   extmap_t		**extmapp,
+				   typnm_t		btype);
+
+typedef void	(*scan_sbtree_f_t)(xfs_btree_sblock_t	*block,	/* callback type for scan_sbtree() */
+				   int			level,
+				   xfs_agf_t		*agf);
+
+static extmap_t		*extmap_alloc(xfs_extnum_t nex);	/* empty map with room for nex entries */
+static xfs_extnum_t	extmap_ideal(extmap_t *extmap);	/* number of contiguous runs in the map */
+static void		extmap_set_ext(extmap_t **extmapp, xfs_fileoff_t o,
+				       xfs_extlen_t c);	/* append an extent, growing the map */
+static int		frag_f(int argc, char **argv);	/* the "frag" command */
+static int		init(int argc, char **argv);	/* parse options, reset counters */
+static void		process_bmbt_reclist(xfs_bmbt_rec_32_t *rp, int numrecs,
+					     extmap_t **extmapp);
+static void		process_btinode(xfs_dinode_t *dip, extmap_t **extmapp,
+					int whichfork);
+static void		process_exinode(xfs_dinode_t *dip, extmap_t **extmapp,
+					int whichfork);
+static void		process_fork(xfs_dinode_t *dip, int whichfork);
+static void		process_inode(xfs_agf_t *agf, xfs_agino_t agino,
+				      xfs_dinode_t *dip);
+static void		scan_ag(xfs_agnumber_t agno);
+static void		scan_lbtree(xfs_fsblock_t root, int nlevels,
+				    scan_lbtree_f_t func, extmap_t **extmapp,
+				    typnm_t btype);
+static void		scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root,
+				    int nlevels, scan_sbtree_f_t func,
+				    typnm_t btype);
+static void		scanfunc_bmap(xfs_btree_lblock_t *ablock, int level,
+				      extmap_t **extmapp, typnm_t btype);
+static void		scanfunc_ino(xfs_btree_sblock_t *ablock, int level,
+				     xfs_agf_t *agf);
+
+/*
+ * Command table entry for "frag".  The usage string lists every option
+ * that init() accepts in its getopt() string ("adflqRrv").
+ */
+static const cmdinfo_t	frag_cmd =
+	{ "frag", NULL, frag_f, 0, -1, 0,
+	  "[-a] [-d] [-f] [-l] [-q] [-R] [-r] [-v]",
+	  "get file fragmentation data", NULL };
+
+/* Allocate an empty extent map with room for nex (at least one) entries. */
+static extmap_t *
+extmap_alloc(
+	xfs_extnum_t	nex)
+{
+	extmap_t	*map;
+	xfs_extnum_t	slots = nex < 1 ? 1 : nex;
+
+	map = xmalloc(EXTMAP_SIZE(slots));
+	map->naents = slots;
+	map->nents = 0;
+	return map;
+}
+
+/*
+ * Count the runs of file-offset-contiguous extents in the map: the first
+ * extent starts a run, and so does every extent that does not begin
+ * exactly where its predecessor ended.
+ */
+static xfs_extnum_t
+extmap_ideal(
+	extmap_t	*extmap)
+{
+	extent_t	*cur;
+	int		i;
+	extent_t	*prev;
+	xfs_extnum_t	runs = 0;
+
+	for (i = 0; i < extmap->nents; i++) {
+		if (i == 0) {
+			runs++;
+			continue;
+		}
+		cur = &extmap->ents[i];
+		prev = &extmap->ents[i - 1];
+		if (cur->startoff != prev->startoff + prev->blockcount)
+			runs++;
+	}
+	return runs;
+}
+
+/*
+ * Append the extent (o, c) to *extmapp.  A full map is first grown by one
+ * entry with xrealloc(), and *extmapp is updated to the new address.
+ */
+static void
+extmap_set_ext(
+	extmap_t	**extmapp,
+	xfs_fileoff_t	o,
+	xfs_extlen_t	c)
+{
+	extmap_t	*map = *extmapp;
+
+	if (map->nents == map->naents) {
+		map = xrealloc(map, EXTMAP_SIZE(map->naents + 1));
+		map->naents++;
+		*extmapp = map;
+	}
+	map->ents[map->nents].startoff = o;
+	map->ents[map->nents].blockcount = c;
+	map->nents++;
+}
+
+/* Register the "frag" command through add_command(). */
+void
+frag_init(void)
+{
+	add_command(&frag_cmd);
+}
+
+/*
+ * The "frag" command: scan every allocation group and report the actual
+ * and ideal extent counts plus the percentage of extents in excess of the
+ * ideal.  Always returns 0.
+ */
+static int
+frag_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno = 0;
+	double		answer = 0.0;
+
+	if (!init(argc, argv))
+		return 0;
+	while (agno < mp->m_sb.sb_agcount) {
+		scan_ag(agno);
+		agno++;
+	}
+	/* leave the factor at zero when no extents were found at all */
+	if (extcount_actual != 0)
+		answer = (double)(extcount_actual - extcount_ideal) * 100.0 /
+			 (double)extcount_actual;
+	dbprintf("actual %llu, ideal %llu, fragmentation factor %.2f%%\n",
+		extcount_actual, extcount_ideal, answer);
+	return 0;
+}
+
+/*
+ * Parse the frag command's options into the file-scope flags and reset the
+ * extent counters.  If none of the selection options (everything except
+ * -v) is given, all of them are turned on.  Returns 1 on success, 0 after
+ * reporting a bad option.
+ */
+static int
+init(
+	int		argc,
+	char		**argv)
+{
+	int		c;
+	int		*flagp;
+
+	aflag = dflag = fflag = lflag = qflag = Rflag = rflag = vflag = 0;
+	optind = 0;
+	while ((c = getopt(argc, argv, "adflqRrv")) != EOF) {
+		/* pick the flag variable that belongs to this option letter */
+		switch (c) {
+		case 'a':
+			flagp = &aflag;
+			break;
+		case 'd':
+			flagp = &dflag;
+			break;
+		case 'f':
+			flagp = &fflag;
+			break;
+		case 'l':
+			flagp = &lflag;
+			break;
+		case 'q':
+			flagp = &qflag;
+			break;
+		case 'R':
+			flagp = &Rflag;
+			break;
+		case 'r':
+			flagp = &rflag;
+			break;
+		case 'v':
+			flagp = &vflag;
+			break;
+		default:
+			dbprintf("bad option for frag command\n");
+			return 0;
+		}
+		*flagp = 1;
+	}
+	if (!(aflag || dflag || fflag || lflag || qflag || Rflag || rflag))
+		aflag = dflag = fflag = lflag = qflag = Rflag = rflag = 1;
+	extcount_actual = extcount_ideal = 0;
+	return 1;
+}
+
+/*
+ * Decode numrecs extent records starting at rp with convert_extent() and
+ * append each one's file offset and length to *extmapp.
+ */
+static void
+process_bmbt_reclist(
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	extmap_t		**extmapp)
+{
+	xfs_dfilblks_t		len;
+	int			flag;
+	int			n;
+	xfs_dfiloff_t		off;
+	xfs_dfsbno_t		start;
+
+	for (n = 0; n < numrecs; n++) {
+		convert_extent((xfs_bmbt_rec_64_t *)&rp[n], &off, &start,
+			       &len, &flag);
+		extmap_set_ext(extmapp, (xfs_fileoff_t)off, (xfs_extlen_t)len);
+	}
+}
+
+static void
+process_btinode(
+	xfs_dinode_t		*dip,		/* on-disk inode */
+	extmap_t		**extmapp,	/* map the extents are added to */
+	int			whichfork)	/* XFS_DATA_FORK, or another fork id (then scanned as TYP_BMAPBTA) */
+{
+	xfs_bmdr_block_t	*dib;		/* btree root held inside the inode fork */
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+	/* collect the extents of a btree-format fork, starting at the in-inode root */
+	dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) {	/* level 0: the root holds records itself */
+		rp = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+			XFS_DFORK_SIZE(dip, mp, whichfork),
+			xfs_bmdr, dib, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+					whichfork),
+				xfs_bmdr, 1));
+		process_bmbt_reclist(rp, INT_GET(dib->bb_numrecs, ARCH_CONVERT), extmapp);
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE(dip, mp, whichfork),
+		xfs_bmdr, dib, 1,
+		XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp, whichfork),
+					xfs_bmdr, 0));
+	for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++)	/* otherwise scan each subtree */
+		scan_lbtree((xfs_fsblock_t)INT_GET(pp[i], ARCH_CONVERT), INT_GET(dib->bb_level, ARCH_CONVERT), scanfunc_bmap,
+			extmapp,
+			whichfork == XFS_DATA_FORK ? TYP_BMAPBTD : TYP_BMAPBTA);
+}
+
+/*
+ * Handle an extents-format fork: the fork area itself is treated as the
+ * array of extent records, XFS_DFORK_NEXTENTS of them, and is passed
+ * straight to process_bmbt_reclist.
+ */
+static void
+process_exinode(xfs_dinode_t *dip, extmap_t **extmapp, int whichfork)
+{
+	process_bmbt_reclist((xfs_bmbt_rec_32_t *)XFS_DFORK_PTR(dip, whichfork),
+			     XFS_DFORK_NEXTENTS(dip, whichfork), extmapp);
+}
+
+/*
+ * Account for one fork of an inode; forks with no extents are ignored.  An
+ * extent map is built from the fork's extent list or bmap btree, its actual
+ * and ideal extent counts are added to the running totals, then it is freed.
+ */
+static void
+process_fork(xfs_dinode_t *dip, int whichfork)
+{
+	extmap_t	*map;
+	int		format;
+	int		nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+	if (nextents == 0)
+		return;
+	map = extmap_alloc(nextents);
+	format = XFS_DFORK_FORMAT(dip, whichfork);
+	if (format == XFS_DINODE_FMT_EXTENTS)
+		process_exinode(dip, &map, whichfork);
+	else if (format == XFS_DINODE_FMT_BTREE)
+		process_btinode(dip, &map, whichfork);
+	extcount_actual += map->nents;
+	extcount_ideal += extmap_ideal(map);
+	xfree(map);
+}
+
+/*
+ * Pick, from the inode's type and the command flags, which forks to count,
+ * process them, and with -v print what this inode added to the totals.
+ *
+ * Data forks: directories need -d, symlinks -l, regular files -f.  A regular
+ * file is still skipped if it has XFS_DIFLAG_REALTIME without -r, is the
+ * superblock's rbmino/rsumino without -R, or its uquotino/pquotino without
+ * -q.  Other types are never counted.  Attribute forks need -a.
+ */
+static void
+process_inode(xfs_agf_t *agf, xfs_agino_t agino, xfs_dinode_t *dip)
+{
+	xfs_dinode_core_t	*core = &dip->di_core;
+	__uint64_t		actual_before;
+	__uint64_t		ideal_before;
+	xfs_ino_t		ino;
+	int			want_attr;
+	int			want_data;
+
+	ino = XFS_AGINO_TO_INO(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), agino);
+	switch (INT_GET(core->di_mode, ARCH_CONVERT) & IFMT) {
+	case IFDIR:
+		want_data = dflag;
+		break;
+	case IFLNK:
+		want_data = lflag;
+		break;
+	case IFREG:
+		want_data = fflag;
+		if (!rflag && (INT_GET(core->di_flags, ARCH_CONVERT) & XFS_DIFLAG_REALTIME))
+			want_data = 0;
+		else if (!Rflag && (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino))
+			want_data = 0;
+		else if (!qflag && (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_pquotino))
+			want_data = 0;
+		break;
+	default:
+		want_data = 0;
+		break;
+	}
+	actual_before = extcount_actual;
+	ideal_before = extcount_ideal;
+	if (want_data)
+		process_fork(dip, XFS_DATA_FORK);
+	want_attr = aflag && XFS_DFORK_Q(dip);
+	if (want_attr)
+		process_fork(dip, XFS_ATTR_FORK);
+	if (vflag && (want_data || want_attr))
+		dbprintf("inode %lld actual %lld ideal %lld\n",
+			ino, extcount_actual - actual_before, extcount_ideal - ideal_before);
+}
+
+/*
+ * Scan one AG: read its AGF and AGI, then walk the inode btree rooted in the
+ * AGI.  An unreadable header is reported and the AG skipped.
+ */
+static void
+scan_ag(xfs_agnumber_t agno)
+{
+	xfs_agf_t	*agf;
+	xfs_agi_t	*agi;
+
+	push_cur();
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+		DB_RING_IGN, NULL);
+	agf = iocur_top->data;
+	if (agf == NULL) {
+		dbprintf("can't read agf block for ag %u\n", agno);
+		pop_cur();
+		return;
+	}
+	push_cur();
+	set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1,
+		DB_RING_IGN, NULL);
+	agi = iocur_top->data;
+	if (agi == NULL)
+		dbprintf("can't read agi block for ag %u\n", agno);
+	else
+		scan_sbtree(agf, INT_GET(agi->agi_root, ARCH_CONVERT),
+			INT_GET(agi->agi_level, ARCH_CONVERT), scanfunc_ino, TYP_INOBT);
+	pop_cur();
+	pop_cur();
+}
+
+/*
+ * Read the btree block at filesystem block 'root' and hand it to func with
+ * the level of that block (nlevels - 1).  The cursor pushed here is popped
+ * on every path, including a failed read.
+ */
+static void
+scan_lbtree(xfs_fsblock_t root, int nlevels, scan_lbtree_f_t func,
+	    extmap_t **extmapp, typnm_t btype)
+{
+	push_cur();
+	set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
+		NULL);
+	if (iocur_top->data == NULL)
+		dbprintf("can't read btree block %u/%u\n",
+			XFS_FSB_TO_AGNO(mp, root),
+			XFS_FSB_TO_AGBNO(mp, root));
+	else
+		(*func)(iocur_top->data, nlevels - 1, extmapp, btype);
+	pop_cur();
+}
+
+/*
+ * Read the btree block at AG block 'root' of the AG named by agf->agf_seqno
+ * and hand it to func with that block's level (nlevels - 1).  The cursor
+ * pushed here is popped on every path, including a failed read.
+ */
+static void
+scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root, int nlevels,
+	    scan_sbtree_f_t func, typnm_t btype)
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	push_cur();
+	set_cur(&typtab[btype], XFS_AGB_TO_DADDR(mp, seqno, root),
+		blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL)
+		dbprintf("can't read btree block %u/%u\n", seqno, root);
+	else
+		(*func)(iocur_top->data, nlevels - 1, agf);
+	pop_cur();
+}
+
+/*
+ * Process one bmap btree block: level-0 records go to process_bmbt_reclist,
+ * pointers of other levels to scan_lbtree.  NOTE(review): pointers are found
+ * with m_bmap_dmxr[0] like the records; scanfunc_ino uses index 1 - confirm.
+ */
+static void
+scanfunc_bmap(xfs_btree_lblock_t *ablock, int level, extmap_t **extmapp,
+	      typnm_t btype)
+{
+	xfs_bmbt_block_t	*blk = (xfs_bmbt_block_t *)ablock;
+	xfs_bmbt_ptr_t		*ptrs;
+	int			n, nrecs = INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+
+	if (level == 0) {
+		process_bmbt_reclist((xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+			mp->m_sb.sb_blocksize, xfs_bmbt, blk, 1,
+			mp->m_bmap_dmxr[0]), nrecs, extmapp);
+		return;
+	}
+	ptrs = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, blk, 1,
+		mp->m_bmap_dmxr[0]);
+	for (n = 0; n < nrecs; n++)
+		scan_lbtree(INT_GET(ptrs[n], ARCH_CONVERT), level, scanfunc_bmap, extmapp, btype);
+}
+
+/*
+ * Process one inode btree block.  At level 0, read the inode chunk of each
+ * record and call process_inode on every inode not marked free in the
+ * record; at other levels follow each pointer through scan_sbtree.
+ */
+static void
+scanfunc_ino(xfs_btree_sblock_t *ablock, int level, xfs_agf_t *agf)
+{
+	xfs_inobt_block_t	*block = (xfs_inobt_block_t *)ablock;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	xfs_agino_t		agino;
+	xfs_dinode_t		*dip;
+	xfs_dinode_core_t	tdic;
+	xfs_inobt_ptr_t		*pp;
+	xfs_inobt_rec_t		*rp;
+	int			i, j, off;
+
+	if (level != 0) {
+		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+			1, mp->m_inobt_mxr[1]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+			scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level,
+				scanfunc_ino, TYP_INOBT);
+		return;
+	}
+	rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+		1, mp->m_inobt_mxr[0]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+		agino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+		off = XFS_INO_TO_OFFSET(mp, agino);
+		push_cur();
+		set_cur(&typtab[TYP_INODE],
+			XFS_AGB_TO_DADDR(mp, seqno,
+					 XFS_AGINO_TO_AGBNO(mp, agino)),
+			(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+			DB_RING_IGN, NULL);
+		if (iocur_top->data == NULL) {
+			dbprintf("can't read inode block %u/%u\n",
+				seqno, XFS_AGINO_TO_AGBNO(mp, agino));
+			/* drop the cursor pushed above before moving on */
+			pop_cur();
+			continue;
+		}
+		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+			if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT))
+				continue;
+			dip = (xfs_dinode_t *)((char *)iocur_top->data +
+				((off + j) << mp->m_sb.sb_inodelog));
+			/* convert the core, then copy it back into the inode */
+			libxfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core,
+				&tdic, 1, ARCH_CONVERT);
+			memcpy(&dip->di_core, &tdic, sizeof(xfs_dinode_core_t));
+			process_inode(agf, agino + j, dip);
+		}
+		pop_cur();
+	}
+}
diff --git a/db/frag.h b/db/frag.h
new file mode 100644
index 000000000..b7986ed70
--- /dev/null
+++ b/db/frag.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	frag_init(void);
diff --git a/db/freesp.c b/db/freesp.c
new file mode 100644
index 000000000..160d2360c
--- /dev/null
+++ b/db/freesp.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "command.h"
+#include "data.h"
+#include "freesp.h"
+#include "io.h"
+#include "type.h"
+#include "output.h"
+#include "mount.h"
+#include "malloc.h"
+
+typedef struct histent	/* one bin of the free-extent length histogram */
+{
+	int		low;	/* smallest extent length belonging to this bin */
+	int		high;	/* largest extent length belonging to this bin */
+	long long	count;	/* number of free extents counted in this bin */
+	long long	blocks;	/* summed length of those extents */
+} histent_t;
+
+static void	addhistent(int h);
+static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
+			  xfs_extlen_t len);
+static int	freesp_f(int argc, char **argv);
+static void	histinit(int maxlen);
+static int	init(int argc, char **argv);
+static void	printhist(void);
+static void	scan_ag(xfs_agnumber_t agno);
+static void	scanfunc_bno(xfs_btree_sblock_t *ablock, typnm_t typ, int level,
+			     xfs_agf_t *agf);
+static void	scanfunc_cnt(xfs_btree_sblock_t *ablock, typnm_t typ, int level,
+			     xfs_agf_t *agf);
+static void	scan_freelist(xfs_agf_t *agf);
+static void	scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root, typnm_t typ,
+			    int nlevels,
+			    void (*func)(xfs_btree_sblock_t *block, typnm_t typ,
+					 int level, xfs_agf_t *agf));
+static int	usage(void);
+
+static int		agcount;	/* number of entries in aglist (-a) */
+static xfs_agnumber_t	*aglist;	/* AGs to scan; all AGs if agcount is 0 */
+static int		countflag;	/* -c: walk the CNT btree, not the BNO one */
+static int		dumpflag;	/* -d: print every free extent found */
+static int		equalsize;	/* -e: step between bin lower bounds */
+static histent_t	*hist;		/* histogram bins */
+static int		histcount;	/* number of entries in hist */
+static int		multsize;	/* -b/-m: ratio between bin lower bounds */
+static int		seen1;		/* set once a bin starting at 1 exists */
+static int		summaryflag;	/* -s: print totals and average size */
+static long long	totblocks;	/* free blocks seen so far */
+static long long	totexts;	/* free extents seen so far */
+
+static const cmdinfo_t	freesp_cmd =
+	{ "freesp", NULL, freesp_f, 0, -1, 0,
+	  "[-bcdfs] [-a agno]... [-e binsize] [-h h1]... [-m binmult]", 
+	  "summarize free space for filesystem", NULL };
+
+/*
+ * Return 1 if agno should be scanned (no -a given, or agno was listed), else 0.
+ */
+static int
+inaglist(xfs_agnumber_t agno)
+{
+	int	n = agcount;
+
+	while (n-- > 0)
+		if (aglist[n] == agno)
+			return 1;
+	return agcount == 0;
+}
+
+/*
+ * Report on freespace usage in xfs filesystem.  aglist and hist are freed on
+ * every path, including a usage error after -a/-h options were stored, and
+ * an empty scan reports an average of 0 rather than 0/0.  Always returns 0.
+ */
+static int
+freesp_f(int argc, char **argv)
+{
+	xfs_agnumber_t	agno;
+
+	if (!init(argc, argv))
+		goto out;
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+		if (inaglist(agno))
+			scan_ag(agno);
+	if (histcount)
+		printhist();
+	if (summaryflag) {
+		dbprintf("total free extents %lld\n", totexts);
+		dbprintf("total free blocks %lld\n", totblocks);
+		dbprintf("average free extent size %g\n",
+			totexts ? (double)totblocks / (double)totexts : 0.0);
+	}
+out:
+	if (aglist)
+		xfree(aglist);
+	if (hist)
+		xfree(hist);
+	return 0;
+}
+
+/* Register the freesp command through add_command. */
+void freesp_init(void)
+{
+	add_command(&freesp_cmd);
+}
+
+/* Append the AG number parsed from string a (with atoi) to aglist. */
+static void aglistadd(char *a)
+{
+	xfs_agnumber_t	agno = (xfs_agnumber_t)atoi(a);
+
+	aglist = xrealloc(aglist, (agcount + 1) * sizeof(*aglist));
+	aglist[agcount++] = agno;
+}
+
+/*
+ * Parse options and build the bins; returns 1, or 0 after a usage message.
+ * -e < 1 and -m < 2 are rejected: histinit's bin loop would run away on them.
+ */
+static int
+init(int argc, char **argv)
+{
+	int		c;
+	int		speced = 0;
+
+	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
+	histcount = seen1 = summaryflag = 0;
+	totblocks = totexts = 0;
+	aglist = NULL;
+	hist = NULL;
+	while ((c = getopt(argc, argv, "a:bcde:h:m:s")) != EOF) {
+		switch (c) {
+		case 'a':
+			aglistadd(optarg);
+			break;
+		case 'b':
+			if (speced)
+				return usage();
+			multsize = 2;
+			speced = 1;
+			break;
+		case 'c':
+			countflag = 1;
+			break;
+		case 'd':
+			dumpflag = 1;
+			break;
+		case 'e':
+			if (speced || (equalsize = atoi(optarg)) <= 0)
+				return usage();
+			speced = 1;
+			break;
+		case 'h':
+			if (speced && !histcount)
+				return usage();
+			addhistent(atoi(optarg));
+			speced = 1;
+			break;
+		case 'm':
+			if (speced || (multsize = atoi(optarg)) <= 1)
+				return usage();
+			speced = 1;
+			break;
+		case 's':
+			summaryflag = 1;
+			break;
+		case '?':
+			return usage();
+		}
+	}
+	if (optind != argc)
+		return usage();
+	if (!speced)
+		multsize = 2;
+	histinit((int)mp->m_sb.sb_agblocks);
+	return 1;
+}
+
+/* Print the freesp argument summary; returns 0 so callers can return it. */
+static int usage(void)
+{
+	dbprintf("freesp arguments: [-bcdfs] [-a agno] [-e binsize] [-h h1]... "
+		 "[-m binmult]\n");
+	return 0;
+}
+
+/*
+ * Account for one AG's free space: the freelist, then the CNT btree (with
+ * -c) or the BNO btree.  An unreadable AGF is reported and the AG skipped.
+ */
+static void
+scan_ag(xfs_agnumber_t agno)
+{
+	xfs_agf_t	*agf;
+	int		bt = countflag ? XFS_BTNUM_CNT : XFS_BTNUM_BNO;
+
+	push_cur();
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+		DB_RING_IGN, NULL);
+	agf = iocur_top->data;
+	if (agf != NULL) {
+		scan_freelist(agf);
+		scan_sbtree(agf, INT_GET(agf->agf_roots[bt], ARCH_CONVERT),
+			countflag ? TYP_CNTBT : TYP_BNOBT,
+			INT_GET(agf->agf_levels[bt], ARCH_CONVERT),
+			countflag ? scanfunc_cnt : scanfunc_bno);
+	} else {
+		dbprintf("can't read agf block for ag %u\n", agno);
+	}
+	pop_cur();
+}
+
+/*
+ * Add every block on the AG freelist to the histogram as a one-block extent.
+ * An unreadable AGFL or an out-of-range first/last index skips the walk.
+ */
+static void scan_freelist(xfs_agf_t *agf)
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	unsigned int	i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+	unsigned int	last = INT_GET(agf->agf_fllast, ARCH_CONVERT);
+	xfs_agfl_t	*agfl;
+
+	if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+		return;
+	push_cur();
+	set_cur(&typtab[TYP_AGFL], XFS_AG_DADDR(mp, seqno, XFS_AGFL_DADDR), 1,
+		DB_RING_IGN, NULL);
+	agfl = iocur_top->data;
+	if (agfl == NULL || i >= XFS_AGFL_SIZE || last >= XFS_AGFL_SIZE)
+		dbprintf("agf %u freelist unreadable or bad, skipping\n", seqno);
+	else
+		for (;; i = (i + 1) % XFS_AGFL_SIZE) {
+			addtohist(seqno, INT_GET(agfl->agfl_bno[i], ARCH_CONVERT), 1);
+			if (i == last)
+				break;
+		}
+	pop_cur();
+}
+
+/* Hand the btree block at AG block 'root' to func; report it if unreadable. */
+static void
+scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root, typnm_t typ, int nlevels,
+	    void (*func)(xfs_btree_sblock_t *block, typnm_t typ, int level,
+			 xfs_agf_t *agf))
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	push_cur();
+	set_cur(&typtab[typ], XFS_AGB_TO_DADDR(mp, seqno, root), blkbb,
+		DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL)
+		dbprintf("can't read btree block %u/%u\n", seqno, root);
+	else
+		(*func)((xfs_btree_sblock_t *)iocur_top->data, typ, nlevels - 1, agf);
+	pop_cur();
+}
+
+/*
+ * Process one BNO btree block: level-0 records go into the histogram, other
+ * levels follow each child pointer, converted with INT_GET like the records.
+ */
+/*ARGSUSED*/
+static void
+scanfunc_bno(xfs_btree_sblock_t *ablock, typnm_t typ, int level, xfs_agf_t *agf)
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+	int			i;
+
+	if (level == 0) {
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+			addtohist(INT_GET(agf->agf_seqno, ARCH_CONVERT),
+				INT_GET(rp[i].ar_startblock, ARCH_CONVERT), INT_GET(rp[i].ar_blockcount, ARCH_CONVERT));
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), typ, level, scanfunc_bno);
+}
+
+/*
+ * Process one CNT btree block: level-0 records go into the histogram, other
+ * levels follow each child pointer, converted with INT_GET like the records.
+ */
+static void
+scanfunc_cnt(xfs_btree_sblock_t *ablock, typnm_t typ, int level, xfs_agf_t *agf)
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+	int			i;
+
+	if (level == 0) {
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+			addtohist(INT_GET(agf->agf_seqno, ARCH_CONVERT),
+				INT_GET(rp[i].ar_startblock, ARCH_CONVERT), INT_GET(rp[i].ar_blockcount, ARCH_CONVERT));
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), typ, level, scanfunc_cnt);
+}
+
+/*
+ * Append a zeroed bin with lower bound h (0 becomes 1); histinit sets 'high'.
+ */
+static void addhistent(int h)
+{
+	histent_t	*bin;
+
+	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
+	bin = &hist[histcount++];
+	bin->low = h ? h : 1;
+	bin->count = bin->blocks = 0;
+	seen1 |= (bin->low == 1);
+}
+
+/*
+ * Count one free extent: print it when dumping (-d), add it to the totals,
+ * and credit it to the first bin whose upper bound is at least len.
+ */
+static void
+addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len)
+{
+	int	n;
+
+	if (dumpflag)
+		dbprintf("%8d %8d %8d\n", agno, agbno, len);
+	totexts++;
+	totblocks += len;
+	for (n = 0; n < histcount && hist[n].high < len; n++)
+		continue;
+	if (n < histcount) {
+		hist[n].count++;
+		hist[n].blocks += len;
+	}
+}
+
+/* qsort comparator: order bins by 'low' without an overflowing subtraction. */
+static int hcmp(const void *a, const void *b)
+{
+	int	la = ((const histent_t *)a)->low, lb = ((const histent_t *)b)->low;
+
+	return (la > lb) - (la < lb);
+}
+
+/*
+ * Build the bins: lower bounds 1, 1+e, ... (-e), 1, m, m*m, ... (-b/-m) or the
+ * sorted -h values; steps that cannot advance are ignored and the bound is
+ * kept in a long long so it cannot overflow.  Each bin ends below the next.
+ */
+static void histinit(int maxlen)
+{
+	long long	low;
+	int		i;
+
+	if (equalsize > 0) {
+		for (low = 1; low < maxlen; low += equalsize)
+			addhistent((int)low);
+	} else if (multsize > 1) {
+		for (low = 1; low < maxlen; low *= multsize)
+			addhistent((int)low);
+	} else {
+		if (!seen1)
+			addhistent(1);
+		qsort(hist, histcount, sizeof(*hist), hcmp);
+	}
+	for (i = 0; i < histcount; i++)
+		hist[i].high = (i < histcount - 1) ? hist[i + 1].low - 1 : maxlen;
+}
+
+/* Print a header, then one line for every bin that counted an extent. */
+static void printhist(void)
+{
+	histent_t	*bin = hist;
+	int		left;
+
+	dbprintf("%7s %7s %7s %7s %6s\n", "from", "to", "extents", "blocks", "pct");
+	for (left = histcount; left > 0; left--, bin++) {
+		if (!bin->count)
+			continue;
+		dbprintf("%7d %7d %7lld %7lld %6.2f\n", bin->low, bin->high,
+			bin->count, bin->blocks, bin->blocks * 100.0 / totblocks);
+	}
+}
diff --git a/db/freesp.h b/db/freesp.h
new file mode 100644
index 000000000..15ea50567
--- /dev/null
+++ b/db/freesp.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	freesp_init(void);
diff --git a/db/hash.c b/db/hash.c
new file mode 100644
index 000000000..e11851b51
--- /dev/null
+++ b/db/hash.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "addr.h"
+#include "command.h"
+#include "type.h"
+#include "io.h"
+#include "output.h"
+
+static int hash_f(int argc, char **argv);
+static void hash_help(void);
+
+static const cmdinfo_t hash_cmd =
+	{ "hash", NULL, hash_f, 1, 1, 0, "string",
+	  "calculate hash value", hash_help };
+
+/* Print the extended help text for the hash command. */
+static void
+hash_help(void)
+{
+	dbprintf(
+"\n"
+" 'hash' prints out the calculated hash value for a string using the\n"
+"directory/attribute code hash function.\n"
+"\n"
+" Usage:  \"hash <string>\"\n"
+"\n"
+);
+}
+
+/*
+ * hash command: print in hex the libxfs_da_hashname hash of argv[1]; returns 0.
+ */
+/* ARGSUSED */
+static int
+hash_f(int argc, char **argv)
+{
+	char	*name = argv[1];
+
+	dbprintf("0x%x\n", libxfs_da_hashname(name, (int)strlen(name)));
+	return 0;
+}
+
+/* Register the hash command through add_command. */
+void hash_init(void)
+{
+	add_command(&hash_cmd);
+}
diff --git a/db/hash.h b/db/hash.h
new file mode 100644
index 000000000..05c807503
--- /dev/null
+++ b/db/hash.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void		hash_init(void);
+extern xfs_dahash_t	xfs_da_hashname(char *name, int namelen);
diff --git a/db/help.c b/db/help.c
new file mode 100644
index 000000000..56dd8b8ac
--- /dev/null
+++ b/db/help.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "help.h"
+#include "output.h"
+
+static void	help_all(void);
+static void	help_onecmd(const char *cmd, const cmdinfo_t *ct);
+static int	help_f(int argc, char **argv);
+static void	help_oneline(const char *cmd, const cmdinfo_t *ct);
+
+static const cmdinfo_t	help_cmd =
+	{ "help", "?", help_f, 0, 1, 0, "[command]",
+	  "help for one or all commands", NULL };
+
+/* Print the one-line summary of every command, then a usage hint. */
+static void help_all(void)
+{
+	int	n;
+
+	for (n = 0; n < ncmds; n++)
+		help_oneline(cmdtab[n].name, &cmdtab[n]);
+	dbprintf("\nUse 'help commandname' for extended help.\n");
+}
+
+/*
+ * help command: with no argument list every command; otherwise print the
+ * summary and extended help of the named command, or say it was not found.
+ * Always returns 0.
+ */
+static int
+help_f(int argc, char **argv)
+{
+	const cmdinfo_t	*ct;
+
+	if (argc == 1) {
+		help_all();
+	} else if ((ct = find_command(argv[1])) != NULL) {
+		help_onecmd(argv[1], ct);
+	} else {
+		dbprintf("command %s not found\n", argv[1]);
+	}
+	return 0;
+}
+
+/* Register the help command through add_command. */
+void help_init(void)
+{
+	add_command(&help_cmd);
+}
+
+/*
+ * Print the summary line for cmd, then its extended help if it has any.
+ */
+static void help_onecmd(const char *cmd, const cmdinfo_t *ct)
+{
+	help_oneline(cmd, ct);
+	if (ct->help != NULL)
+		(*ct->help)();
+}
+
+/*
+ * Print "name [args] -- oneline"; a NULL cmd selects ct->name plus "(or alt)".
+ */
+static void
+help_oneline(const char *cmd, const cmdinfo_t *ct)
+{
+	if (cmd != NULL)
+		dbprintf("%s ", cmd);
+	else
+		dbprintf("%s ", ct->name);
+	if (cmd == NULL && ct->altname)
+		dbprintf("(or %s) ", ct->altname);
+	if (ct->args)
+		dbprintf("%s ", ct->args);
+	dbprintf("-- %s\n", ct->oneline);
+}
+
diff --git a/db/help.h b/db/help.h
new file mode 100644
index 000000000..4af57d24e
--- /dev/null
+++ b/db/help.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	help_init(void);
diff --git a/db/init.c b/db/init.c
new file mode 100644
index 000000000..e4233dfc1
--- /dev/null
+++ b/db/init.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include <signal.h>
+#include "command.h"
+#include "data.h"
+#include "init.h"
+#include "input.h"
+#include "io.h"
+#include "mount.h"
+#include "sig.h"
+#include "output.h"
+
+char	*fsdevice;
+
+/* Print the command-line synopsis and exit with status 1. */
+static void usage(void)
+{
+	dbprintf("Usage: %s [-c cmd]... [-p prog] [-l logdev] [-frxV] devname\n", progname);
+	exit(1);
+}
+
+/* Report a failed write to the -c temporary file and exit with status 1. */
+static void
+cfile_write_failed(const char *call)
+{
+	perror(call);
+	dbprintf("%s: error writing temporary file\n", progname);
+	exit(1);
+}
+
+/*
+ * Process the command line, then run libxfs_init, dbmount, init_commands and
+ * init_sig; failures checked here exit with status 1.  -c arguments are
+ * written to a temporary file that, ending in "q", is passed to pushfile.
+ */
+void
+init(int argc, char **argv)
+{
+	FILE		*cfile = NULL;
+	int		opt;
+
+	progname = basename(argv[0]);
+	while ((opt = getopt(argc, argv, "c:fip:rxVl:")) != EOF) {
+		switch (opt) {
+		case 'c':
+			if (cfile == NULL && (cfile = tmpfile()) == NULL) {
+				perror("tmpfile");
+				exit(1);
+			}
+			if (fprintf(cfile, "%s\n", optarg) < 0)
+				cfile_write_failed("fprintf(tmpfile)");
+			break;
+		case 'f':
+			xfsargs.disfile = 1;
+			break;
+		case 'i':
+			xfsargs.isreadonly =
+				(LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE);
+			flag_readonly = 1;
+			break;
+		case 'l':
+			xfsargs.logname = optarg;
+			break;
+		case 'p':
+			progname = optarg;
+			break;
+		case 'r':
+			xfsargs.isreadonly = LIBXFS_ISREADONLY;
+			flag_readonly = 1;
+			break;
+		case 'V':
+			printf("%s version %s\n", progname, VERSION);
+			break;
+		case 'x':
+			flag_expert_mode = 1;
+			break;
+		case '?':
+			usage();
+			/*NOTREACHED*/
+		}
+	}
+	if (optind + 1 != argc) {
+		usage();
+		/*NOTREACHED*/
+	}
+	fsdevice = argv[optind];
+	if (xfsargs.disfile)
+		xfsargs.dname = fsdevice;
+	else
+		xfsargs.volname = fsdevice;
+	xfsargs.notvolok = 1;
+	if (!libxfs_init(&xfsargs)) {
+		fputs("\nfatal error -- couldn't initialize XFS library\n",
+			stderr);
+		exit(1);
+	}
+	mp = dbmount();
+	if (mp == NULL) {
+		dbprintf("%s: %s is not a valid filesystem\n",
+			progname, fsdevice);
+		exit(1);
+		/*NOTREACHED*/
+	}
+	blkbb = 1 << mp->m_blkbb_log;
+	push_cur();
+	init_commands();
+	init_sig();
+	if (cfile == NULL)
+		return;
+	if (fprintf(cfile, "q\n") < 0)
+		cfile_write_failed("fprintf(tmpfile)");
+	if (fflush(cfile) < 0)
+		cfile_write_failed("fflush(tmpfile)");
+	rewind(cfile);
+	pushfile(cfile);
+}
diff --git a/db/init.h b/db/init.h
new file mode 100644
index 000000000..5cbbda009
--- /dev/null
+++ b/db/init.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern char	*fsdevice;
+extern void	init(int argc, char **argv);
diff --git a/db/inobt.c b/db/inobt.c
new file mode 100644
index 000000000..4b43d9455
--- /dev/null
+++ b/db/inobt.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inobt.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int	inobt_key_count(void *obj, int startoff);
+static int	inobt_key_offset(void *obj, int startoff, int idx);
+static int	inobt_ptr_count(void *obj, int startoff);
+static int	inobt_ptr_offset(void *obj, int startoff, int idx);
+static int	inobt_rec_count(void *obj, int startoff);
+static int	inobt_rec_offset(void *obj, int startoff, int idx);
+
+const field_t	inobt_hfld[] = {
+	{ "", FLDT_INOBT, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_inobt_block_t, bb_ ## f))
+const field_t	inobt_flds[] = {
+	{ "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+	{ "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+	{ "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+	{ "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_INOBT },
+	{ "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_INOBT },
+	{ "recs", FLDT_INOBTREC, inobt_rec_offset, inobt_rec_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "keys", FLDT_INOBTKEY, inobt_key_offset, inobt_key_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ "ptrs", FLDT_INOBTPTR, inobt_ptr_offset, inobt_ptr_count,
+	  FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_INOBT },
+	{ NULL }
+};
+
+#define	KOFF(f)	bitize(offsetof(xfs_inobt_key_t, ir_ ## f))
+const field_t	inobt_key_flds[] = {
+	{ "startino", FLDT_AGINO, OI(KOFF(startino)), C1, 0, TYP_INODE },
+	{ NULL }
+};
+
+#define	ROFF(f)	bitize(offsetof(xfs_inobt_rec_t, ir_ ## f))
+const field_t	inobt_rec_flds[] = {
+	{ "startino", FLDT_AGINO, OI(ROFF(startino)), C1, 0, TYP_INODE },
+	{ "freecount", FLDT_INT32D, OI(ROFF(freecount)), C1, 0, TYP_NONE },
+	{ "free", FLDT_INOFREE, OI(ROFF(free)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/*ARGSUSED*/
+static int
+inobt_key_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_inobt_block_t	*block;
+
+	ASSERT(startoff == 0);
+	block = obj;
+	if (INT_GET(block->bb_level, ARCH_CONVERT) == 0)
+		return 0;
+	return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+inobt_key_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_inobt_block_t	*block;
+	xfs_inobt_key_t		*kp;
+
+	ASSERT(startoff == 0);
+	block = obj;
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+	kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0));
+	return bitize((int)((char *)kp - (char *)block));
+}
+
+/*ARGSUSED*/
+static int
+inobt_ptr_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_inobt_block_t	*block;
+
+	ASSERT(startoff == 0);
+	block = obj;
+	if (INT_GET(block->bb_level, ARCH_CONVERT) == 0)
+		return 0;
+	return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+inobt_ptr_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_inobt_block_t	*block;
+	xfs_inobt_ptr_t		*pp;
+
+	ASSERT(startoff == 0);
+	block = obj;
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0));
+	return bitize((int)((char *)pp - (char *)block));
+}
+
+/*ARGSUSED*/
+static int
+inobt_rec_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_inobt_block_t	*block;
+
+	ASSERT(startoff == 0);
+	block = obj;
+	if (INT_GET(block->bb_level, ARCH_CONVERT) > 0)
+		return 0;
+	return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*ARGSUSED*/
+static int
+inobt_rec_offset(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_inobt_block_t	*block;
+	xfs_inobt_rec_t		*rp;
+
+	ASSERT(startoff == 0);
+	block = obj;
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0);
+	rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, idx,
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1));
+	return bitize((int)((char *)rp - (char *)block));
+}
+
+/*ARGSUSED*/
+int
+inobt_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_blocksize);
+}
diff --git a/db/inobt.h b/db/inobt.h
new file mode 100644
index 000000000..23aeb7e21
--- /dev/null
+++ b/db/inobt.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	inobt_flds[];
+extern const struct field	inobt_hfld[];
+extern const struct field	inobt_key_flds[];
+extern const struct field	inobt_rec_flds[];
+
+extern int	inobt_size(void *obj, int startoff, int idx);
diff --git a/db/inode.c b/db/inode.c
new file mode 100644
index 000000000..a12e32fe3
--- /dev/null
+++ b/db/inode.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "print.h"
+#include "block.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int	inode_a_bmbt_count(void *obj, int startoff);
+static int	inode_a_bmx_count(void *obj, int startoff);
+static int	inode_a_count(void *obj, int startoff);
+static int	inode_a_offset(void *obj, int startoff, int idx);
+static int	inode_a_sfattr_count(void *obj, int startoff);
+static int	inode_core_nlinkv2_count(void *obj, int startoff);
+static int	inode_core_onlink_count(void *obj, int startoff);
+static int	inode_core_projid_count(void *obj, int startoff);
+static int	inode_core_nlinkv1_count(void *obj, int startoff);
+static int	inode_f(int argc, char **argv);
+static int	inode_u_bmbt_count(void *obj, int startoff);
+static int	inode_u_bmx_count(void *obj, int startoff);
+static int	inode_u_c_count(void *obj, int startoff);
+static int	inode_u_dev_count(void *obj, int startoff);
+static int	inode_u_muuid_count(void *obj, int startoff);
+static int	inode_u_sfdir_count(void *obj, int startoff);
+static int	inode_u_sfdir2_count(void *obj, int startoff);
+static int	inode_u_symlink_count(void *obj, int startoff);
+
+static const cmdinfo_t	inode_cmd =
+	{ "inode", NULL, inode_f, 0, 1, 1, "[inode#]",
+	  "set current inode", NULL };
+
+const field_t	inode_hfld[] = {
+	{ "", FLDT_INODE, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_dinode_t, di_ ## f))
+const field_t	inode_flds[] = {
+	{ "core", FLDT_DINODE_CORE, OI(OFF(core)), C1, 0, TYP_NONE },
+	{ "next_unlinked", FLDT_AGINO, OI(OFF(next_unlinked)), C1, 0,
+	  TYP_INODE },
+	{ "u", FLDT_DINODE_U, OI(OFF(u)), C1, 0, TYP_NONE },
+	{ "a", FLDT_DINODE_A, inode_a_offset, inode_a_count,
+	  FLD_COUNT|FLD_OFFSET, TYP_NONE },
+	{ NULL }
+};
+
+#define	COFF(f)	bitize(offsetof(xfs_dinode_core_t, di_ ## f))
+const field_t	inode_core_flds[] = {
+	{ "magic", FLDT_UINT16X, OI(COFF(magic)), C1, 0, TYP_NONE },
+	{ "mode", FLDT_UINT16O, OI(COFF(mode)), C1, 0, TYP_NONE },
+	{ "version", FLDT_INT8D, OI(COFF(version)), C1, 0, TYP_NONE },
+	{ "format", FLDT_DINODE_FMT, OI(COFF(format)), C1, 0, TYP_NONE },
+	{ "nlinkv1", FLDT_UINT16D, OI(COFF(onlink)), inode_core_nlinkv1_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "nlinkv2", FLDT_UINT32D, OI(COFF(nlink)), inode_core_nlinkv2_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "onlink", FLDT_UINT16D, OI(COFF(onlink)), inode_core_onlink_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "projid", FLDT_UINT16D, OI(COFF(projid)), inode_core_projid_count,
+	  FLD_COUNT, TYP_NONE },
+	{ "uid", FLDT_UINT32D, OI(COFF(uid)), C1, 0, TYP_NONE },
+	{ "gid", FLDT_UINT32D, OI(COFF(gid)), C1, 0, TYP_NONE },
+	{ "atime", FLDT_TIMESTAMP, OI(COFF(atime)), C1, 0, TYP_NONE },
+	{ "mtime", FLDT_TIMESTAMP, OI(COFF(mtime)), C1, 0, TYP_NONE },
+	{ "ctime", FLDT_TIMESTAMP, OI(COFF(ctime)), C1, 0, TYP_NONE },
+	{ "size", FLDT_FSIZE, OI(COFF(size)), C1, 0, TYP_NONE },
+	{ "nblocks", FLDT_DRFSBNO, OI(COFF(nblocks)), C1, 0, TYP_NONE },
+	{ "extsize", FLDT_EXTLEN, OI(COFF(extsize)), C1, 0, TYP_NONE },
+	{ "nextents", FLDT_EXTNUM, OI(COFF(nextents)), C1, 0, TYP_NONE },
+	{ "naextents", FLDT_AEXTNUM, OI(COFF(anextents)), C1, 0, TYP_NONE },
+	{ "forkoff", FLDT_UINT8D, OI(COFF(forkoff)), C1, 0, TYP_NONE },
+	{ "aformat", FLDT_DINODE_FMT, OI(COFF(aformat)), C1, 0, TYP_NONE },
+	{ "dmevmask", FLDT_UINT32X, OI(COFF(dmevmask)), C1, 0, TYP_NONE },
+	{ "dmstate", FLDT_UINT16D, OI(COFF(dmstate)), C1, 0, TYP_NONE },
+	{ "flags", FLDT_UINT16X, OI(COFF(flags)), C1, FLD_SKIPALL, TYP_NONE },
+	{ "newrtbm", FLDT_UINT1,
+	  OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_NEWRTBM_BIT - 1), C1,
+	  0, TYP_NONE },
+	{ "prealloc", FLDT_UINT1,
+	  OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_PREALLOC_BIT - 1), C1,
+	  0, TYP_NONE },
+	{ "realtime", FLDT_UINT1,
+	  OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_REALTIME_BIT - 1), C1,
+	  0, TYP_NONE },
+	{ "gen", FLDT_UINT32D, OI(COFF(gen)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+#define	TOFF(f)	bitize(offsetof(xfs_timestamp_t, t_ ## f))
+const field_t	timestamp_flds[] = {
+	{ "sec", FLDT_TIME, OI(TOFF(sec)), C1, 0, TYP_NONE },
+	{ "nsec", FLDT_NSEC, OI(TOFF(nsec)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+const field_t	inode_u_flds[] = {
+	{ "bmbt", FLDT_BMROOTD, 0, inode_u_bmbt_count, FLD_COUNT, TYP_NONE },
+	{ "bmx", FLDT_BMAPBTDREC, 0, inode_u_bmx_count, FLD_ARRAY|FLD_COUNT,
+	  TYP_NONE },
+	{ "c", FLDT_CHARNS, 0, inode_u_c_count, FLD_COUNT, TYP_NONE },
+	{ "dev", FLDT_DEV, 0, inode_u_dev_count, FLD_COUNT, TYP_NONE },
+	{ "muuid", FLDT_UUID, 0, inode_u_muuid_count, FLD_COUNT, TYP_NONE },
+	{ "sfdir", FLDT_DIRSHORT, 0, inode_u_sfdir_count, FLD_COUNT, TYP_NONE },
+	{ "sfdir2", FLDT_DIR2SF, 0, inode_u_sfdir2_count, FLD_COUNT, TYP_NONE },
+	{ "symlink", FLDT_CHARNS, 0, inode_u_symlink_count, FLD_COUNT,
+	  TYP_NONE },
+	{ NULL }
+};
+
+const field_t	inode_a_flds[] = {
+	{ "bmbt", FLDT_BMROOTA, 0, inode_a_bmbt_count, FLD_COUNT, TYP_NONE },
+	{ "bmx", FLDT_BMAPBTAREC, 0, inode_a_bmx_count, FLD_ARRAY|FLD_COUNT,
+	  TYP_NONE },
+	{ "sfattr", FLDT_ATTRSHORT, 0, inode_a_sfattr_count, FLD_COUNT,
+	  TYP_NONE },
+	{ NULL }
+};
+
+static const char	*dinode_fmt_name[] =
+	{ "dev", "local", "extents", "btree", "uuid" };
+static const int	dinode_fmt_name_size =
+	sizeof(dinode_fmt_name) / sizeof(dinode_fmt_name[0]);
+
+/*ARGSUSED*/
+int
+fp_dinode_fmt(
+	void			*obj,
+	int			bit,
+	int			count,
+	char			*fmtstr,
+	int			size,
+	int			arg,
+	int			base,
+	int			array)
+{
+	int			bitpos;
+	xfs_dinode_fmt_t	f;
+	int			i;
+
+	for (i = 0, bitpos = bit; i < count; i++, bitpos += size) {
+		f = (xfs_dinode_fmt_t)getbitval(obj, bitpos, size, BVSIGNED);
+		if (array)
+			dbprintf("%d:", i + base);
+		if (f < 0 || f >= dinode_fmt_name_size)
+			dbprintf("%d", (int)f);
+		else
+			dbprintf("%d (%s)", (int)f, dinode_fmt_name[(int)f]);
+		if (i < count - 1)
+			dbprintf(" ");
+	}
+	return 1;
+}
+
+static int
+inode_a_bmbt_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+		return 0;
+	ASSERT((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_BTREE;
+}
+
+static int
+inode_a_bmx_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+		return 0;
+	ASSERT((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_EXTENTS ?
+		INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) : 0;
+}
+
+static int
+inode_a_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(startoff == 0);
+	dip = obj;
+	return XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT);
+}
+
+static int
+inode_a_offset(
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	dip = obj;
+	ASSERT(XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT));
+	return bitize((int)((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip));
+}
+
+static int
+inode_a_sfattr_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+		return 0;
+	ASSERT((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL;
+}
+
+int	/* size of the attribute fork contents, as computed per di_aformat */
+inode_a_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_shortform_t	*asf;
+	xfs_dinode_t		*dip;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	dip = obj;
+	switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
+	case XFS_DINODE_FMT_LOCAL:
+		asf = (xfs_attr_shortform_t *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+		return bitize((int)INT_GET(asf->hdr.totsize, ARCH_CONVERT));	/* on-disk field: convert like di_aformat/di_anextents */
+	case XFS_DINODE_FMT_EXTENTS:
+		return (int)(INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) * bitsz(xfs_bmbt_rec_t));
+	case XFS_DINODE_FMT_BTREE:
+		return bitize((int)XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+	default:	/* any other attribute-fork format contributes no size */
+		return 0;
+	}
+}
+
+static int
+inode_core_nlinkv1_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_1;
+}
+
+static int
+inode_core_nlinkv2_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_2;
+}
+
+static int
+inode_core_onlink_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_2;
+}
+
+static int
+inode_core_projid_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_2;
+}
+
+static int
+inode_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_ino_t	ino;
+	char		*p;
+
+	if (argc > 1) {
+		ino = strtoull(argv[1], &p, 0);
+		if (*p != '\0') {
+			dbprintf("bad value for inode number %s\n", argv[1]);
+			return 0;
+		}
+		set_cur_inode(ino);
+	} else if (iocur_top->ino == NULLFSINO)
+		dbprintf("no current inode\n");
+	else
+		dbprintf("current inode number is %lld\n", iocur_top->ino);
+	return 0;
+}
+
+void
+inode_init(void)
+{
+	add_command(&inode_cmd);
+}
+
+typnm_t
+inode_next_type(void)
+{
+	switch (iocur_top->mode & IFMT) {
+	case IFDIR:
+		return XFS_DIR_IS_V2(mp) ? TYP_DIR2 : TYP_DIR;
+	case IFLNK:
+		return TYP_SYMLINK;
+	case IFREG:
+		if (iocur_top->ino == mp->m_sb.sb_rbmino)
+			return TYP_RTBITMAP;
+		else if (iocur_top->ino == mp->m_sb.sb_rsumino)
+			return TYP_RTSUMMARY;
+		else if (iocur_top->ino == mp->m_sb.sb_uquotino ||
+			 iocur_top->ino == mp->m_sb.sb_pquotino)
+			return TYP_DQBLK;
+		else
+			return TYP_DATA;
+	default:
+		return TYP_NONE;
+	}
+}
+
+int
+inode_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_inodesize);
+}
+
+static int
+inode_u_bmbt_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_BTREE;
+}
+
+static int
+inode_u_bmx_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_EXTENTS ?
+		INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) : 0;
+}
+
+static int
+inode_u_c_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL &&
+	       (INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFREG ?
+		(int)INT_GET(dip->di_core.di_size, ARCH_CONVERT) : 0;
+}
+
+static int
+inode_u_dev_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_DEV;
+}
+
+static int
+inode_u_muuid_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_UUID;
+}
+
+static int
+inode_u_sfdir_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL &&
+	       (INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFDIR
+	       && XFS_DIR_IS_V1(mp);
+}
+
+static int
+inode_u_sfdir2_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL &&
+	       (INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFDIR &&
+	       XFS_DIR_IS_V2(mp);
+}
+
+int
+inode_u_size(
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	dip = obj;
+	switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) {
+	case XFS_DINODE_FMT_DEV:
+		return bitsz(xfs_dev_t);
+	case XFS_DINODE_FMT_LOCAL:
+		return bitize((int)INT_GET(dip->di_core.di_size, ARCH_CONVERT));
+	case XFS_DINODE_FMT_EXTENTS:
+		return (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) * bitsz(xfs_bmbt_rec_t));
+	case XFS_DINODE_FMT_BTREE:
+		return bitize((int)XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT));
+	case XFS_DINODE_FMT_UUID:
+		return bitsz(uuid_t);
+	default:
+		return 0;
+	}
+}
+
+static int
+inode_u_symlink_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL &&
+	       (INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFLNK ?
+		(int)INT_GET(dip->di_core.di_size, ARCH_CONVERT) : 0;
+}
+
+void
+set_cur_inode(
+	xfs_ino_t	ino)
+{
+	xfs_agblock_t	agbno;
+	xfs_agino_t	agino;
+	xfs_agnumber_t	agno;
+	xfs_dinode_t	*dip;
+	int		offset;
+
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+	offset = XFS_AGINO_TO_OFFSET(mp, agino);
+	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+	    offset >= mp->m_sb.sb_inopblock ||
+	    XFS_AGINO_TO_INO(mp, agno, agino) != ino) {	/* pieces must rebuild ino */
+		dbprintf("bad inode number %lld\n", ino);
+		return;
+	}
+	cur_agno = agno;
+	/*
+	 * First set_cur to the block with the inode
+	 * then use off_cur to get the right part of the buffer.
+	 */
+	ASSERT(typtab[TYP_INODE].typnm == TYP_INODE);
+
+	/* ignore ring update here, do it explicitly below */
+	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno),
+		blkbb, DB_RING_IGN, NULL);
+	off_cur(offset << mp->m_sb.sb_inodelog, mp->m_sb.sb_inodesize);
+	dip = iocur_top->data;
+	iocur_top->ino = ino;
+	iocur_top->mode = INT_GET(dip->di_core.di_mode, ARCH_CONVERT);
+	if ((iocur_top->mode & IFMT) == IFDIR)
+		iocur_top->dirino = ino;	/* recorded only when the inode is a directory */
+
+	/* track updated info in ring */
+	ring_add();
+}
diff --git a/db/inode.h b/db/inode.h
new file mode 100644
index 000000000..3d0a22f28
--- /dev/null
+++ b/db/inode.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const struct field	inode_a_flds[];
+extern const struct field	inode_core_flds[];
+extern const struct field	inode_flds[];
+extern const struct field	inode_hfld[];
+extern const struct field	inode_u_flds[];
+extern const struct field	timestamp_flds[];
+
+extern int	fp_dinode_fmt(void *obj, int bit, int count, char *fmtstr,
+			      int size, int arg, int base, int array);
+extern int	inode_a_size(void *obj, int startoff, int idx);
+extern void	inode_init(void);
+extern typnm_t	inode_next_type(void);
+extern int	inode_size(void *obj, int startoff, int idx);
+extern int	inode_u_size(void *obj, int startoff, int idx);
+extern void	set_cur_inode(xfs_ino_t ino);
diff --git a/db/input.c b/db/input.c
new file mode 100644
index 000000000..1394f101b
--- /dev/null
+++ b/db/input.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <signal.h>
+#include <string.h>
+#include "command.h"
+#include "data.h"
+#include "input.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+#include "init.h"
+
+int	inputstacksize;
+FILE	**inputstack;
+FILE	*curinput;
+
+static void	popfile(void);
+static int	source_f(int argc, char **argv);
+
+static const cmdinfo_t	source_cmd =
+	{ "source", NULL, source_f, 1, 1, 0, "source-file",
+	  "get commands from source-file", NULL };
+
+/* our homegrown strtok that understands strings */
+
+static char *
+tokenize(
+	char        *inp)
+{
+	static char *last_place = NULL;
+	char        *start;
+	char        *walk;
+	int         in_string = 0;
+	int         in_escape = 0;
+
+	if (inp) {
+		start = inp;
+	} else {
+		if (last_place == NULL)
+			return NULL;
+
+		/* we're done */
+		if (*last_place != '\0')
+			return NULL;
+
+		start = last_place + 1;
+	}
+	last_place = NULL;
+
+	/* eat whitespace */
+	while (*start == ' ' || *start == '\t')
+		start++;
+
+	walk = start;
+	for (;*walk != '\0'; walk++) {
+		if (in_escape) {
+			in_escape = 0;
+			continue;
+		}
+		if (*walk == '\\')
+			in_escape = 1;
+		else if (*walk == '\"')
+			in_string ^= 1;
+
+		if (!in_string && !in_escape &&
+		    (*walk == ' ' || *walk == '\t')) {
+			last_place = walk;
+			*last_place = '\0';
+			break;
+		}
+	}
+	if (walk == start)
+		return NULL;
+
+	return start;
+}
+
+char **
+breakline(
+	char	*input,
+	int	*count)
+{
+	int	c;
+	char	*inp;
+	char	*p;
+	char	**rval;
+
+	c = 0;
+	inp = input;
+	rval = xcalloc(sizeof(char *), 1);
+	for (;;) {
+
+		p = tokenize(inp);
+
+		if (p == NULL)
+			break;
+		inp = NULL;
+		c++;
+		rval = xrealloc(rval, sizeof(*rval) * (c + 1));
+		rval[c - 1] = p;
+		rval[c] = NULL;
+	}
+	*count = c;
+	return rval;
+}
+
+void
+doneline(
+	char	*input,
+	char	**vec)
+{
+	xfree(input);
+	xfree(vec);
+}
+
+char *	/* xrealloc'ed line without its trailing newline, or NULL once no input file is left */
+fetchline(void)
+{
+	char	buf[1024];
+	int	iscont;
+	size_t	len;
+	size_t	rlen;
+	char	*rval;
+
+	rval = NULL;
+	for (rlen = iscont = 0; ; ) {
+		if (inputstacksize == 1) {	/* prompt only when a single input file is stacked */
+			if (iscont)
+				dbprintf("... ");
+			else
+				dbprintf("%s: ", progname);
+			fflush(stdin);	/* NOTE(review): fflush on an input stream is undefined in ISO C; presumably meant to flush the prompt - confirm */
+		}
+		if (seenint() ||
+		    (!fgets(buf, sizeof(buf), curinput) &&
+		     ferror(curinput) && seenint())) {
+			clearint();
+			dbprintf("^C\n");
+			clearerr(curinput);
+			if (iscont) {	/* interrupt discards a partially continued line */
+				iscont = 0;
+				rlen = 0;
+				if (rval) {
+					xfree(rval);
+					rval = NULL;
+				}
+			}
+			continue;
+		}
+		if (ferror(curinput) || feof(curinput) ||
+		    (len = strlen(buf)) == 0) {
+			popfile();	/* current file is finished: fall back to the one below it */
+			if (curinput == NULL) {
+				dbprintf("\n");
+				return NULL;
+			}
+			iscont = 0;
+			rlen = 0;
+			if (rval) {
+				xfree(rval);
+				rval = NULL;
+			}
+			continue;
+		}
+		if (inputstacksize == 1)
+			logprintf("%s", buf);
+		rval = xrealloc(rval, rlen + len + 1);
+		if (rlen == 0)
+			rval[0] = '\0';	/* first chunk: make strcat start from an empty string */
+		rlen += len;
+		strcat(rval, buf);
+		if (buf[len - 1] == '\n') {
+			if (len > 1 && buf[len - 2] == '\\') {
+				rval[rlen - 2] = ' ';	/* backslash-newline: join with the next line */
+				rval[rlen - 1] = '\0';
+				rlen--;
+				iscont = 1;
+			} else {
+				rval[rlen - 1] = '\0';	/* strip the newline; the line is complete */
+				rlen--;
+				break;
+			}
+		}
+	}
+	return rval;
+}
+
+void
+input_init(void)
+{
+	add_command(&source_cmd);
+}
+
+static void
+popfile(void)
+{
+	if (inputstacksize == 0) {
+		curinput = NULL;
+		return;
+	}
+	if (curinput != stdin)
+		fclose(curinput);
+	/* drop the top entry: shrink the stack, or release it once empty */
+	inputstacksize--;
+	if (inputstacksize) {
+		inputstack =
+			xrealloc(inputstack, inputstacksize * sizeof(*inputstack));
+		curinput = inputstack[inputstacksize - 1];
+	} else {
+		xfree(inputstack);	/* came from xrealloc() in pushfile() */
+		curinput = NULL;
+		inputstack = NULL;
+	}
+}
+
+void
+pushfile(
+	FILE	*file)
+{
+	inputstack =
+		xrealloc(inputstack,
+			(inputstacksize + 1) * sizeof(*inputstack));
+	inputstacksize++;
+	curinput = inputstack[inputstacksize - 1] = file;
+}
+
+/* ARGSUSED */
+static int
+source_f(
+	int	argc,
+	char	**argv)
+{
+	FILE	*f;
+
+	f = fopen(argv[1], "r");	/* argv[1] is the source-file operand */
+	if (f == NULL)
+		dbprintf("can't open %s\n", argv[1]);	/* name the file, not the command */
+	else
+		pushfile(f);	/* later input is then read from f */
+	return 0;
+}
diff --git a/db/input.h b/db/input.h
new file mode 100644
index 000000000..406997c79
--- /dev/null
+++ b/db/input.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern char	**breakline(char *input, int *count);
+extern void	doneline(char *input, char **vec);
+extern char	*fetchline(void);
+extern void	input_init(void);
+extern void	pushfile(FILE *file);
diff --git a/db/io.c b/db/io.c
new file mode 100644
index 000000000..849103d4b
--- /dev/null
+++ b/db/io.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <errno.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "output.h"
+#include "mount.h"
+#include "malloc.h"
+
+static int	pop_f(int argc, char **argv);
+static void     pop_help(void);
+static int	push_f(int argc, char **argv);
+static void     push_help(void);
+static int	stack_f(int argc, char **argv);
+static void     stack_help(void);
+static int      forward_f(int argc, char **argv);
+static void     forward_help(void);
+static int      back_f(int argc, char **argv);
+static void     back_help(void);
+static int      ring_f(int argc, char **argv);
+static void     ring_help(void);
+
+static const cmdinfo_t	pop_cmd =	/* every *_cmd here is added by io_init() */
+	{ "pop", NULL, pop_f, 0, 0, 0, NULL,
+	  "pop location from the stack", pop_help };
+static const cmdinfo_t	push_cmd =
+	{ "push", NULL, push_f, 0, 2, 0, "[command]",
+	  "push location to the stack", push_help };
+static const cmdinfo_t	stack_cmd =
+	{ "stack", NULL, stack_f, 0, 0, 0, NULL,
+	  "view the location stack", stack_help };
+static const cmdinfo_t  forward_cmd = 
+	{ "forward", "f", forward_f, 0, 0, 0, NULL,
+	  "move forward to next entry in the position ring", forward_help };
+static const cmdinfo_t  back_cmd = 
+	{ "back", "b", back_f, 0, 0, 0, NULL,
+	  "move to the previous location in the position ring", back_help };
+static const cmdinfo_t  ring_cmd = 
+	{ "ring", NULL, ring_f, 0, 1, 0, NULL,
+	  "show position ring or move to a specific entry", ring_help };
+
+iocur_t	*iocur_base;	/* I/O stack array, grown by push_cur() */
+iocur_t	*iocur_top;	/* entry at iocur_base[iocur_sp] */
+int	iocur_sp = -1;	/* index of the top entry; negative = nothing pushed */
+int	iocur_len;	/* slots allocated in iocur_base */
+
+#define RING_ENTRIES 20	/* capacity of the position ring */
+static iocur_t iocur_ring[RING_ENTRIES];
+static int     ring_head = -1;	/* newest slot; -1 until ring_add() first runs */
+static int     ring_tail = -1;	/* oldest slot still kept */
+static int     ring_current = -1;	/* slot being viewed; -1 = ring is empty */
+
+void
+io_init(void)
+{
+	add_command(&pop_cmd);		/* location-stack commands */
+	add_command(&push_cmd);
+	add_command(&stack_cmd);
+	add_command(&forward_cmd);	/* position-ring commands */
+	add_command(&back_cmd);
+	add_command(&ring_cmd);
+}
+
+void
+off_cur(
+	int	off,
+	int	len)
+{
+	if (iocur_top == NULL || off + len > BBTOB(iocur_top->blen)) {
+		dbprintf("can't set block offset to %d\n", off);
+		return;		/* no current buffer, or window runs past it */
+	}
+	iocur_top->boff = off;
+	iocur_top->len = len;
+	iocur_top->data = (void *)((char *)iocur_top->buf + off);
+	iocur_top->off = ((xfs_off_t)iocur_top->bb << BBSHIFT) + off;
+}
+
+void
+pop_cur(void)
+{
+	if (iocur_sp < 0) {
+		dbprintf("can't pop anything from I/O stack\n");
+		return;
+	}
+	xfree(iocur_top->buf);		/* xfree(NULL) is harmless */
+	iocur_top->buf = iocur_top->data = NULL; /* bottom entry stays live */
+	if (--iocur_sp >= 0) {
+		iocur_top = iocur_base + iocur_sp;
+		cur_typ = iocur_top->typ;
+	} else {
+		iocur_top = iocur_base;
+		iocur_sp = 0;
+	}
+}
+
+/*ARGSUSED*/
+static int
+pop_f(
+	int	argc,
+	char	**argv)
+{
+	pop_cur();	/* argc and argv are ignored */
+	return 0;
+}
+
+static void
+pop_help(void)
+{
+	dbprintf(	/* help text referenced by pop_cmd */
+"\n"
+" Changes the address and data type to the first entry on the stack.\n"
+"\n"
+		);
+}
+
+void
+print_iocur(
+	char	*tag,
+	iocur_t	*ioc)
+{
+	int	i;
+	/* NOTE(review): %lld below assumes off/bb/ino are long long - confirm */
+	dbprintf("%s\n", tag);
+	dbprintf("\tbyte offset %lld, length %d\n", ioc->off, ioc->len);
+	dbprintf("\tbuffer block %lld (fsbno %lld), %d bb%s\n", ioc->bb,
+		(xfs_dfsbno_t)XFS_DADDR_TO_FSB(mp, ioc->bb), ioc->blen,
+		ioc->blen == 1 ? "" : "s");
+	if (ioc->use_bbmap) {	/* one map slot is printed per BB of the buffer */
+		dbprintf("\tblock map");
+		for (i = 0; i < ioc->blen; i++)
+			dbprintf(" %d:%lld", i, ioc->bbmap.b[i]);
+		dbprintf("\n");
+	}
+	dbprintf("\tinode %lld, dir inode %lld, type %s\n", ioc->ino,
+		ioc->dirino, ioc->typ == NULL ? "none" : ioc->typ->name);
+}
+
+/* list every ring entry from ring_head back to ring_tail; '*' marks current */
+void
+print_ring(void)
+{
+	int i;
+	iocur_t *ioc;
+
+	if (ring_current == -1) {
+		dbprintf("no entries in location ring.\n");
+		return;
+	}
+
+	dbprintf("      type    bblock  bblen    fsbno     inode\n");
+
+	i = ring_head;
+	for (;;) {
+		ioc = &iocur_ring[i];
+		/* go through dbprintf() like the rest of the row, not printf() */
+		dbprintf("%c%2d: ", i == ring_current ? '*' : ' ', i);
+
+		dbprintf("%-7.7s %8lld %5d %8lld %9lld\n",
+			 ioc->typ == NULL ? "none" : ioc->typ->name,
+			 ioc->bb,
+			 ioc->blen,
+			 (xfs_dfsbno_t)XFS_DADDR_TO_FSB(mp, ioc->bb),
+			 ioc->ino
+			);
+
+		if (i == ring_tail)
+			break;
+
+		/* step to the next older slot, wrapping below index 0 */
+		i = (i+(RING_ENTRIES-1))%RING_ENTRIES;
+	}
+}
+
+
+void
+push_cur(void)
+{
+	iocur_t	*below;		/* entry under the new top, if there is one */
+
+	if (iocur_sp + 1 >= iocur_len)	/* array full: grow it by one slot */
+		iocur_base = xrealloc(iocur_base,
+				      ++iocur_len * sizeof(*iocur_base));
+	iocur_top = &iocur_base[++iocur_sp];
+	memset(iocur_top, 0, sizeof(*iocur_top));
+	below = iocur_sp > 0 ? iocur_top - 1 : NULL;
+	iocur_top->ino = below ? below->ino : NULLFSINO;
+	iocur_top->dirino = below ? below->dirino : NULLFSINO;
+	iocur_top->mode = below ? below->mode : 0;
+	cur_typ = NULL;
+}
+
+static int
+push_f(
+	int		argc,
+	char		**argv)
+{
+	const cmdinfo_t	*ct;
+	iocur_t		*prev;
+
+	/* with a sub-command, refuse early if it cannot be pushed */
+	if (argc > 1) {
+		ct = find_command(argv[1]);
+		if (ct == NULL) {
+			dbprintf("no such command %s\n", argv[1]);
+			return 0;
+		}
+		if (!ct->canpush) {
+			dbprintf("no push form allowed for %s\n", argv[1]);
+			return 0;
+		}
+	}
+
+	/* re-establish the previous top's location in a fresh stack entry */
+	push_cur();
+	prev = iocur_top - 1;
+	if (prev->typ && prev->typ->typnm == TYP_INODE)
+		set_cur_inode(prev->ino);
+	else
+		set_cur(prev->typ, prev->bb, prev->blen, DB_RING_IGN,
+			prev->use_bbmap ? &prev->bbmap : NULL);
+	/* then pass the remaining words on to command() */
+	if (argc > 1)
+		(void)command(argc - 1, argv + 1);
+	return 0;
+}
+
+static void
+push_help(void)
+{
+	dbprintf(	/* help text referenced by push_cmd */
+"\n"
+" Allows you to push the current address and data type on the stack for\n"
+" later return.  'push' also accepts an additional command to execute after\n"
+" storing the current address (ex: 'push a rootino' from the superblock).\n"
+"\n"
+		);
+}
+
+/* step one entry towards ring_head and load that location */
+/* ARGSUSED */
+static int
+forward_f(
+	int		argc,
+	char		**argv)
+{
+	iocur_t		*ioc;
+
+	if (ring_current == -1) {
+		dbprintf("ring is empty\n");
+		return 0;
+	}
+	if (ring_current == ring_head) {
+		dbprintf("no further entries\n");
+		return 0;
+	}
+
+	/* advance with wrap-around; DB_RING_IGN keeps set_cur off the ring */
+	ring_current = (ring_current + 1) % RING_ENTRIES;
+	ioc = &iocur_ring[ring_current];
+
+	set_cur(ioc->typ, ioc->bb, ioc->blen, DB_RING_IGN,
+		ioc->use_bbmap ? &ioc->bbmap : NULL);
+
+	return 0;
+}
+
+static void
+forward_help(void)
+{
+	dbprintf(	/* help text referenced by forward_cmd */
+"\n"
+" The 'forward' ('f') command moves to the next location in the position\n"
+" ring, updating the current position and data type.  If the current location\n"
+" is the top entry in the ring, then the 'forward' command will have\n"
+" no effect.\n"
+"\n"
+		);
+}
+
+/* step one entry towards ring_tail and load that location */
+/* ARGSUSED */
+static int
+back_f(
+	int		argc,
+	char		**argv)
+{
+	iocur_t		*ioc;
+
+	if (ring_current == -1) {
+		dbprintf("ring is empty\n");
+		return 0;
+	}
+	if (ring_current == ring_tail) {
+		dbprintf("no previous entries\n");
+		return 0;
+	}
+
+	/* retreat with wrap-around; DB_RING_IGN keeps set_cur off the ring */
+	ring_current = (ring_current + (RING_ENTRIES - 1)) % RING_ENTRIES;
+	ioc = &iocur_ring[ring_current];
+
+	set_cur(ioc->typ, ioc->bb, ioc->blen, DB_RING_IGN,
+		ioc->use_bbmap ? &ioc->bbmap : NULL);
+
+	return 0;
+}
+
+static void
+back_help(void)
+{
+	dbprintf(	/* help text referenced by back_cmd */
+"\n"
+" The 'back' ('b') command moves to the previous location in the position\n"
+" ring, updating the current position and data type.  If the current location\n"
+" is the last entry in the ring, then the 'back' command will have no effect.\n"
+"\n"
+		);
+}
+
+/* show the ring, or jump to the populated entry whose index is argv[1] */
+static int
+ring_f(
+	int		argc,
+	char		**argv)
+{
+	int index;
+
+	if (argc == 1) {
+		print_ring();
+		return 0;
+	}
+
+	/* argv[0] is the command name; slots outside tail..head were never set */
+	index = (int)strtoul(argv[1], NULL, 0);
+	if (ring_current == -1 || index < 0 || index >= RING_ENTRIES ||
+	    (index - ring_tail + RING_ENTRIES) % RING_ENTRIES >
+	    (ring_head - ring_tail + RING_ENTRIES) % RING_ENTRIES) {
+		dbprintf("invalid entry: %d\n", index);
+		return 0;
+	}
+	ring_current = index;
+	set_cur(iocur_ring[index].typ, iocur_ring[index].bb,
+		iocur_ring[index].blen, DB_RING_IGN,
+		iocur_ring[index].use_bbmap ? &iocur_ring[index].bbmap : NULL);
+	return 0;
+}
+
+static void
+ring_help(void)
+{
+	dbprintf(	/* help text referenced by ring_cmd; %d is RING_ENTRIES */
+"\n"
+" The position ring automatically keeps track of each disk location and\n"
+" structure type for each change of position you make during your xfs_db\n"
+" session.  The last %d most recent entries are kept in the ring.\n"
+"\n"
+" To display the current list of ring entries type 'ring' by itself on\n"
+" the command line.  The entry highlighted by an asterisk ('*') is the\n"
+" current entry.\n"
+"\n"
+" To move to another entry in the ring type 'ring <num>' where <num> is\n"
+" your desired entry from the ring position list.\n"
+"\n"
+" You may also use the 'forward' ('f') or 'back' ('b') commands to move\n"
+" to the next or previous entry in the ring, respectively.\n"
+"\n"
+" Note: Unlike the 'stack', 'push' and 'pop' commands, the ring tracks your\n"
+" location implicitly.  Use the 'push' and 'pop' commands if you wish to\n"
+" store a specific location explicitly for later return.\n"
+"\n",
+		RING_ENTRIES);
+}
+
+
+/*
+ * Copy the current top-of-stack entry (*iocur_top) into the position ring
+ * and make that slot the current one.
+ */
+void
+ring_add(void)
+{
+	if (ring_head == -1) {
+		/* very first entry: head, tail and current all start at 0 */
+		ring_head = ring_tail = ring_current = 0;
+	} else if (ring_current != ring_head) {
+		/* not at the head: reuse the slot right after current */
+		ring_current = (ring_current + 1) % RING_ENTRIES;
+	} else {
+		/* at the head: advance it, pushing the tail along when full */
+		ring_head = (ring_head + 1) % RING_ENTRIES;
+		if (ring_head == ring_tail)
+			ring_tail = (ring_tail + 1) % RING_ENTRIES;
+		ring_current = ring_head;
+	}
+	iocur_ring[ring_current] = *iocur_top;
+}
+
+
+int
+write_bbs(
+	__int64_t       bbno,
+	int             count,
+	void            *bufp,
+	bbmap_t		*bbmap)
+{
+	int		c;
+	int		i;
+	int		j;
+	int		rval = EINVAL;	/* initialize for zero `count' case */
+	/* with a bbmap: one BB per pass, at bbmap->b[j]; without: a single pass */
+	for (j = 0; j < count; j += bbmap ? 1 : count) {
+		if (bbmap)
+			bbno = bbmap->b[j];
+		if (lseek64(xfsargs.dfd, bbno << BBSHIFT, SEEK_SET) < 0) {
+			rval = errno;
+			dbprintf("can't seek in filesystem at bb %lld\n", bbno);
+			return rval;
+		}
+		c = BBTOB(bbmap ? 1 : count);	/* bytes to move in this pass */
+		i = (int)write(xfsargs.dfd, (char *)bufp + BBTOB(j), c);
+		if (i < 0) {
+			rval = errno;
+		} else if (i < c) {
+			rval = -1;	/* short write; callers test for -1 */
+		} else	
+			rval = 0;
+		if (rval)
+			break;
+	}
+	return rval;	/* 0 on success, -1 if short, otherwise an errno value */
+}
+
+int
+read_bbs(
+	__int64_t	bbno,
+	int		count,
+	void		**bufp,
+	bbmap_t		*bbmap)
+{
+	void		*buf;
+	int		c;
+	int		i;
+	int		j;
+	int		rval = EINVAL;
+        
+        if (!count)
+            return EINVAL;
+	/* *bufp == NULL: allocate here and hand the buffer back through *bufp */
+	c = BBTOB(count);
+	if (*bufp == NULL)
+		buf = xmalloc(c);
+	else
+		buf = *bufp;
+	for (j = 0; j < count; j += bbmap ? 1 : count) {
+		if (bbmap)
+			bbno = bbmap->b[j];	/* mapped: one BB per pass */
+		if (lseek64(xfsargs.dfd, bbno << BBSHIFT, SEEK_SET) < 0) {
+			rval = errno;
+			dbprintf("can't seek in filesystem at bb %lld\n", bbno);
+			if (*bufp == NULL)	/* only free what we allocated */
+				xfree(buf);
+			buf = NULL;
+		} else {
+			c = BBTOB(bbmap ? 1 : count);
+			i = (int)read(xfsargs.dfd, (char *)buf + BBTOB(j), c);
+			if (i < 0) {
+				rval = errno;
+				if (*bufp == NULL)
+					xfree(buf);
+				buf = NULL;
+			} else if (i < c) {
+				rval = -1;	/* short read */
+				if (*bufp == NULL)
+					xfree(buf);
+				buf = NULL;
+			} else	
+				rval = 0;
+		}
+		if (buf == NULL)	/* any failure above ends the loop */
+			break;
+	}
+	if (*bufp == NULL)
+		*bufp = buf;	/* stays NULL if the read failed */
+	return rval;	/* 0 on success, -1 if short, otherwise an errno value */
+}
+
+void
+write_cur(void)
+{
+	bbmap_t	*map;
+	int	err;
+
+	if (iocur_sp < 0) {
+		dbprintf("nothing to write\n");
+		return;
+	}
+	map = iocur_top->use_bbmap ? &iocur_top->bbmap : NULL;
+	err = write_bbs(iocur_top->bb, iocur_top->blen, iocur_top->buf, map);
+	if (err == -1)
+		dbprintf("incomplete write, block: %lld\n",
+			 iocur_base[iocur_sp].bb);
+	else if (err != 0)
+		dbprintf("write error: %s\n", strerror(err));
+	/* re-read the block from disk into the same buffer */
+	err = read_bbs(iocur_top->bb, iocur_top->blen, &iocur_top->buf, map);
+	if (err == -1)
+		dbprintf("incomplete read, block: %lld\n",
+			 iocur_base[iocur_sp].bb);
+	else if (err != 0)
+		dbprintf("read error: %s\n", strerror(err));
+}
+
+void
+set_cur(
+	const typ_t	*t,
+	__int64_t	d, 
+	int		c,
+	int             ring_flag,
+	bbmap_t		*bbmap)
+{
+	xfs_ino_t	dirino;
+	xfs_ino_t	ino;
+	__uint16_t	mode;
+	/* load c BBs at daddr d as type t into the top stack entry */
+	if (iocur_sp < 0) {
+		dbprintf("set_cur no stack element to set\n");
+		return;
+	}
+	/* ino/dirino/mode are saved here and put back after the reload */
+#ifdef DEBUG
+	if (bbmap)
+		printf("xfs_db got a bbmap for %lld\n", d);
+#endif
+	ino = iocur_top->ino;
+	dirino = iocur_top->dirino;
+	mode = iocur_top->mode;
+	pop_cur();	/* frees the old buffer ... */
+	push_cur();	/* ... and leaves a zeroed entry on top */
+	if (read_bbs(d, c, &iocur_top->buf, bbmap))
+		return;	/* NOTE(review): old location is already gone here */
+	iocur_top->bb = d;
+	iocur_top->blen = c;
+	iocur_top->boff = 0;
+	iocur_top->data = iocur_top->buf;
+	iocur_top->len = BBTOB(c);
+	iocur_top->off = d << BBSHIFT;
+	iocur_top->typ = cur_typ = t;
+	iocur_top->ino = ino;
+	iocur_top->dirino = dirino;
+	iocur_top->mode = mode;
+	if (iocur_top->use_bbmap = (bbmap != NULL))	/* assignment intended */
+		iocur_top->bbmap = *bbmap;
+
+	/* store location in ring */
+	if (ring_flag)
+		ring_add();
+}
+
+static void
+stack_help(void)
+{
+	dbprintf(	/* help text referenced by stack_cmd */
+"\n"
+" The stack is used to explicitly store your location and data type\n"
+" for later return.  The 'push' operation stores the current address\n"
+" and type on the stack, the 'pop' operation returns you to the\n"
+" position and datatype of the top entry on the stack.\n"
+"\n"
+" The 'stack' allows explicit location saves, see 'ring' for implicit\n"
+" position tracking.\n"
+"\n"
+		);
+}
+
+/*ARGSUSED*/
+static int
+stack_f(
+	int	argc,
+	char	**argv)
+{
+	int	i;
+	char	tagbuf[16];	/* room for any int, ": " and the NUL */
+	/* print every stack entry, top of stack first */
+	for (i = iocur_sp; i >= 0; i--) {
+		snprintf(tagbuf, sizeof(tagbuf), "%d: ", i);
+		print_iocur(tagbuf, &iocur_base[i]);
+	}
+	return 0;
+}
diff --git a/db/io.h b/db/io.h
new file mode 100644
index 000000000..85ee2994b
--- /dev/null
+++ b/db/io.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct typ;
+
+#define	BBMAP_SIZE		(XFS_MAX_BLOCKSIZE / BBSIZE)	/* BBs in the largest block */
+typedef struct bbmap {
+	__int64_t		b[BBMAP_SIZE];	/* BB number for each BB of a buffer */
+} bbmap_t;
+
+typedef struct iocur {	/* one entry of the I/O stack or the position ring */
+	__int64_t		bb;	/* BB number in filesystem of buf */
+	int			blen;	/* length of "buf", bb's */
+	int			boff;	/* data - buf */
+	void			*buf;	/* base address of buffer */
+	void			*data;	/* current interesting data */
+	xfs_ino_t		dirino;	/* current directory inode number */
+	xfs_ino_t		ino;	/* current inode number */
+	int			len;	/* length of "data", bytes */
+	__uint16_t		mode;	/* current inode's mode */
+	xfs_off_t		off;	/* fs offset of "data" in bytes */
+	const struct typ	*typ;	/* type of "data" */
+	int			use_bbmap; /* set if bbmap is valid */
+	bbmap_t			bbmap;	/* map daddr if fragmented */
+} iocur_t;
+
+#define DB_RING_ADD 1                   /* add to ring on set_cur */
+#define DB_RING_IGN 0                   /* do not add to ring on set_cur */
+
+extern iocur_t	*iocur_base;		/* base of stack */
+extern iocur_t	*iocur_top;		/* top element of stack */
+extern int	iocur_sp;		/* current top of stack */
+extern int	iocur_len;		/* length of stack array */
+
+extern void	io_init(void);
+extern void	off_cur(int off, int len);
+extern void	pop_cur(void);
+extern void	print_iocur(char *tag, iocur_t *ioc);
+extern void	push_cur(void);
+extern int	read_bbs(__int64_t daddr, int count, void **bufp,
+			 bbmap_t *bbmap);
+extern int	write_bbs(__int64_t daddr, int count, void *bufp,
+			  bbmap_t *bbmap);
+extern void     write_cur(void);
+extern void	set_cur(const struct typ *t, __int64_t d, int c, int ring_add,
+			bbmap_t *bbmap);
+extern void     ring_add(void);
diff --git a/db/main.c b/db/main.c
new file mode 100644
index 000000000..e00046153
--- /dev/null
+++ b/db/main.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "init.h"
+#include "input.h"
+
+/*
+ * Entry point: read lines with fetchline() and pass each one to command().
+ */
+int
+main(
+	int	argc,
+	char	**argv)
+{
+	char	**words;	/* vector returned by breakline() */
+	char	*line;
+	int	nwords;
+	int	done = 0;
+
+	pushfile(stdin);
+	init(argc, argv);
+	/* stop when input runs out or a command returns non-zero */
+	while (!done && (line = fetchline()) != NULL) {
+		words = breakline(line, &nwords);
+		done = nwords ? command(nwords, words) : 0;
+		doneline(line, words);
+	}
+	return exitcode;
+}
diff --git a/db/malloc.c b/db/malloc.c
new file mode 100644
index 000000000..413b87f57
--- /dev/null
+++ b/db/malloc.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "init.h"
+#include "malloc.h"
+#include "output.h"
+
+static void
+badmalloc(void)
+{
+	dbprintf("%s: out of memory\n", progname);
+	exit(4);	/* does not return to the allocation wrappers */
+}
+
+/* calloc() wrapper: an allocation failure ends up in badmalloc() */
+void *
+xcalloc(
+	size_t	nelem,
+	size_t	elsize)
+{
+	void	*mem = calloc(nelem, elsize);
+
+	if (mem == NULL) {
+		badmalloc();
+		/* NOTREACHED */
+	}
+	return mem;
+}
+
+void
+xfree(
+	void	*ptr)
+{
+	free(ptr);	/* plain pass-through to free() */
+}
+
+/* malloc() wrapper: an allocation failure ends up in badmalloc() */
+void *
+xmalloc(
+	size_t	size)
+{
+	void	*mem = malloc(size);
+
+	if (mem == NULL) {
+		badmalloc();
+		/* NOTREACHED */
+	}
+	return mem;
+}
+
+void *
+xrealloc(
+	void	*ptr,
+	size_t	size)
+{
+	void	*mem = realloc(ptr, size);
+
+	/* a NULL result only counts as failure when bytes were requested */
+	if (mem == NULL && size != 0)
+		badmalloc();	/* NOTREACHED */
+	return mem;
+}
+
+/* strdup() wrapper: an allocation failure ends up in badmalloc() */
+char *
+xstrdup(
+	const char	*s1)
+{
+	char		*copy = strdup(s1);
+
+	if (copy == NULL) {
+		badmalloc();
+		/* NOTREACHED */
+	}
+	return copy;
+}
diff --git a/db/malloc.h b/db/malloc.h
new file mode 100644
index 000000000..1680a44b0
--- /dev/null
+++ b/db/malloc.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	*xcalloc(size_t nelem, size_t elsize);
+extern void	xfree(void *ptr);
+extern void	*xmalloc(size_t size);
+extern void	*xrealloc(void *ptr, size_t size);
+extern char	*xstrdup(const char *s1);
diff --git a/db/mount.c b/db/mount.c
new file mode 100644
index 000000000..184972720
--- /dev/null
+++ b/db/mount.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "init.h"
+#include "io.h"
+#include "mount.h"
+#include "malloc.h"
+#include "data.h"
+
+xfs_mount_t	*mp;
+
+static void
+compute_maxlevels(
+	xfs_mount_t	*mp,
+	int		whichfork)
+{
+	int		level;
+	uint		maxblocks;
+	uint		maxleafents;
+	int		maxrootrecs;
+	int		minleafrecs;
+	int		minnoderecs;
+	int		sz;
+	/* sizes the tree from the minimum record counts per block */
+	maxleafents = (whichfork == XFS_DATA_FORK) ? MAXEXTNUM : MAXAEXTNUM;
+	minleafrecs = mp->m_bmap_dmnr[0];
+	minnoderecs = mp->m_bmap_dmnr[1];
+	sz = mp->m_sb.sb_inodesize;
+	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+	/* NOTE(review): both divisions assume non-zero minimums - confirm */
+	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+	for (level = 1; maxblocks > 1; level++) {
+		if (maxblocks <= maxrootrecs)
+			maxblocks = 1;	/* what is left fits in the root */
+		else
+			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+	}
+	mp->m_bm_maxlevels[whichfork] = level;
+}
+
+/* read the superblock and build an in-core mount from it; NULL if the read fails */
+xfs_mount_t *
+dbmount(void)
+{
+	void		*bufp;
+	int		i;
+	xfs_mount_t	*mp;
+	xfs_sb_t	*sbp;
+
+	/* read first: a failed read must not leak a freshly allocated mount */
+	bufp = NULL;
+	if (read_bbs(XFS_SB_DADDR, 1, &bufp, NULL))
+		return NULL;
+	mp = xcalloc(1, sizeof(*mp));
+
+	/* copy sb from buf to in-core, converting architecture */
+	libxfs_xlate_sb(bufp, &mp->m_sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+	xfree(bufp);
+	sbp = &mp->m_sb;
+	/* a bad magic number is reported, but the mount is still built */
+	if (sbp->sb_magicnum != XFS_SB_MAGIC)
+		fprintf(stderr,"%s: unexpected XFS SB magic number 0x%08x\n",
+			progname, sbp->sb_magicnum);
+	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
+	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+	mp->m_agno_log = libxfs_highbit32(sbp->sb_agcount - 1) + 1;
+	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+	mp->m_litino =
+		(int)(sbp->sb_inodesize -
+		      (sizeof(xfs_dinode_core_t) + sizeof(xfs_agino_t)));
+	mp->m_blockmask = sbp->sb_blocksize - 1;
+	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
+	mp->m_blockwmask = mp->m_blockwsize - 1;
+	for (i = 0; i < 2; i++) {
+		mp->m_alloc_mxr[i] =
+			(uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+				xfs_alloc, i == 0);
+		mp->m_alloc_mnr[i] =
+			(uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+				xfs_alloc, i == 0);
+		mp->m_bmap_dmxr[i] =
+			(uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+				xfs_bmbt, i == 0);
+		mp->m_bmap_dmnr[i] =
+			(uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+				xfs_bmbt, i == 0);
+		mp->m_inobt_mxr[i] =
+			(uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+				xfs_inobt, i == 0);
+		mp->m_inobt_mnr[i] =
+			(uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+				xfs_inobt, i == 0);
+	}
+	compute_maxlevels(mp, XFS_DATA_FORK);
+	compute_maxlevels(mp, XFS_ATTR_FORK);
+	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
+	mp->m_ialloc_inos = (int)MAX(XFS_INODES_PER_CHUNK, sbp->sb_inopblock);
+	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+	if (sbp->sb_rblocks) {
+		mp->m_rsumlevels = sbp->sb_rextslog + 1;
+		mp->m_rsumsize =
+			(uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
+			sbp->sb_rbmblocks;
+		if (sbp->sb_blocksize)
+			mp->m_rsumsize =
+				roundup(mp->m_rsumsize, sbp->sb_blocksize);
+	}
+	if (XFS_SB_VERSION_HASDIRV2(sbp)) {
+		mp->m_dirversion = 2;
+		mp->m_dirblksize = 1 << (sbp->sb_dirblklog + sbp->sb_blocklog);
+		mp->m_dirblkfsbs = 1 << sbp->sb_dirblklog;
+		mp->m_dirdatablk =
+			XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+		mp->m_dirleafblk =
+			XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+		mp->m_dirfreeblk =
+			XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+	} else {
+		mp->m_dirversion = 1;
+		mp->m_dirblksize = sbp->sb_blocksize;
+		mp->m_dirblkfsbs = 1;
+	}
+	return mp;
+}
diff --git a/db/mount.h b/db/mount.h
new file mode 100644
index 000000000..72348adcf
--- /dev/null
+++ b/db/mount.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern xfs_mount_t	*dbmount(void);
+extern xfs_mount_t	*mp;
diff --git a/db/output.c b/db/output.c
new file mode 100644
index 000000000..b49b36521
--- /dev/null
+++ b/db/output.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <stdarg.h>
+#include "command.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+#include "init.h"
+
+static int	log_f(int argc, char **argv);
+
+static const cmdinfo_t	log_cmd =	/* registered by output_init() */
+	{ "log", NULL, log_f, 0, 2, 0, "[stop|start <filename>]",
+	  "start or stop logging to a file", NULL };
+
+int		dbprefix;	/* nonzero: dbprintf() prefixes output with fsdevice */
+static FILE	*log_file;	/* open log stream; NULL when not logging */
+static char	*log_file_name;	/* name reported by the "log" command */
+
+/* Print to stdout with SIGINT blocked, then mirror to the log file if open. */
+int
+dbprintf(const char *fmt, ...)
+{
+	va_list	args;
+	int	nout = 0;
+
+	if (seenint())
+		return 0;	/* no output at all once an interrupt was seen */
+	blockint();
+	if (dbprefix)
+		nout = printf("%s: ", fsdevice);
+	va_start(args, fmt);
+	nout += vprintf(fmt, args);
+	va_end(args);
+	unblockint();
+	if (log_file) {
+		va_start(args, fmt);
+		vfprintf(log_file, fmt, args);
+		va_end(args);
+	}
+	return nout;	/* sum of the stdout printf()/vprintf() results */
+}
+
+/*
+ * "log" command: with no argument report the log state, "stop" closes the
+ * log file, "start <filename>" opens <filename> for appending.  Returns 0.
+ */
+static int
+log_f(int argc, char **argv)
+{
+	if (argc == 1) {
+		if (log_file)
+			dbprintf("logging to %s\n", log_file_name);
+		else
+			dbprintf("no log file\n");
+	} else if (argc == 2 && strcmp(argv[1], "stop") == 0) {
+		if (log_file) {
+			xfree(log_file_name);
+			log_file_name = NULL;	/* do not keep a freed pointer */
+			fclose(log_file);
+			log_file = NULL;
+		} else
+			dbprintf("no log file\n");
+	} else if (argc == 3 && strcmp(argv[1], "start") == 0) {
+		if (log_file)
+			dbprintf("already logging to %s\n", log_file_name);
+		else if ((log_file = fopen(argv[2], "a")) == NULL)
+			dbprintf("can't open %s for writing\n", argv[2]);
+		else
+			/* argv[1] is the word "start"; the name is argv[2] */
+			log_file_name = xstrdup(argv[2]);
+	} else
+		dbprintf("bad log command, ignored\n");
+	return 0;
+}
+
+void
+logprintf(const char *fmt, ...)	/* log-file-only output; no-op when not logging */
+{
+	va_list	args;
+
+	if (log_file == NULL)
+		return;
+	va_start(args, fmt);
+	(void)vfprintf(log_file, fmt, args);
+	va_end(args);
+}
+
+void
+output_init(void)	/* registers the "log" command table entry */
+{
+	add_command(&log_cmd);
+}
diff --git a/db/output.h b/db/output.h
new file mode 100644
index 000000000..27861ff73
--- /dev/null
+++ b/db/output.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern int	dbprefix;
+
+extern int	dbprintf(const char *, ...);
+extern void	logprintf(const char *, ...);
+extern void	output_init(void);
diff --git a/db/print.c b/db/print.c
new file mode 100644
index 000000000..f4c747957
--- /dev/null
+++ b/db/print.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "flist.h"
+#include "strvec.h"
+#include "output.h"
+#include "sig.h"
+#include "write.h"
+
+static void	print_allfields(const struct field *fields);
+static int	print_f(int argc, char **argv);
+static void	print_flist_1(struct flist *flist, char **pfx, int parentoff);
+static void	print_somefields(const struct field *fields, int argc,
+				 char **argv);
+
+static const cmdinfo_t	print_cmd =	/* registered by print_init() */
+	{ "print", "p", print_f, 0, -1, 0, "[value]...",
+	  "print field values", NULL };
+
+static void
+print_allfields(
+	const field_t	*fields)
+{
+	flist_t		*flist;
+#ifdef DEBUG
+	int		i;
+#endif
+	/* Build an unnamed flist over "fields", parse it and print the result. */
+	flist = flist_make("");
+	flist->fld = fields;
+#ifndef DEBUG	/* non-DEBUG build: the parse result is discarded */
+	(void)flist_parse(fields, flist, iocur_top->data, 0);
+#else	/* DEBUG build: the parse is asserted to return 1 */
+	i = flist_parse(fields, flist, iocur_top->data, 0);
+	ASSERT(i == 1);
+#endif
+	flist_print(flist);
+	print_flist(flist);
+	flist_free(flist);
+}
+
+/*
+ * "print" command handler: pass the arguments that follow the command name
+ * to the print function of the current type.  Complains instead when no
+ * type is set or the type has no print function.  Always returns 0.
+ */
+static int
+print_f(int argc, char **argv)
+{
+	pfunc_t	printer;
+
+	if (cur_typ == NULL) {
+		dbprintf("no current type\n");
+		return 0;
+	}
+	printer = cur_typ->pfunc;
+	if (printer != NULL)
+		printer(DB_READ, cur_typ->fields, argc - 1, argv + 1);
+	else
+		dbprintf("no print function for type %s\n", cur_typ->name);
+	return 0;
+}
+
+/* Print a parsed field list: walk it with print_flist_1(), starting from a
+ * prefix vector made by new_strvec(0) and a parent offset of 0. */
+void
+print_flist(flist_t *flist)
+{
+	char	**prefix = new_strvec(0);
+
+	print_flist_1(flist, prefix, 0);
+	free_strvec(prefix);
+}
+
+static void
+print_flist_1(
+	flist_t		*flist,
+	char		**ppfx,
+	int		parentoff)
+{
+	char		buf[16];	/* decimal text of one index value */
+	const field_t	*f;
+	const ftattr_t	*fa;
+	flist_t		*fl;
+	int		low;
+	int		neednl;
+	char		**pfx;
+	/* Walk flist and its siblings: recurse into children, print leaves. */
+	for (fl = flist; fl && !seenint(); fl = fl->sibling) {
+		pfx = copy_strvec(ppfx);	/* per-entry copy; freed at loop end */
+		if (fl->name[0])
+			add_strvec(&pfx, fl->name);
+		if (fl->flags & FL_OKLOW) {	/* append "[low]" or "[low-high]" */
+			add_strvec(&pfx, "[");
+			sprintf(buf, "%d", fl->low);
+			add_strvec(&pfx, buf);
+			if (fl->low != fl->high) {
+				add_strvec(&pfx, "-");
+				sprintf(buf, "%d", fl->high);
+				add_strvec(&pfx, buf);
+			}
+			add_strvec(&pfx, "]");
+		}
+		if (fl->child) {	/* children are printed under the extended prefix */
+			if (fl->name[0])
+				add_strvec(&pfx, ".");
+			print_flist_1(fl->child, pfx, fl->offset);
+		} else {	/* leaf: print the prefix, " = ", then the value */
+			f = fl->fld;
+			fa = &ftattrtab[f->ftyp];
+			ASSERT(fa->ftyp == f->ftyp);
+			print_strvec(pfx);
+			dbprintf(" = ");
+			if (fl->flags & FL_OKLOW)
+				low = fl->low;
+			else
+				low = 0;
+			if (fa->prfunc) {
+				neednl = fa->prfunc(iocur_top->data, fl->offset,
+					fcount(f, iocur_top->data, parentoff),
+					fa->fmtstr,
+					fsize(f, iocur_top->data, parentoff, 0),
+					fa->arg, low,
+					(f->flags & FLD_ARRAY) != 0);
+				if (neednl)	/* nonzero return: newline is printed here */
+					dbprintf("\n");
+			} else {	/* no print function: only legal for OKEMPTY types */
+				ASSERT(fa->arg & FTARG_OKEMPTY);
+				dbprintf("(empty)\n");
+			}
+		}
+		free_strvec(pfx);
+	}
+}
+
+void
+print_init(void)	/* registers the "print" command table entry */
+{
+	add_command(&print_cmd);
+}
+
+void
+print_sarray(
+	void		*obj,
+	int		bit,
+	int		count,
+	int		size,
+	int		base,
+	int		array,
+	const field_t	*flds,
+	int		skipnms)
+{
+	int		bitoff;
+	const field_t	*f;
+	const ftattr_t	*fa;
+	int		first;
+	int		i;
+	/* Prints "[names] " once (unless skipnms), then "[values]" per element. */
+	ASSERT(bitoffs(bit) == 0);
+	if (skipnms == 0) {	/* header: names of all fields not marked FLD_SKIPALL */
+		for (f = flds, first = 1; f->name; f++) {
+			if (f->flags & FLD_SKIPALL)
+				continue;
+			dbprintf("%c%s", first ? '[' : ',', f->name);
+			first = 0;
+		}
+		dbprintf("] ");
+	}
+	for (i = 0, bitoff = bit;	/* elements are "size" bits apart */
+	     i < count && !seenint();
+	     i++, bitoff += size) {
+		if (array)	/* label each element with its index, i + base */
+			dbprintf("%d:", i + base);
+		for (f = flds, first = 1; f->name; f++) {
+			if (f->flags & FLD_SKIPALL)
+				continue;
+			fa = &ftattrtab[f->ftyp];
+			ASSERT(fa->ftyp == f->ftyp);
+			dbprintf("%c", first ? '[' : ',');
+			first = 0;
+			if (fa->prfunc)	/* return value is ignored here */
+				fa->prfunc(obj,
+					bitoff +
+					    bitoffset(f, obj, bitoff, i + base),
+					fcount(f, obj, bitoff), fa->fmtstr,
+					fsize(f, obj, bitoff, i + base),
+					fa->arg, (f->flags & FLD_ABASE1) != 0,
+					f->flags & FLD_ARRAY);
+			else {
+				ASSERT(fa->arg & FTARG_OKEMPTY);
+				dbprintf("(empty)");
+			}
+		}
+		dbprintf("]");
+		if (i < count - 1)	/* single space between elements, none after the last */
+			dbprintf(" ");
+	}
+}
+
+/*
+ * Print only the fields named by the arguments.  Each argument is scanned
+ * with flist_scan() and the results are chained through ->sibling; the
+ * chain is then parsed against iocur_top->data and printed.  If a scan or
+ * the parse fails, the chain is freed and the list is not printed.
+ */
+static void
+print_somefields(const field_t *fields, int argc, char **argv)
+{
+	const ftattr_t	*attr;
+	flist_t		*head = NULL;
+	flist_t		*tail = NULL;
+	flist_t		*item;
+	int		n;
+
+	for (n = 0; n < argc; n++) {
+		item = flist_scan(argv[n]);
+		if (item == NULL) {
+			if (head != NULL)
+				flist_free(head);
+			return;
+		}
+		if (tail == NULL)
+			head = item;
+		else
+			tail->sibling = item;
+		tail = item;
+	}
+	/* a first field with an empty name stands for its type's subfields */
+	if (fields->name[0] == '\0') {
+		attr = &ftattrtab[fields->ftyp];
+		ASSERT(attr->ftyp == fields->ftyp);
+		fields = attr->subfld;
+	}
+	if (flist_parse(fields, head, iocur_top->data, 0)) {
+		flist_print(head);
+		print_flist(head);
+	}
+	flist_free(head);
+}
+
+/* Print the bytes at iocur_top->data as a double-quoted string, stopping at
+ * the first NUL, after iocur_top->len bytes, or once an interrupt is seen.
+ * Arguments draw a complaint, but the string is still printed.
+ */
+/*ARGSUSED*/
+void
+print_string(const field_t *fields, int argc, char **argv)
+{
+	char	*start = iocur_top->data;
+	char	*end = start + iocur_top->len;
+	char	*cur;
+
+	if (argc != 0)
+		dbprintf("no arguments allowed\n");
+	dbprintf("\"");
+	for (cur = start; cur < end && *cur != '\0' && !seenint(); cur++)
+		dbprintf("%c", *cur);
+	dbprintf("\"\n");
+}
+
+/* Print a structured object: every field when no arguments are given,
+ * otherwise only the fields that the arguments name.
+ */
+void
+print_struct(const field_t *fields, int argc, char **argv)
+{
+	if (argc != 0)
+		print_somefields(fields, argc, argv);
+	else
+		print_allfields(fields);
+}
diff --git a/db/print.h b/db/print.h
new file mode 100644
index 000000000..81ae2c01b
--- /dev/null
+++ b/db/print.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+struct flist;
+
+extern void	print_flist(struct flist *flist);
+extern void	print_init(void);
+extern void	print_sarray(void *obj, int bit, int count, int size, int base,
+			     int array, const field_t *flds, int skipnms);
+extern void	print_struct(const struct field *fields, int argc, char **argv);
+extern void	print_string(const struct field *fields, int argc, char **argv);
diff --git a/db/quit.c b/db/quit.c
new file mode 100644
index 000000000..1a93178bd
--- /dev/null
+++ b/db/quit.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "quit.h"
+
+static int	quit_f(int argc, char **argv);
+
+static const cmdinfo_t	quit_cmd =	/* registered by quit_init() */
+	{ "quit", "q", quit_f, 0, 0, 0, NULL,
+	  "exit xfs_db", NULL };
+
+static int
+quit_f(	/* handler behind the "quit" / "q" command entry */
+	int	argc,
+	char	**argv)
+{
+	return 1;	/* args unused; presumably 1 ends the command loop - confirm in caller */
+}
+
+void
+quit_init(void)	/* registers the "quit" command table entry */
+{
+	add_command(&quit_cmd);
+}
diff --git a/db/quit.h b/db/quit.h
new file mode 100644
index 000000000..0e3e50d4e
--- /dev/null
+++ b/db/quit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	quit_init(void);
diff --git a/db/sb.c b/db/sb.c
new file mode 100644
index 000000000..dc40b7dd5
--- /dev/null
+++ b/db/sb.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "sb.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int	sb_f(int argc, char **argv);
+static void     sb_help(void);
+
+static const cmdinfo_t	sb_cmd =	/* registered by sb_init() */
+	{ "sb", NULL, sb_f, 0, 1, 1, "[agno]",
+	  "set current address to sb header", sb_help };
+
+const field_t	sb_hfld[] = {	/* one unnamed FLDT_SB field at offset 0 */
+	{ "", FLDT_SB, OI(0), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+
+#define	OFF(f)	bitize(offsetof(xfs_sb_t, sb_ ## f))	/* bitize() of the byte offset of sb_<f> */
+#define	SZC(f)	szcount(xfs_sb_t, sb_ ## f)	/* szcount() of member sb_<f> */
+const field_t	sb_flds[] = {	/* one entry per xfs_sb_t member, located via OFF() */
+	{ "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+	{ "blocksize", FLDT_UINT32D, OI(OFF(blocksize)), C1, 0, TYP_NONE },
+	{ "dblocks", FLDT_DRFSBNO, OI(OFF(dblocks)), C1, 0, TYP_NONE },
+	{ "rblocks", FLDT_DRFSBNO, OI(OFF(rblocks)), C1, 0, TYP_NONE },
+	{ "rextents", FLDT_DRTBNO, OI(OFF(rextents)), C1, 0, TYP_NONE },
+	{ "uuid", FLDT_UUID, OI(OFF(uuid)), C1, 0, TYP_NONE },
+	{ "logstart", FLDT_DFSBNO, OI(OFF(logstart)), C1, 0, TYP_LOG },
+	{ "rootino", FLDT_INO, OI(OFF(rootino)), C1, 0, TYP_INODE },
+	{ "rbmino", FLDT_INO, OI(OFF(rbmino)), C1, 0, TYP_INODE },
+	{ "rsumino", FLDT_INO, OI(OFF(rsumino)), C1, 0, TYP_INODE },
+	{ "rextsize", FLDT_AGBLOCK, OI(OFF(rextsize)), C1, 0, TYP_NONE },
+	{ "agblocks", FLDT_AGBLOCK, OI(OFF(agblocks)), C1, 0, TYP_NONE },
+	{ "agcount", FLDT_AGNUMBER, OI(OFF(agcount)), C1, 0, TYP_NONE },
+	{ "rbmblocks", FLDT_EXTLEN, OI(OFF(rbmblocks)), C1, 0, TYP_NONE },
+	{ "logblocks", FLDT_EXTLEN, OI(OFF(logblocks)), C1, 0, TYP_NONE },
+	{ "versionnum", FLDT_UINT16X, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+	{ "sectsize", FLDT_UINT16D, OI(OFF(sectsize)), C1, 0, TYP_NONE },
+	{ "inodesize", FLDT_UINT16D, OI(OFF(inodesize)), C1, 0, TYP_NONE },
+	{ "inopblock", FLDT_UINT16D, OI(OFF(inopblock)), C1, 0, TYP_NONE },
+	{ "fname", FLDT_CHARNS, OI(OFF(fname)), CI(SZC(fname)), 0, TYP_NONE },
+	{ "blocklog", FLDT_UINT8D, OI(OFF(blocklog)), C1, 0, TYP_NONE },
+	{ "sectlog", FLDT_UINT8D, OI(OFF(sectlog)), C1, 0, TYP_NONE },
+	{ "inodelog", FLDT_UINT8D, OI(OFF(inodelog)), C1, 0, TYP_NONE },
+	{ "inopblog", FLDT_UINT8D, OI(OFF(inopblog)), C1, 0, TYP_NONE },
+	{ "agblklog", FLDT_UINT8D, OI(OFF(agblklog)), C1, 0, TYP_NONE },
+	{ "rextslog", FLDT_UINT8D, OI(OFF(rextslog)), C1, 0, TYP_NONE },
+	{ "inprogress", FLDT_UINT8D, OI(OFF(inprogress)), C1, 0, TYP_NONE },
+	{ "imax_pct", FLDT_UINT8D, OI(OFF(imax_pct)), C1, 0, TYP_NONE },
+	{ "icount", FLDT_UINT64D, OI(OFF(icount)), C1, 0, TYP_NONE },
+	{ "ifree", FLDT_UINT64D, OI(OFF(ifree)), C1, 0, TYP_NONE },
+	{ "fdblocks", FLDT_UINT64D, OI(OFF(fdblocks)), C1, 0, TYP_NONE },
+	{ "frextents", FLDT_UINT64D, OI(OFF(frextents)), C1, 0, TYP_NONE },
+	{ "uquotino", FLDT_INO, OI(OFF(uquotino)), C1, 0, TYP_INODE },
+	{ "pquotino", FLDT_INO, OI(OFF(pquotino)), C1, 0, TYP_INODE },
+	{ "qflags", FLDT_UINT16X, OI(OFF(qflags)), C1, 0, TYP_NONE },
+	{ "flags", FLDT_UINT8X, OI(OFF(flags)), C1, 0, TYP_NONE },
+	{ "shared_vn", FLDT_UINT8D, OI(OFF(shared_vn)), C1, 0, TYP_NONE },
+	{ "inoalignmt", FLDT_EXTLEN, OI(OFF(inoalignmt)), C1, 0, TYP_NONE },
+	{ "unit", FLDT_UINT32D, OI(OFF(unit)), C1, 0, TYP_NONE },
+	{ "width", FLDT_UINT32D, OI(OFF(width)), C1, 0, TYP_NONE },
+	{ "dirblklog", FLDT_UINT8D, OI(OFF(dirblklog)), C1, 0, TYP_NONE },
+	{ NULL }	/* NULL name ends the table */
+};
+
+static void
+sb_help(void)	/* help text for the "sb" command; referenced from sb_cmd */
+{
+	dbprintf(
+"\n"
+" set allocation group superblock\n"
+"\n"
+" Example:\n"
+"\n"
+" 'sb 7' - set location to 7th allocation group superblock, set type to 'sb'\n"
+"\n"
+" Located in the 1st 512 byte block of each allocation group,\n"
+" the superblock contains the base information for the filesystem.\n"
+" The superblock in allocation group 0 is the primary.  The copies in the\n"
+" remaining allocation groups only serve as backup for filesystem recovery.\n"
+" The icount/ifree/fdblocks/frextents are only updated in superblock 0.\n"
+"\n"
+);
+}
+
+/* "sb [agno]": select an AG superblock; empty or out-of-range agno is refused. */
+static int
+sb_f(int argc, char **argv)
+{
+	unsigned long	agno;	/* kept wide until it has been range-checked */
+	char		*p;
+
+	if (argc > 1) {
+		agno = strtoul(argv[1], &p, 0);
+		if (p == argv[1] || *p != '\0' ||
+		    agno >= mp->m_sb.sb_agcount) {
+			dbprintf("bad allocation group number %s\n", argv[1]);
+			return 0;
+		}
+		cur_agno = (xfs_agnumber_t)agno;
+	} else if (cur_agno == NULLAGNUMBER)
+		cur_agno = 0;	/* no current AG yet: default to AG 0 */
+	ASSERT(typtab[TYP_SB].typnm == TYP_SB);
+	set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, cur_agno, XFS_SB_DADDR), 1,
+		DB_RING_ADD, NULL);
+	return 0;
+}
+
+void
+sb_init(void)	/* registers the "sb" command table entry */
+{
+	add_command(&sb_cmd);
+}
+
+/*ARGSUSED*/
+int
+sb_size(	/* size of a superblock object; all three arguments are unused */
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_sectsize);	/* bitize() of the mounted fs sector size */
+}
diff --git a/db/sb.h b/db/sb.h
new file mode 100644
index 000000000..5d646cee6
--- /dev/null
+++ b/db/sb.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field	sb_flds[];
+extern const struct field	sb_hfld[];
+
+extern void	sb_init(void);
+extern int	sb_size(void *obj, int startoff, int idx);
diff --git a/db/sig.c b/db/sig.c
new file mode 100644
index 000000000..9b70cedef
--- /dev/null
+++ b/db/sig.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <signal.h>
+#include "sig.h"
+
+static volatile sig_atomic_t	gotintr;	/* set by interrupt(), read via seenint() */
+static sigset_t	intrset;	/* SIGINT only; filled in by init_sig() */
+
+static void
+interrupt(int sig, siginfo_t *info, void *uc)	/* SIGINT handler set by init_sig() */
+{
+	gotintr = 1;	/* only records the event; arguments are unused */
+}
+
+void
+blockint(void)	/* add the signals in intrset to the blocked mask */
+{
+	sigprocmask(SIG_BLOCK, &intrset, NULL);	/* intrset holds SIGINT, see init_sig() */
+}
+
+void
+clearint(void)	/* forget any interrupt recorded so far */
+{
+	gotintr = 0;
+}
+
+void
+init_sig(void)	/* install the SIGINT handler and build the blockint() mask */
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = interrupt;
+	sa.sa_flags = SA_SIGINFO;	/* sa_sigaction is only honoured with this flag */
+	sigaction(SIGINT, &sa, NULL);
+	sigemptyset(&intrset);
+	sigaddset(&intrset, SIGINT);
+}
+
+int
+seenint(void)	/* nonzero once interrupt() has run since the last clearint() */
+{
+	return gotintr;
+}
+
+void
+unblockint(void)	/* remove the signals in intrset from the blocked mask */
+{
+	sigprocmask(SIG_UNBLOCK, &intrset, NULL);	/* intrset holds SIGINT, see init_sig() */
+}
diff --git a/db/sig.h b/db/sig.h
new file mode 100644
index 000000000..8bea24748
--- /dev/null
+++ b/db/sig.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	blockint(void);
+extern void	clearint(void);
+extern void	init_sig(void);
+extern int	seenint(void);
+extern void	unblockint(void);
diff --git a/db/strvec.c b/db/strvec.c
new file mode 100644
index 000000000..d346188e1
--- /dev/null
+++ b/db/strvec.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "strvec.h"
+#include "output.h"
+#include "malloc.h"
+
+static int	count_strvec(char **vec);
+
+/*
+ * Append an xstrdup() copy of str to the NULL-terminated vector *vecp.
+ * The vector is grown with xrealloc(), so *vecp may change.
+ */
+void
+add_strvec(char ***vecp, char *str)
+{
+	char	*copy = xstrdup(str);
+	char	**grown = *vecp;
+	int	used = count_strvec(grown);
+
+	/* one slot for the new string, one for the terminator */
+	grown = xrealloc(grown, sizeof(*grown) * (used + 2));
+	grown[used] = copy;
+	grown[used + 1] = NULL;
+	*vecp = grown;
+}
+
+/* Return a newly allocated deep copy of the NULL-terminated vector vec. */
+char **
+copy_strvec(char **vec)
+{
+	char	**src;
+	char	**dst;
+	char	**copy;
+
+	copy = new_strvec(count_strvec(vec));
+	for (src = vec, dst = copy; *src != NULL; src++, dst++)
+		*dst = xstrdup(*src);
+	return copy;
+}
+
+/* Count the entries of vec that precede its NULL terminator. */
+static int
+count_strvec(char **vec)
+{
+	int	n = 0;
+
+	while (vec[n] != NULL)
+		n++;
+	return n;
+}
+
+/* Release every string in vec with xfree(), then the vector itself. */
+void
+free_strvec(char **vec)
+{
+	char	**entry;
+
+	for (entry = vec; *entry != NULL; entry++)
+		xfree(*entry);
+	xfree(vec);
+}
+
+/* Allocate count string slots plus a NULL terminator (slots left unset). */
+char **
+new_strvec(int count)
+{
+	char	**vec;
+
+	vec = xmalloc((count + 1) * sizeof(*vec));
+	vec[count] = NULL;
+	return vec;
+}
+
+/* Emit each string of vec through dbprintf(), with no separator added. */
+void
+print_strvec(char **vec)
+{
+	char	**entry = vec;
+
+	while (*entry != NULL)
+		dbprintf("%s", *entry++);
+}
diff --git a/db/strvec.h b/db/strvec.h
new file mode 100644
index 000000000..f74f3790e
--- /dev/null
+++ b/db/strvec.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	add_strvec(char ***vecp, char *str);
+extern char	**copy_strvec(char **vec);
+extern void	free_strvec(char **vec);
+extern char	**new_strvec(int count);
+extern void	print_strvec(char **vec);
diff --git a/db/type.c b/db/type.c
new file mode 100644
index 000000000..50381c440
--- /dev/null
+++ b/db/type.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "block.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "print.h"
+#include "sb.h"
+#include "inode.h"
+#include "bnobt.h"
+#include "cntbt.h"
+#include "inobt.h"
+#include "bmapbt.h"
+#include "bmroot.h"
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "dir.h"
+#include "dirshort.h"
+#include "io.h"
+#include "output.h"
+#include "write.h"
+#include "attr.h"
+#include "dquot.h"
+#include "dir2.h"
+
+static const typ_t	*findtyp(char *name);
+static int		type_f(int argc, char **argv);
+
+const typ_t	*cur_typ;
+
+static const cmdinfo_t	type_cmd =
+	{ "type", NULL, type_f, 0, 1, 1, "[newtype]",
+	  "set/show current data type", NULL };
+
+const typ_t	typtab[] = {	/* entry i must have typnm == i (ASSERTed in findtyp) */
+	{ TYP_AGF, "agf", handle_struct, agf_hfld },
+	{ TYP_AGFL, "agfl", handle_struct, agfl_hfld },
+	{ TYP_AGI, "agi", handle_struct, agi_hfld },
+	{ TYP_ATTR, "attr", handle_struct, attr_hfld },
+	{ TYP_BMAPBTA, "bmapbta", handle_struct, bmapbta_hfld },
+	{ TYP_BMAPBTD, "bmapbtd", handle_struct, bmapbtd_hfld },
+	{ TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld },
+	{ TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld },
+	{ TYP_DATA, "data", handle_block, NULL },
+	{ TYP_DIR, "dir", handle_struct, dir_hfld },
+	{ TYP_DIR2, "dir2", handle_struct, dir2_hfld },
+	{ TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld },
+	{ TYP_INOBT, "inobt", handle_struct, inobt_hfld },
+	{ TYP_INODATA, "inodata", NULL, NULL },	/* NULL handler: "write" is refused */
+	{ TYP_INODE, "inode", handle_struct, inode_hfld },
+	{ TYP_LOG, "log", NULL, NULL },
+	{ TYP_RTBITMAP, "rtbitmap", NULL, NULL },
+	{ TYP_RTSUMMARY, "rtsummary", NULL, NULL },
+	{ TYP_SB, "sb", handle_struct, sb_hfld },
+	{ TYP_SYMLINK, "symlink", handle_string, NULL },
+	{ TYP_NONE, NULL }	/* name == NULL terminates the table */
+};
+
+/* Look up name in typtab; returns the matching entry or NULL if none. */
+static const typ_t *
+findtyp(char *name)
+{
+	int	idx;
+
+	for (idx = 0; typtab[idx].name != NULL; idx++) {
+		ASSERT(typtab[idx].typnm == (typnm_t)idx);
+		if (!strcmp(typtab[idx].name, name))
+			return &typtab[idx];
+	}
+	return NULL;
+}
+
+/*
+ * "type" command: with no argument, show cur_typ and list every typtab
+ * name; with one, retype iocur_top to the named type.  Always returns 0.
+ */
+static int
+type_f(int argc, char **argv)
+{
+	const typ_t	*tt;
+	int		printed;
+
+	if (argc != 1) {
+		tt = findtyp(argv[1]);
+		if (tt == NULL)
+			dbprintf("no such type %s\n", argv[1]);
+		else if (iocur_top->typ == NULL)
+			dbprintf("no current object\n");
+		else
+			iocur_top->typ = cur_typ = tt;
+		return 0;
+	}
+
+	if (cur_typ != NULL)
+		dbprintf("current type is \"%s\"\n", cur_typ->name);
+	else
+		dbprintf("no current type\n");
+
+	dbprintf("\n supported types are:\n ");
+	printed = 0;
+	for (tt = typtab; tt->name != NULL; tt++) {
+		if (tt[1].name == NULL) {
+			dbprintf("%s\n", tt->name);
+			continue;
+		}
+		dbprintf("%s, ", tt->name);
+		/* start a new output line after every eighth name */
+		if (++printed % 8 == 0)
+			dbprintf("\n ");
+	}
+	return 0;
+}
+
+void
+type_init(void)	/* register the "type" command via add_command() */
+{
+	add_command(&type_cmd);
+}
+
+/* read/write selectors for each major data type */
+
+/*
+ * Struct handler: DB_WRITE -> write_struct(), otherwise print_struct().
+ */
+void
+handle_struct(int action, const field_t *fields, int argc, char **argv)
+{
+	if (action != DB_WRITE) {
+		print_struct(fields, argc, argv);
+		return;
+	}
+	write_struct(fields, argc, argv);
+}
+
+/*
+ * String handler: DB_WRITE -> write_string(), otherwise print_string().
+ */
+void
+handle_string(int action, const field_t *fields, int argc, char **argv)
+{
+	if (action != DB_WRITE) {
+		print_string(fields, argc, argv);
+		return;
+	}
+	write_string(fields, argc, argv);
+}
+
+/*
+ * Block handler: DB_WRITE -> write_block(), otherwise print_block().
+ */
+void
+handle_block(int action, const field_t *fields, int argc, char **argv)
+{
+	if (action != DB_WRITE) {
+		print_block(fields, argc, argv);
+		return;
+	}
+	write_block(fields, argc, argv);
+}
diff --git a/db/type.h b/db/type.h
new file mode 100644
index 000000000..9108c8d9c
--- /dev/null
+++ b/db/type.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+#define	szof(x,y)	sizeof(((x *)0)->y)	/* size of member y of type x */
+#define	szcount(x,y)	(szof(x,y) / szof(x,y[0]))	/* element count of array member y */
+
+typedef enum typnm	/* type numbers; used as indices into typtab[] */
+{
+	TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA,
+	TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_DATA, TYP_DIR,
+	TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE,
+	TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK,
+	TYP_NONE	/* number of the typtab[] terminator entry */
+} typnm_t;
+
+#define DB_WRITE 1
+#define DB_READ  0
+
+typedef void (*opfunc_t)(const struct field *fld, int argc, char **argv);
+typedef void (*pfunc_t)(int action, const struct field *fld, int argc, char **argv);
+
+typedef struct typ	/* one entry of typtab[] */
+{
+	typnm_t			typnm;	/* type number */
+	char			*name;	/* type name; NULL in the terminator entry */
+	pfunc_t			pfunc;	/* read/write handler; may be NULL */
+	const struct field	*fields;	/* handed to pfunc as its fld argument */
+} typ_t;
+extern const typ_t	typtab[], *cur_typ;
+
+extern void	type_init(void);
+extern void	handle_block(int action, const struct field *fields, int argc,
+			     char **argv);
+extern void	handle_string(int action, const struct field *fields, int argc,
+			      char **argv);
+extern void	handle_struct(int action, const struct field *fields, int argc,
+			      char **argv);
diff --git a/db/uuid.c b/db/uuid.c
new file mode 100644
index 000000000..1c1bf3290
--- /dev/null
+++ b/db/uuid.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "uuid.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int	uuid_f(int argc, char **argv);
+static void     uuid_help(void);
+static int	label_f(int argc, char **argv);
+static void     label_help(void);
+
+static const cmdinfo_t	uuid_cmd =
+	{ "uuid", NULL, uuid_f, 0, 1, 1, "[uuid]",
+	  "write/print FS uuid", uuid_help };
+static const cmdinfo_t	label_cmd =
+	{ "label", NULL, label_f, 0, 1, 1, "[label]",
+	  "write/print FS label", label_help };
+static int	warned;
+
+static void
+uuid_help(void)	/* print the long help text for the "uuid" command */
+{
+	dbprintf(
+"\n"
+" write/print FS uuid\n"
+"\n"
+" Example:\n"
+"\n"
+" 'uuid'                                      - print UUID\n"
+" 'uuid 01234567-0123-0123-0123-0123456789ab' - write UUID\n"
+" 'uuid generate'                             - generate and write\n"
+" 'uuid rewrite'                              - copy UUID from SB 0\n"
+" 'uuid null'                                 - write a null uuid\n"
+"\n"
+"The print function checks the UUID in each SB and will warn if the UUIDs\n"
+"differ between AGs (the log is not checked). The write commands will\n"
+"set the uuid in all AGs to either a specified value, a newly generated\n"
+"value, the value found in the first superblock (SB 0) or a null value\n"
+"respectively. As a side effect of writing the UUID, the log is cleared\n"
+"(which is fine on a CLEANLY unmounted FS).\n"
+"\n"
+);
+}
+
+static void
+label_help(void)	/* print the long help text for the "label" command */
+{
+	dbprintf(
+"\n"
+" write/print FS label\n"
+"\n"
+" Example:\n"
+"\n"
+" 'label'              - print label\n"
+" 'label 123456789012' - write label\n"
+" 'label --'           - write an empty label\n"
+"\n"
+"The print function checks the label in each SB and will warn if the labels\n"
+"differ between AGs. The write commands will set the label in all AGs to the\n"
+"specified value.  The maximum length of a label is 12 characters - use of a\n"
+"longer label will result in truncation and a warning will be issued.\n"
+"\n"
+);
+}
+
+/*
+ * Push an I/O cursor set to the superblock of AG agno and translate it
+ * into *sb.  Returns 1 with that cursor still on top of the stack, or 0
+ * after reporting the problem and popping the cursor again.
+ */
+static int
+get_sb(xfs_agnumber_t agno, xfs_sb_t *sb)
+{
+	push_cur();
+	set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1,
+		DB_RING_IGN, NULL);
+
+	if (!iocur_top->data) {
+		dbprintf("can't read superblock for AG %u\n", agno);
+		goto fail;
+	}
+
+	libxfs_xlate_sb(iocur_top->data, sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+	if (sb->sb_magicnum != XFS_SB_MAGIC)
+		dbprintf("bad sb magic # %#x in AG %u\n", sb->sb_magicnum, agno);
+	else if (!XFS_SB_GOOD_VERSION(sb))
+		dbprintf("bad sb version # %#x in AG %u\n",
+			sb->sb_versionnum, agno);
+	else if (agno == 0 && sb->sb_inprogress != 0)
+		dbprintf("mkfs not completed successfully\n");
+	else
+		return 1;	/* success: the cursor stays pushed */
+fail:
+	pop_cur();	/* undo push_cur() on every failure path */
+	return 0;
+}
+
+/* Get (uuid == NULL; returns a static copy) or set the UUID in AG agno's
+ * superblock.  Returns NULL if get_sb() fails. */
+static uuid_t *
+do_uuid(xfs_agnumber_t agno, uuid_t *uuid)
+{
+	static uuid_t	saved;
+	xfs_sb_t	sbuf;
+
+	if (get_sb(agno, &sbuf) == 0)
+		return NULL;
+	if (uuid != NULL) {
+		memcpy(&sbuf.sb_uuid, uuid, sizeof(uuid_t));
+		libxfs_xlate_sb(iocur_top->data, &sbuf, -1, ARCH_CONVERT, XFS_SB_UUID);
+		write_cur();
+		return uuid;
+	}
+	memcpy(&saved, &sbuf.sb_uuid, sizeof(uuid_t));
+	pop_cur();
+	return &saved;
+}
+
+/*
+ * Get (label == NULL) or set the label in AG agno's superblock; returns a
+ * static NUL-terminated copy, or NULL if get_sb() fails.
+ */
+static char *
+do_label(xfs_agnumber_t agno, char *label)
+{
+	size_t		len;
+	xfs_sb_t	tsb;
+	static char 	lbl[sizeof(tsb.sb_fname) + 1];
+
+	if (!get_sb(agno, &tsb))
+		return NULL;
+	memset(&lbl[0], 0, sizeof(lbl));
+	if (!label) {	/* get label */
+		pop_cur();
+		memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
+		return &lbl[0];
+	}
+	/* set label */
+	if ((len = strlen(label)) > sizeof(tsb.sb_fname)) {
+		if (!warned++)	/* casts: %d takes int, not size_t */
+			dbprintf("warning: truncating label from %d to %d "
+				"characters\n", (int)len, (int)sizeof(tsb.sb_fname));
+		len = sizeof(tsb.sb_fname);
+	}
+	if (len == 2 && (strcmp(label, "\"\"") == 0 ||
+	    strcmp(label, "''") == 0 || strcmp(label, "--") == 0))
+		label[0] = label[1] = '\0';	/* "", '' and -- mean empty */
+	memset(&tsb.sb_fname, 0, sizeof(tsb.sb_fname));
+	memcpy(&tsb.sb_fname, label, len);
+	memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
+	libxfs_xlate_sb(iocur_top->data, &tsb, -1, ARCH_CONVERT, XFS_SB_FNAME);
+	write_cur();
+	return &lbl[0];
+}
+
+/*
+ * "uuid" command.
+ *
+ * With no argument, read the UUID from the superblock of every AG, warn
+ * if a copy differs from AG 0's, and print the AG 0 value.  With one
+ * argument ("generate", "null", "rewrite" or a UUID string), clear the
+ * log with the new UUID and then store it in every superblock.
+ * Always returns 0.
+ */
+static int
+uuid_f(int argc, char **argv)
+{
+	char		text[40];
+	xfs_agnumber_t	ag;
+	uuid_t		id;
+	uuid_t		*found = NULL;
+
+	if (argc != 1 && argc != 2) {
+		dbprintf("invalid parameters\n");
+		return 0;
+	}
+
+	if (argc == 1) {
+		/* get (check) uuid */
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+			found = do_uuid(ag, NULL);
+			if (found == NULL) {
+				dbprintf("failed to read UUID from AG %d\n", ag);
+				return 0;
+			}
+			if (ag == 0) {
+				/* AG 0 supplies the reference copy */
+				memcpy(id, found, sizeof(uuid_t));
+				continue;
+			}
+			if (memcmp(&id, found, sizeof(uuid_t)) != 0) {
+				dbprintf("warning: uuid copies differ\n");
+				break;
+			}
+		}
+
+		/* sb_logstart != 0 is treated as "internal log" here */
+		if (mp->m_sb.sb_logstart && xfsargs.logdev)
+			dbprintf("warning: external log specified for FS with internal log\n");
+		else if (!mp->m_sb.sb_logstart && !xfsargs.logdev)
+			dbprintf("warning: no external log specified for FS with external log\n");
+
+		/* report the AG 0 copy */
+		uuid_unparse(id, text);
+		dbprintf("uuid = %s\n", text);
+		return 0;
+	}
+
+	/* write uuid */
+	if (flag_readonly || !flag_expert_mode) {
+		dbprintf("%s not started in read-write expert mode, writing disabled\n",
+			progname);
+		return 0;
+	}
+
+	if (strcasecmp(argv[1], "generate") == 0) {
+		uuid_generate(id);
+	} else if (strcasecmp(argv[1], "null") == 0) {
+		uuid_clear(id);
+	} else if (strcasecmp(argv[1], "rewrite") == 0) {
+		/* reuse the UUID currently stored in AG 0 */
+		found = do_uuid(0, NULL);
+		if (found == NULL) {
+			dbprintf("failed to read UUID from AG 0\n");
+			return 0;
+		}
+		memcpy(&id, *found, sizeof(uuid_t));
+		uuid_unparse(id, text);
+		dbprintf("old uuid = %s\n", text);
+	} else if (uuid_parse(argv[1], id) != 0) {
+		dbprintf("invalid uuid\n");
+		return 0;
+	}
+
+	/* refuse to go on if xfsargs.logdev disagrees with the log location */
+	if (mp->m_sb.sb_logstart && xfsargs.logdev) {
+		dbprintf("external log specified for FS with internal log - aborting \n");
+		return 0;
+	}
+	if (!mp->m_sb.sb_logstart && !xfsargs.logdev) {
+		dbprintf("no external log specified for FS with external log - aborting\n");
+		return 0;
+	}
+
+	dbprintf("clearing log and setting uuid\n");
+
+	/* clear log (setting uuid); an internal log lives on xfsargs.ddev */
+	if (libxfs_log_clear(
+			mp->m_sb.sb_logstart ? xfsargs.ddev : xfsargs.logdev,
+			XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
+			XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
+			&id,
+			XLOG_FMT)) {
+		dbprintf("error clearing log\n");
+		return 0;
+	}
+
+	dbprintf("writing all SBs\n");
+
+	/* a failed write ends the loop; the UUID is still printed below */
+	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		if (do_uuid(ag, &id) == NULL) {
+			dbprintf("failed to set uuid in AG %d\n", ag);
+			break;
+		}
+	}
+
+	uuid_unparse(id, text);
+	dbprintf("new uuid = %s\n", text);
+
+	return 0;
+}
+
+/*
+ * "label" command: one argument writes it as the label of every AG; none
+ * prints the label, warning about AGs that differ from AG 0.  Returns 0.
+ */
+static int
+label_f(int argc, char **argv)
+{
+	char		*p = NULL;
+	xfs_sb_t	sb;
+	xfs_agnumber_t	ag;
+
+	if (argc != 1 && argc != 2) {
+		dbprintf("invalid parameters\n");
+		return 0;
+	}
+	if (argc == 2) {	/* write label */
+		if (flag_readonly || !flag_expert_mode) {
+			dbprintf("%s not started in read-write expert mode, "
+				"writing disabled\n", progname);
+			return 0;
+		}
+		dbprintf("writing all SBs\n");
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++)
+			if ((p = do_label(ag, argv[1])) == NULL) {
+				dbprintf("failed to set label in AG %d\n", ag);
+				return 0;	/* p is NULL: no label to print */
+			}
+		dbprintf("new label = \"%s\"\n", p);
+	} else {	/* print label */
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+			p = do_label(ag, NULL);
+			if (!p) {
+				dbprintf("failed to read label in AG %d\n", ag);
+				return 0;
+			}
+			if (!ag)
+				memcpy(&sb.sb_fname, p, sizeof(sb.sb_fname));
+			else if (memcmp(&sb.sb_fname, p, sizeof(sb.sb_fname)))
+				dbprintf("warning: label in AG %d differs\n", ag);
+		}
+		dbprintf("label = \"%s\"\n", p);
+	}
+	return 0;
+}
+
+void
+uuid_init(void)	/* reset the label-truncation warning flag; register "label" and "uuid" */
+{
+	warned = 0;
+	add_command(&label_cmd);
+	add_command(&uuid_cmd);
+}
diff --git a/db/uuid.h b/db/uuid.h
new file mode 100644
index 000000000..8997237ed
--- /dev/null
+++ b/db/uuid.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void	uuid_init(void);
diff --git a/db/write.c b/db/write.c
new file mode 100644
index 000000000..32477f237
--- /dev/null
+++ b/db/write.c
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <ctype.h>
+#include <time.h>
+#include "bit.h"
+#include "block.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "flist.h"
+#include "io.h"
+#include "output.h"
+#include "print.h"
+#include "write.h"
+#include "malloc.h"
+
+static int	write_f(int argc, char **argv);
+static void     write_help(void);
+
+static const cmdinfo_t	write_cmd =
+	{ "write", NULL, write_f, 0, -1, 0, "[field or value]...",
+	  "write value to disk", write_help };
+
+/* Register "write" (expert mode only) and seed lrand48() for bwrite_random(). */
+void
+write_init(void)
+{
+	if (flag_expert_mode) {
+		add_command(&write_cmd);
+		srand48(clock());
+	}
+}
+
+static void 
+write_help(void)	/* print the long help text for the "write" command */
+{
+	dbprintf(
+"\n"
+" The 'write' command takes on different personalities depending on the\n"
+" type of object being worked with.\n\n"
+" Write has 3 modes:\n"
+"  'struct mode' - is active anytime you're looking at a filesystem object\n"
+"                  which contains individual fields (ex: an inode).\n"
+"  'data mode'   - is active anytime you set a disk address directly or set\n"
+"                  the type to 'data'.\n"
+"  'string mode' - only used for writing symlink blocks.\n"
+"\n"
+" Examples:\n"
+"  Struct mode: 'write core.uid 23'          - set an inode uid field to 23.\n"
+"               'write fname \"hello\\000\"'    - write superblock fname.\n"
+"               (note: in struct mode strings are not null terminated)\n"
+"               'write fname #6669736800'    - write superblock fname with hex.\n"
+"               'write uuid 00112233-4455-6677-8899-aabbccddeeff'\n" 
+"                                            - write superblock uuid.\n"
+"  Data mode:   'write fill 0xff' - fill the entire block with 0xff's\n"
+"               'write lshift 3' - shift the block 3 bytes to the left\n"
+"               'write sequence 1 5' - write a cycle of number [1-5] through\n"
+"                                      the entire block.\n"
+"  String mode: 'write \"This_is_a_filename\" - write null terminated string.\n"
+"\n"
+" In data mode type 'write' by itself for a list of specific commands.\n\n"
+);
+
+}
+
+/*
+ * "write" command: hand the remaining arguments to the current type's
+ * handler with action DB_WRITE.  Refuses when started read-only, when no
+ * type is set, or when the type has no handler.  Always returns 0.
+ */
+static int
+write_f(int argc, char **argv)
+{
+	extern char	*progname;
+	pfunc_t		handler;
+
+	if (flag_readonly) {
+		dbprintf("%s started in read only mode, writing disabled\n",
+			progname);
+		return 0;
+	}
+
+	if (cur_typ == NULL) {
+		dbprintf("no current type\n");
+		return 0;
+	}
+
+	handler = cur_typ->pfunc;
+	if (handler != NULL) {
+		/* move past the "write" command */
+		(*handler)(DB_WRITE, cur_typ->fields, argc - 1, argv + 1);
+		return 0;
+	}
+
+	/* this type has a NULL pfunc, so there is nothing to call */
+	dbprintf("no handler function for type %s, write unsupported.\n",
+		 cur_typ->name);
+	return 0;
+}
+
+/*
+ * Compare the significant portion of two command strings.
+ * Returns 1 when s1 and s2 are identical, or when one is a proper prefix
+ * of the other and at least sig characters matched (sig == 0 disables
+ * prefix matching).  Returns 0 otherwise, including when either is NULL.
+ */
+static int
+sigcmp(char *s1, char *s2, int sig)
+{
+	int	matched = 0;
+
+	if (s1 == NULL || s2 == NULL)
+		return 0;
+
+	while (*s1 == *s2) {
+		if (*s1 == '\0')
+			return 1;	/* reached both terminators together */
+		matched++;
+		s1++;
+		s2++;
+	}
+	/* a mismatch before either string ended is never a match */
+	if (*s1 != '\0' && *s2 != '\0')
+		return 0;
+	return sig != 0 && matched >= sig;
+}
+
+/*
+ * Shift len bytes at offset start of the current buffer left by shift
+ * bytes, zero-filling the tail.  -1 selects a default (shift 1, start 0,
+ * len to end of buffer); out-of-range requests are reported and ignored.
+ */
+/* ARGSUSED */
+static void
+bwrite_lshift(int start, int len, int shift, int from, int to)
+{
+	char *base;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+	if (start < 0 || len < 0 || shift < 0 || len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)\n",
+			 len, iocur_top->len);
+		return;		/* never touch memory outside the buffer */
+	}
+	if (shift > len)
+		shift = len;	/* shifting everything out leaves zeroes */
+	base = (char *)iocur_top->data + start;
+	memmove(base, base+shift, len-shift);	/* regions overlap */
+	memset(base+(len-shift), 0, shift);
+}
+
+/*
+ * Shift len bytes at offset start of the current buffer right by shift
+ * bytes, zero-filling the head.  -1 selects a default (shift 1, start 0,
+ * len to end of buffer); out-of-range requests are reported and ignored.
+ */
+/* ARGSUSED */
+static void
+bwrite_rshift(int start, int len, int shift, int from, int to)
+{
+	char *base;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+	if (start < 0 || len < 0 || shift < 0 || len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)\n",
+			 len, iocur_top->len);
+		return;		/* never touch memory outside the buffer */
+	}
+	if (shift > len)
+		shift = len;	/* shifting everything out leaves zeroes */
+	base = (char *)iocur_top->data + start;
+	memmove(base+shift, base, len-shift);	/* regions overlap */
+	memset(base, 0, shift);
+}
+
+/*
+ * Rotate len bytes at offset start of the current buffer left by shift.
+ * -1 selects a default; out-of-range requests are reported and ignored.
+ */
+/* ARGSUSED */
+static void
+bwrite_lrot(int start, int len, int shift, int from, int to)
+{
+	char *base, *hold_region;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+	if (start < 0 || len < 0 || shift < 0 || len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)\n",
+			 len, iocur_top->len);
+		return;		/* never touch memory outside the buffer */
+	}
+	if (len == 0 || (shift %= len) == 0)
+		return;		/* a whole number of turns moves nothing */
+
+	base = (char *)iocur_top->data + start;
+	hold_region = xmalloc(shift);
+	memcpy(hold_region, base, shift);
+	memmove(base, base+shift, len-shift);	/* regions overlap */
+	memcpy(base+(len-shift), hold_region, shift);
+	xfree(hold_region);	/* temporary copy is no longer needed */
+}
+
+/*
+ * Rotate len bytes at offset start of the current buffer right by shift.
+ * -1 selects a default; out-of-range requests are reported and ignored.
+ */
+/* ARGSUSED */
+static void
+bwrite_rrot(int start, int len, int shift, int from, int to)
+{
+	char *base, *hold_region;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+	if (start < 0 || len < 0 || shift < 0 || len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)\n",
+			 len, iocur_top->len);
+		return;		/* never touch memory outside the buffer */
+	}
+	if (len == 0 || (shift %= len) == 0)
+		return;		/* a whole number of turns moves nothing */
+
+	base = (char *)iocur_top->data + start;
+	hold_region = xmalloc(shift);
+	memcpy(hold_region, base+(len-shift), shift);
+	memmove(base+shift, base, len-shift);	/* regions overlap */
+	memcpy(base, hold_region, shift);
+	xfree(hold_region);	/* temporary copy is no longer needed */
+}
+
+/*
+ * Fill len bytes at offset start of the current buffer with the cyclic
+ * byte sequence from, from+step, ... confined to the range [from, to]
+ * (counting downwards when from > to).  -1 selects a default: start 0,
+ * len to end of buffer, from 0, to 255, step 1.  Out-of-range requests
+ * are reported and ignored.
+ */
+/* ARGSUSED */
+static void
+bwrite_seq(int start, int len, int step, int from, int to)
+{
+	int i, tmp, base, range, top;
+	char *buf;
+
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+	if (start < 0 || len < 0 || len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)\n",
+			 len, iocur_top->len);
+		return;		/* never write outside the buffer */
+	}
+
+	if (from == -1 || from > 255)
+		from = 0;
+	if (to == -1 || to > 255)
+		to = 255;
+	if (step == -1)
+		step = 1;
+
+	base = from;
+	top = to;
+	if (from > to) {
+		base = to;
+		top = from;
+		if (step > 0)
+			step = -step;
+	}
+	range = top - base;
+
+	/* honour the start offset, and begin the cycle at "from" */
+	buf = (char *)iocur_top->data + start;
+	tmp = from - base;
+	for (i = 0; i < len; i++) {
+		*buf++ = tmp + base;
+		tmp = (tmp + step) % (range+1);
+		if (tmp < 0)	/* C's % keeps the sign of a negative step */
+			tmp += range+1;
+	}
+}
+
+/*
+ * Overwrite len bytes at offset start with lrand48() output; -1 picks the
+ * default start (0) or len (to end of buffer).  shift/from/to are unused.
+ */
+/* ARGSUSED */
+static void
+bwrite_random(
+	int   start,
+	int   len,
+	int   shift,
+	int   from,
+	int   to)
+{
+	int i;
+	char *buf = (char *)iocur_top->data;
+
+	start = (start == -1) ? 0 : start;
+	len = (len == -1) ? iocur_top->len - start : len;
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			 len, iocur_top->len);
+		return;
+	}
+	for (i = start; i < start+len; i++)
+		buf[i] = (char)lrand48();	/* index by i so start is honoured */
+}
+
+/*
+ * Set len bytes at offset start to value.  Passing -1 picks the default:
+ * value 0, start 0, len to end of buffer.  from and to are unused.
+ */
+/* ARGSUSED */
+static void
+bwrite_fill(
+	int   start,
+	int   len,
+	int   value,
+	int   from,
+	int   to)
+{
+	char *base;
+
+	value = (value == -1) ? 0 : value;
+	start = (start == -1) ? 0 : start;
+	len = (len == -1) ? iocur_top->len - start : len;
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			 len, iocur_top->len);
+		return;		/* refuse to touch bytes past the buffer */
+	}
+
+	base = (char *)iocur_top->data + start;
+	memset(base, value, len);
+}
+
+static struct bw_cmd {
+	void	(*cmdfunc)(int,int,int,int,int); /* start,len,shiftcount,from,to */
+	char	*cmdstr;	/* subcommand name matched against argv[0] */
+	int	sig_chars;	/* length handed to sigcmp() for that match */
+	int	argmin;		/* fewest operands after the subcommand */
+	int	argmax;		/* most operands after the subcommand */
+	int	shiftcount_arg;	/* the *_arg fields give the argv index of */
+	int	from_arg;	/* each operand; 0 means the subcommand */
+	int	to_arg;		/* does not take that operand */
+	int	start_arg;
+	int	len_arg;
+	char	*usage;		/* operand synopsis printed in the usage text */
+} bw_cmdtab[] = {
+	/* cmd   sig min max sh frm to start len */
+	{ bwrite_lshift, "lshift",   2, 0, 3, 1, 0, 0, 2, 3,
+		"[shiftcount] [start] [len]", },
+	{ bwrite_rshift, "rshift",   2, 0, 3, 1, 0, 0, 2, 3,
+		"[shiftcount] [start] [len]", },
+	{ bwrite_lrot,   "lrot",     2, 0, 3, 1, 0, 0, 2, 3,
+		"[shiftcount] [start] [len]", },
+	{ bwrite_rrot,   "rrot",     2, 0, 3, 1, 0, 0, 2, 3,
+		"[shiftcount] [start] [len]", },
+	{ bwrite_seq,    "sequence", 3, 0, 4, 0, 1, 2, 3, 4,
+		"[from] [to] [start] [len]", },
+	{ bwrite_random, "random",   3, 0, 2, 0, 0, 0, 1, 2,
+		"[start] [len]", },
+	{ bwrite_fill,   "fill",     1, 1, 3, 1, 0, 0, 2, 3,
+		"num [start] [len]" }
+};
+
+#define BWRITE_CMD_MAX (sizeof(bw_cmdtab)/sizeof(bw_cmdtab[0]))
+
+/*
+ * Parse up to three leading octal digits of arg, store their value
+ * (masked to 8 bits) in *ret and return how many characters were used.
+ */
+static int
+convert_oct(
+	char *arg,
+	int  *ret)
+{
+	int count;
+	int i;
+	int val = 0;
+
+	/* only allow 1 case, '\' and 3 octal digits (or less) */
+	for (count = 0; count < 3; count++) {
+		if (arg[count] == '\0')
+			break;
+		/* stop at the first character outside '0'..'7' */
+		if ((arg[count] < '0') || (arg[count] > '7'))
+			break;
+	}
+	for (i = 0; i < count; i++) {
+		val |= ((arg[(count-1)-i]-'0')&0x07)<<(i*3);
+	}
+	*ret = val&0xff;
+	return(count);
+}
+
+/* value of hex digit x (ctype calls need an unsigned char argument) */
+#define NYBBLE(x) (isdigit((unsigned char)(x)) ? ((x)-'0') : \
+		   (tolower((unsigned char)(x))-'a'+0xa))
+
+/*
+ * Convert the text of a value to be written into raw bytes for a field
+ * of bit_length bits.  A leading '"' selects a string (with \ooo octal
+ * escapes), a leading '#' or any '-' selects a hex byte string / uuid,
+ * anything else is an integer parsed by strtoll().  The result points
+ * into a buffer that the next call reuses; NULL is returned when a hex
+ * string is malformed or longer than the field.
+ */
+static char *
+convert_arg(
+	char *arg,
+	int  bit_length)
+{
+	int i;
+	static char *buf = NULL;
+	char *rbuf;
+	long long *value;
+	int alloc_size;
+	char *ostr;
+	int octval, ret;
+
+	if (bit_length <= 64)
+		alloc_size = 8;
+	else
+		alloc_size = (bit_length+7)/8;
+
+	buf = xrealloc(buf, alloc_size);
+	memset(buf, 0, alloc_size);
+	value = (long long *)buf;
+	rbuf = buf;
+
+	if (*arg == '\"') {
+		/* handle strings: zap closing quote if there is one */
+		if ((ostr = strrchr(arg+1, '\"')) != NULL)
+			*ostr = '\0';
+
+		ostr = arg+1;
+		for (i = 0; i < alloc_size; i++) {
+			if (!*ostr)
+				break;
+
+			/* do octal: only when a digit 0-7 follows the '\' */
+			if (*ostr == '\\') {
+				if (*(ostr+1) >= '0' && *(ostr+1) <= '7') {
+					ret = convert_oct(ostr+1, &octval);
+					*rbuf++ = octval;
+					ostr += ret+1;
+					continue;
+				}
+			}
+			*rbuf++ = *ostr++;
+		}
+		return buf;
+	} else if (arg[0] == '#' || strchr(arg,'-')) {
+		/*
+		 * handle hex blocks ie
+		 *    #00112233445566778899aabbccddeeff
+		 * and uuids ie
+		 *    1122334455667788-99aa-bbcc-ddee-ff00112233445566778899
+		 */
+		int bytes=bit_length/8;
+		/* skip leading hash */
+		if (*arg=='#') arg++;
+
+		while (*arg && bytes--) {
+			/* skip hyphens, then get first nybble */
+			while (*arg=='-') arg++;
+			if (!isxdigit((unsigned char)*arg)) return NULL;
+			*rbuf=NYBBLE(*arg)<<4;
+			arg++;
+
+			/* skip more hyphens, then get second nybble */
+			while (*arg=='-') arg++;
+			if (!isxdigit((unsigned char)*arg)) return NULL;
+			*rbuf++|=NYBBLE(*arg);
+			arg++;
+		}
+		/* text left over once the field is full: value too long */
+		if (bytes<0&&*arg) return NULL;
+		return buf;
+	} else {
+		/* handle integers */
+		*value = strtoll(arg, NULL, 0);
+#if __BYTE_ORDER == BIG_ENDIAN
+		/* hackery for big endian */
+		if (bit_length <= 8) {
+			rbuf += 7;
+		} else if (bit_length <= 16) {
+			rbuf += 6;
+		} else if (bit_length <= 32) {
+			rbuf += 4;
+		}
+#endif
+		return rbuf;
+	}
+}
+
+
+/*
+ * Write the value argv[1] into the field named argv[0] of the current
+ * buffer, flush it with write_cur() and print the field list.
+ */
+/* ARGSUSED */
+void
+write_struct(
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	const ftattr_t	*fa;
+	flist_t		*fl;
+	flist_t		*sfl;
+	int		bit_length;
+	char		*buf;
+	int		parentoffset;
+
+	if (argc != 2) {
+		dbprintf("usage: write fieldname value\n");
+		return;
+	}
+	fl = flist_scan(argv[0]);
+	if (!fl) {
+		dbprintf("unable to parse '%s'.\n", argv[0]);
+		return;
+	}
+
+	/* if we're a root field type, go down 1 layer to get field list */
+	if (fields->name[0] == '\0') {
+		fa = &ftattrtab[fields->ftyp];
+		ASSERT(fa->ftyp == fields->ftyp);
+		fields = fa->subfld;
+	}
+
+	/* run down the field list and set offsets into the data */
+	if (!flist_parse(fields, fl, iocur_top->data, 0)) {
+		flist_free(fl);
+		dbprintf("parsing error\n");
+		return;
+	}
+
+	sfl = fl;
+	parentoffset = 0;
+	while (sfl->child) {
+		parentoffset = sfl->offset;
+		sfl = sfl->child;
+	}
+	bit_length = fsize(sfl->fld, iocur_top->data, parentoffset, 0);
+	bit_length *= fcount(sfl->fld, iocur_top->data, parentoffset);
+
+	/* convert this to a generic conversion routine */
+	/* should be able to handle str, num, or even labels */
+	buf = convert_arg(argv[1], bit_length);
+	if (!buf) {
+		dbprintf("unable to convert value '%s'.\n", argv[1]);
+		flist_free(fl);		/* don't leak the parsed field list */
+		return;
+	}
+	setbitval(iocur_top->data, sfl->offset, bit_length, buf);
+	write_cur();
+	flist_print(fl);
+	print_flist(fl);
+	flist_free(fl);
+}
+
+/* String mode: copy converted argv[0], through its NUL, to buffer start. */
+/* ARGSUSED */
+void
+write_string(
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	char *buf;
+	int i;
+
+	if (argc != 1) {
+		dbprintf("usage (in string mode): write \"string...\"\n");
+		return;
+	}
+	buf = convert_arg(argv[0], (int)((strlen(argv[0])+1)*8));
+	if (!buf) {		/* malformed '#'/'-' hex value */
+		dbprintf("unable to convert value '%s'.\n", argv[0]);
+		return;
+	}
+	for (i = 0; i < iocur_top->len; i++)
+		if ((((char *)iocur_top->data)[i] = *buf++) == '\0')
+			break;
+	write_cur();		/* write back to disk */
+}
+
+/*
+ * Data mode "write": match argv[0] against bw_cmdtab, parse the numeric
+ * operands (absent ones stay -1), run the handler and write the buffer.
+ */
+/* ARGSUSED */
+void
+write_block(
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	int i;
+	int shiftcount = -1;
+	int start = -1;
+	int len = -1;
+	int from = -1;
+	int to = -1;
+	struct bw_cmd *cmd = NULL;
+
+	if (argc <= 1 || argc > 5)
+		goto block_usage;
+
+	for (i = 0; i < BWRITE_CMD_MAX; i++) {
+		if (sigcmp(argv[0], bw_cmdtab[i].cmdstr,
+			   bw_cmdtab[i].sig_chars)) {
+			cmd = &bw_cmdtab[i];
+			break;
+		}
+	}
+	if (!cmd) {
+		dbprintf("write: invalid subcommand\n");
+		goto block_usage;
+	}
+	if ((argc < cmd->argmin + 1) || (argc > cmd->argmax + 1)) {
+		dbprintf("write %s: invalid number of arguments\n",
+			 cmd->cmdstr);
+		goto block_usage;
+	}
+
+	/* a table index of 0 means the subcommand takes no such operand */
+	if (cmd->shiftcount_arg && (cmd->shiftcount_arg < argc))
+		shiftcount = (int)strtoul(argv[cmd->shiftcount_arg], NULL, 0);
+	if (cmd->start_arg && (cmd->start_arg < argc))
+		start =  (int)strtoul(argv[cmd->start_arg], NULL, 0);
+	if (cmd->len_arg && (cmd->len_arg < argc))
+		len = (int)strtoul(argv[cmd->len_arg], NULL, 0);
+	if (cmd->from_arg && (cmd->from_arg < argc))
+		from = (int)strtoul(argv[cmd->from_arg], NULL, 0);
+	if (cmd->to_arg && (cmd->to_arg < argc))
+		to = (int)strtoul(argv[cmd->to_arg], NULL, 0);
+
+	cmd->cmdfunc(start, len, shiftcount, from, to);
+	/* write back to disk */
+	write_cur();
+	return;
+
+  block_usage:
+	dbprintf("usage: write (in data mode)\n");
+	for (i = 0; i < BWRITE_CMD_MAX; i++) {
+		dbprintf("              %-9.9s %s\n",
+			 bw_cmdtab[i].cmdstr, bw_cmdtab[i].usage);
+	}
+	dbprintf("\n");
+}
diff --git a/db/write.h b/db/write.h
new file mode 100644
index 000000000..7e0596f0c
--- /dev/null
+++ b/db/write.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+/* write_block/_string/_struct: data, string and field modes of "write" */
+extern void	write_init(void);
+extern void	write_block(const field_t *fields, int argc, char **argv);
+extern void	write_string(const field_t *fields, int argc, char **argv);
+extern void	write_struct(const field_t *fields, int argc, char **argv);
diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
new file mode 100755
index 000000000..c516ae9f9
--- /dev/null
+++ b/db/xfs_admin.sh
@@ -0,0 +1,60 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+OPTS=""
+USAGE="Usage: xfs_admin [-flu] [-L label] [-U uuid] special"
+
+while getopts "fluL:U:" c
+do
+	case $c in
+	f)	OPTS=$OPTS" -f";;
+	l)	OPTS=$OPTS" -c label";;
+	L)	OPTS=$OPTS" -c 'label "$OPTARG"'";;
+	u)	OPTS=$OPTS" -c uuid";;
+	U)	OPTS=$OPTS" -c 'uuid "$OPTARG"'";;
+	\?)	echo $USAGE 1>&2
+		exit 2
+		;;
+	esac
+done
+set -- extra "$@"	# quoted: OPTIND stays aligned if an argument has blanks
+shift $OPTIND
+case $# in
+	1)	eval xfs_db -x -p xfs_admin $OPTS '"$1"'	# $1 expands inside eval
+		status=$?
+		;;
+	*)	echo $USAGE 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/db/xfs_check.sh b/db/xfs_check.sh
new file mode 100755
index 000000000..bff2ecc32
--- /dev/null
+++ b/db/xfs_check.sh
@@ -0,0 +1,63 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_check [-svf] [-i ino]... [-b bno]... special"
+
+
+while getopts "b:fi:sv" c
+do
+	case $c in
+	s)	OPTS=$OPTS"-s ";;
+	v)	OPTS=$OPTS"-v ";;
+	i)	OPTS=$OPTS"-i "$OPTARG" ";;
+	b)	OPTS=$OPTS"-b "$OPTARG" ";;
+	f)	ISFILE=" -f";;
+	\?)	echo $USAGE 1>&2
+		exit 2
+		;;
+	esac
+done
+set -- extra "$@"	# quoted: OPTIND stays aligned if an argument has blanks
+shift $OPTIND
+case $# in
+	1)	xfs_db$ISFILE -i -p xfs_check -c "check$OPTS" "$1"
+		status=$?
+		;;
+	*)	echo $USAGE 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/db/xfs_check64.sh b/db/xfs_check64.sh
new file mode 100755
index 000000000..930939093
--- /dev/null
+++ b/db/xfs_check64.sh
@@ -0,0 +1,63 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_check64 [-svf] [-i ino]... [-b bno]... special"
+
+
+while getopts "b:fi:sv" c
+do
+	case $c in
+	s)	OPTS=$OPTS"-s ";;
+	v)	OPTS=$OPTS"-v ";;
+	i)	OPTS=$OPTS"-i "$OPTARG" ";;
+	b)	OPTS=$OPTS"-b "$OPTARG" ";;
+	f)	ISFILE=" -f";;
+	\?)	echo $USAGE 1>&2
+		exit 2
+		;;
+	esac
+done
+set -- extra "$@"	# quoted: OPTIND stays aligned if an argument has blanks
+shift $OPTIND
+case $# in
+	1)	xfs_db64$ISFILE -i -p xfs_check64 -c "check$OPTS" "$1"
+		status=$?
+		;;
+	*)	echo $USAGE 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/db/xfs_ncheck.sh b/db/xfs_ncheck.sh
new file mode 100755
index 000000000..3c83e35d3
--- /dev/null
+++ b/db/xfs_ncheck.sh
@@ -0,0 +1,61 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_ncheck [-sf] [-i ino]... special"
+
+# NOTE(review): -b and -v are accepted by getopts but ignored below - confirm
+while getopts "b:fi:sv" c
+do
+	case $c in
+	s)	OPTS=$OPTS"-s ";;
+	i)	OPTS=$OPTS"-i "$OPTARG" ";;
+	f)	ISFILE=" -f";;
+	\?)	echo $USAGE 1>&2
+		exit 2
+		;;
+	esac
+done
+set -- extra "$@"	# quoted: OPTIND stays aligned if an argument has blanks
+shift $OPTIND
+case $# in
+	1)	xfs_db$ISFILE -r -p xfs_ncheck -c "blockget -ns" -c "ncheck$OPTS" "$1"
+		status=$?
+		;;
+	*)	echo $USAGE 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/db/xfs_ncheck64.sh b/db/xfs_ncheck64.sh
new file mode 100755
index 000000000..7fcd3a3de
--- /dev/null
+++ b/db/xfs_ncheck64.sh
@@ -0,0 +1,61 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_ncheck64 [-sf] [-i ino]... special"
+
+# NOTE(review): -b and -v are accepted by getopts but ignored below - confirm
+while getopts "b:fi:sv" c
+do
+	case $c in
+	s)	OPTS=$OPTS"-s ";;
+	i)	OPTS=$OPTS"-i "$OPTARG" ";;
+	f)	ISFILE=" -f";;
+	\?)	echo $USAGE 1>&2
+		exit 2
+		;;
+	esac
+done
+set -- extra "$@"	# quoted: OPTIND stays aligned if an argument has blanks
+shift $OPTIND
+case $# in
+	1)	xfs_db64$ISFILE -r -p xfs_ncheck64 -c "blockget -ns" -c "ncheck$OPTS" "$1"
+		status=$?
+		;;
+	*)	echo $USAGE 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 000000000..abd968598
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+DOCFILES = README.LVM README.xfsdump Porting-Guide
+LSRCFILES = $(DOCFILES)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_DOC_DIR)
+	$(INSTALL) -m 644 $(DOCFILES) $(XFS_CMDS_DOC_DIR)
diff --git a/doc/README.LVM b/doc/README.LVM
new file mode 100644
index 000000000..06eb6def8
--- /dev/null
+++ b/doc/README.LVM
@@ -0,0 +1,77 @@
+XFS on LVM
+__________
+
+PREFACE
+
+This is a quick reference to setting XFS up on LVM. For more information
+please see the LVM HOWTO at:
+
+                http://www.linuxdoc.org/HOWTO/LVM-HOWTO.html
+
+PREREQUISITES
+
+You need a kernel with LVM support either built in or as a module.
+This document assumes lvm as a module.
+
+SETTING UP LVM
+
+>>> Load module
+
+      [root@crash /sbin]# modprobe lvm-mod
+
+>>> Set partition type to 0x8e for partitions you wish to use with LVM
+
+      [root@crash /sbin]# fdisk /dev/sda
+      Command (m for help): t
+      Partition number (1-4): 1
+      Hex code (type L to list codes): 8e
+      Changed system type of partition 1 to 8e (Unknown)
+
+      Command (m for help): w
+      The partition table has been altered!
+
+>>> Write PV superblock on physical volumes
+
+      [root@crash /root]# pvcreate /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1
+      pvcreate -- physical volume "/dev/sda1" successfully created
+      pvcreate -- physical volume "/dev/sdb1" successfully created
+      pvcreate -- physical volume "/dev/sdc1" successfully created
+      pvcreate -- physical volume "/dev/sdd1" successfully created
+
+>>> Create a volume group consisting of the PVs we just set up
+
+[root@crash /root]# vgcreate vg00 /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1
+      vgcreate -- INFO: using default physical extent size 4 MB
+      vgcreate -- INFO: maximum logical volume size is 255.99 Gigabyte
+      vgcreate -- doing automatic backup of volume group "vg00"
+      vgcreate -- volume group "vg00" successfully created and activated
+
+>>> Create a logical volume - striped across 4 PVs, 64 KB chunk size, 20 GB
+
+[root@crash /root]# lvcreate -i 4 -I 64 -L 20G -n lv00 vg00
+      lvcreate -- rounding 20971520 KB to stripe boundary size 20975616 KB / 5121 PE
+      lvcreate -- doing automatic backup of "vg00"
+      lvcreate -- logical volume "/dev/vg00/lv00" successfully created
+
+>>> Build a filesystem on the LV
+
+[root@crash /root]# mkfs -t xfs /dev/vg00/lv00 
+      meta-data=/dev/vg00/lv00         isize=256    agcount=20, agsize=262144 blks
+      data     =                       bsize=4096   blocks=5242879, imaxpct=25
+               =                       sunit=0      swidth=0 blks, unwritten=1
+      naming   =version 2              bsize=4096  
+      log      =internal log           bsize=4096   blocks=1200
+      realtime =none                   extsz=65536  blocks=0, rtextents=0
+
+[root@crash /root]# mount -t xfs /dev/vg00/lv00 /xfs
+
+>>> Go nuts
+
+
+After a reboot you will need to reactivate the VGs/LVs:
+
+      modprobe lvm-mod
+      vgchange -a y
+
+These commands could be added to a startup script.
+
diff --git a/fsck/Makefile b/fsck/Makefile
new file mode 100644
index 000000000..965dca0b4
--- /dev/null
+++ b/fsck/Makefile
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = fsck.xfs
+CFILES = xfs_fsck.c
+LCFLAGS = -s -O3
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR)
diff --git a/fsck/xfs_fsck.c b/fsck/xfs_fsck.c
new file mode 100644
index 000000000..edb8746f2
--- /dev/null
+++ b/fsck/xfs_fsck.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html */
+/*   Unfortunately, we need to be a little more portable.  ;^)   */
+/* This used to be a symlink to /bin/true but that gives a weird */
+/* dependency problem in a certain package manager.              */
+
+int
+main(int argc, char **argv)
+{
+	return 0;
+}
diff --git a/growfs/Makefile b/growfs/Makefile
new file mode 100644
index 000000000..f0bf7616c
--- /dev/null
+++ b/growfs/Makefile
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = xfs_growfs
+CMDDEPS = $(LIBXFS)
+
+CFILES = xfs_growfs.c
+LLDLIBS = $(LIBXFS) $(LIBUUID)
+LSRCFILES = xfs_info.sh
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 xfs_info.sh $(XFS_CMDS_BIN_DIR)/xfs_info
diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c
new file mode 100644
index 000000000..555f924b4
--- /dev/null
+++ b/growfs/xfs_growfs.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libxfs.h>
+#include <mntent.h>
+#include <sys/ioctl.h>
+
+static char	*fname;		/* mount point name */
+static char	*datadev;	/* data device name */
+static char	*logdev;	/*  log device name */
+static char	*rtdev;		/*   RT device name */
+
+/* Print the command synopsis on stderr, then exit with status 2. */
+static void
+usage(void)
+{
+	fprintf(stderr,
+	"Usage: %s [options] mountpoint\n\n"
+	"Options:\n"
+	"        -d          grow data/metadata section\n"
+	"        -l          grow log section\n"
+	"        -r          grow realtime section\n"
+	"        -n          don't change anything, just show geometry\n"
+	"        -i          convert log from external to internal format\n"
+	"        -t          alternate location for mount table (/etc/mtab)\n"
+	"        -x          convert log from internal to external format\n"
+	"        -D size     grow data/metadata section to size blks\n"
+	"        -L size     grow/shrink log section to size blks\n"
+	"        -R size     grow realtime section to size blks\n"
+	"        -e size     set realtime extent size to size blks\n"
+	"        -m imaxpct  set inode max percent to imaxpct\n"
+	"        -V          print version information\n", progname);
+	exit(2);
+}
+
+/* Print the geometry in geo as a six-row table on stdout, one row per call. */
+void
+report_info(
+	xfs_fsop_geom_t	geo,
+	char		*mntpoint,
+	int		unwritten,
+	int		dirversion,
+	int		isint)
+{
+	printf("meta-data=%-22s isize=%-6d agcount=%d, agsize=%d blks\n",
+	       mntpoint, geo.inodesize, geo.agcount, geo.agblocks);
+	printf("data     =%-22s bsize=%-6d blocks=%lld, imaxpct=%d\n",
+	       "", geo.blocksize, geo.datablocks, geo.imaxpct);
+	printf("         =%-22s sunit=%-6d swidth=%d blks, unwritten=%d\n",
+	       "", geo.sunit, geo.swidth, unwritten);
+	printf("naming   =version %-14d bsize=%-6d\n",
+	       dirversion, geo.dirblocksize);
+	printf("log      =%-22s bsize=%-6d blocks=%d\n",
+	       isint ? "internal" : "external", geo.blocksize, geo.logblocks);
+	printf("realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n",
+	       geo.rtblocks ? "external" : "none",
+	       geo.rtextsize * geo.blocksize, geo.rtblocks, geo.rtextents);
+}
+
+/*
+ * Look up mntpoint in the mount table file mtab and record its mount
+ * directory and its data, log and realtime device names in the file-scope
+ * variables fname, datadev, logdev and rtdev.  Exits with status 1 when
+ * the table or mount point is unusable or the entry is not of type "xfs".
+ */
+void
+explore_mtab(char *mtab, char *mntpoint)
+{
+	struct mntent	*mnt;
+	struct stat64	statuser;
+	struct stat64	statmtab;
+	FILE		*mtp;
+
+	if ((mtp = setmntent(mtab, "r")) == NULL) {
+		fprintf(stderr, "%s: cannot access mount list %s: %s\n",
+			progname, mtab, strerror(errno));
+		exit(1);
+	}
+	if (stat64(mntpoint, &statuser) < 0) {
+		fprintf(stderr, "%s: cannot access mount point %s: %s\n",
+			progname, mntpoint, strerror(errno));
+		exit(1);
+	}
+
+	while ((mnt = getmntent(mtp)) != NULL) {
+		if (stat64(mnt->mnt_dir, &statmtab) < 0) {
+			fprintf(stderr, "%s: ignoring entry %s in %s: %s\n",
+				progname, mnt->mnt_dir, mtab, strerror(errno));
+			continue;
+		}
+		if (statuser.st_ino != statmtab.st_ino ||
+				statuser.st_dev != statmtab.st_dev)
+			continue;
+		if (strcmp(mnt->mnt_type, "xfs") != 0) {
+			fprintf(stderr, "%s: %s is not an XFS filesystem\n",
+				progname, mntpoint);
+			exit(1);
+		}
+		break;	/* we've found it */
+	}
+
+	if (mnt == NULL) {
+		fprintf(stderr,
+		"%s: %s is not a filesystem mount point, according to %s\n",
+			progname, mntpoint, mtab);
+		exit(1);
+	}
+
+	/* find the data, log (logdev=), and realtime (rtdev=) devices */
+	fname = mnt->mnt_dir;
+	datadev = mnt->mnt_fsname;
+	if ((logdev = hasmntopt(mnt, "logdev=")) != NULL)
+		logdev += 7;
+	if ((rtdev = hasmntopt(mnt, "rtdev=")) != NULL)
+		rtdev += 6;
+
+	/* Cut each name at the next ',' or ' ' only after both lookups, */
+	/* since the names point into the option string being searched. */
+	if (logdev)
+		logdev[strcspn(logdev, ", ")] = '\0';
+	if (rtdev)
+		rtdev[strcspn(rtdev, ", ")] = '\0';
+
+	endmntent(mtp);
+}
+
+/*
+ * Parse the options, find the mounted filesystem through the mount table,
+ * print its geometry and, unless -n was given, grow the selected sections
+ * with the XFS_IOC_FSGROWFS* ioctls.  Exits 0 on success, non-zero on error.
+ */
+int
+main(int argc, char **argv)
+{
+	int			aflag;	/* fake flag, do all pieces */
+	int			c;	/* current option character */
+	long long		ddsize;	/* data device size in 512-byte blocks */
+	int			dflag;	/* -d flag */
+	int			dirversion; /* directory version number */
+	long long		dlsize;	/* log device size in 512-byte blocks */
+	long long		drsize;	/* rt device size in 512-byte blocks */
+	long long		dsize;	/* new data size in fs blocks */
+	int			error;	/* we have hit an error */
+	long			esize;	/* new rt extent size */
+	int			ffd;	/* mount point file descriptor */
+	xfs_fsop_geom_t		geo;	/* current fs geometry */
+	int			iflag;	/* -i flag */
+	int			isint;	/* log is currently internal */
+	int			lflag;	/* -l flag */
+	long long		lsize;	/* new log size in fs blocks */
+	int			maxpct;	/* -m flag value */
+	int			mflag;	/* -m flag */
+	char			*mtab;	/* mount table file (/etc/mtab) */
+	int			nflag;	/* -n flag */
+	xfs_fsop_geom_t		ngeo;	/* new fs geometry */
+	int			rflag;	/* -r flag */
+	long long		rsize;	/* new rt size in fs blocks */
+	int			unwritten; /* unwritten extent flag */
+	int			xflag;	/* -x flag */
+	libxfs_init_t		xi;	/* libxfs structure */
+
+	mtab = MOUNTED;
+	progname = basename(argv[0]);
+	aflag = dflag = iflag = lflag = mflag = nflag = rflag = xflag = 0;
+	maxpct = esize = 0;
+	dsize = lsize = rsize = 0LL;
+	while ((c = getopt(argc, argv, "dD:e:ilL:m:np:rR:t:xV")) != EOF) {
+		switch (c) {
+		case 'D':
+			dsize = atoll(optarg);
+			/* fall through */
+		case 'd':
+			dflag = 1;
+			break;
+		case 'e':
+			esize = atol(optarg);
+			rflag = 1;
+			break;
+		case 'i':
+			lflag = iflag = 1;
+			break;
+		case 'L':
+			lsize = atoll(optarg);
+			/* fall through */
+		case 'l':
+			lflag = 1;
+			break;
+		case 'm':
+			mflag = 1;
+			maxpct = atoi(optarg);
+			break;
+		case 'n':
+			nflag = 1;
+			break;
+		case 'p':
+			progname = optarg;
+			break;
+		case 'R':
+			rsize = atoll(optarg);
+			/* fall through */
+		case 'r':
+			rflag = 1;
+			break;
+		case 't':
+			mtab = optarg;
+			break;
+		case 'x':
+			lflag = xflag = 1;
+			break;
+		case 'V':
+			printf("%s version %s\n", progname, VERSION);
+			exit(0);	/* version query only, not a usage error */
+		default:	/* includes '?' from getopt */
+			usage();
+		}
+	}
+	if (argc - optind != 1 || (iflag && xflag))
+		usage();
+	if (dflag + lflag + rflag == 0)
+		aflag = 1;
+
+	explore_mtab(mtab, argv[optind]);
+
+	ffd = open(fname, O_RDONLY);
+	if (ffd < 0) {
+		perror(fname);
+		return 1;
+	}
+
+	/* get the current filesystem size & geometry */
+	if (ioctl(ffd, XFS_IOC_FSGEOMETRY, &geo) < 0) {
+		fprintf(stderr, "%s: cannot determine geometry of filesystem"
+			" mounted at %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	isint = geo.logstart > 0;
+	unwritten = geo.flags & XFS_FSOP_GEOM_FLAGS_EXTFLG ? 1 : 0;
+	dirversion = geo.flags & XFS_FSOP_GEOM_FLAGS_DIRV2 ? 2 : 1;
+
+	if (nflag) {
+		report_info(geo, fname, unwritten, dirversion, isint);
+		exit(0);
+	}
+
+	/* Need root access from here on (using raw devices)... */
+
+	bzero(&xi, sizeof(xi));
+	xi.dname = datadev;
+	xi.logname = logdev;
+	xi.rtname = rtdev;
+	xi.notvolok = 1;
+	xi.isreadonly = LIBXFS_ISREADONLY;
+
+	if (!libxfs_init(&xi))
+		usage();
+
+	/* check we got the info for all the sections we are trying to modify */
+	if (!xi.ddev) {
+		fprintf(stderr, "%s: failed to access data device for %s\n",
+			progname, fname);
+		exit(1);
+	}
+	if (lflag && !isint && !xi.logdev) {
+		fprintf(stderr, "%s: failed to access external log for %s\n",
+			progname, fname);
+		exit(1);
+	}
+	if (rflag && !xi.rtdev) {
+		fprintf(stderr, "%s: failed to access realtime device for %s\n",
+			progname, fname);
+		exit(1);
+	}
+
+	report_info(geo, fname, unwritten, dirversion, isint);
+
+	ddsize = xi.dsize;
+	dlsize = ( xi.logBBsize? xi.logBBsize :
+			geo.logblocks * (geo.blocksize / BBSIZE) );
+	drsize = xi.rtsize;
+
+	error = 0;
+	if (dflag | aflag) {
+		xfs_growfs_data_t	in;
+
+		if (!mflag)
+			maxpct = geo.imaxpct;
+		if (!dsize)
+			dsize = ddsize / (geo.blocksize / BBSIZE);
+		else if (dsize > ddsize / (geo.blocksize / BBSIZE)) {
+			fprintf(stderr,
+				"data size %llu too large, maximum is %lld\n",
+				(__u64)dsize, ddsize/(geo.blocksize/BBSIZE));
+			error = 1;
+		}
+		if (!error && dsize < geo.datablocks) {
+			fprintf(stderr, "data size %llu too small,"
+				" old size is %lld\n",
+				(__u64)dsize, geo.datablocks);
+			error = 1;
+		} else if (!error &&
+			   dsize == geo.datablocks && maxpct == geo.imaxpct) {
+			if (dflag)
+				fprintf(stderr,
+					"data size unchanged, skipping\n");
+			if (mflag)
+				fprintf(stderr,
+					"inode max pct unchanged, skipping\n");
+		} else if (!error && !nflag) {
+			in.newblocks = (__u64)dsize;
+			in.imaxpct = (__u32)maxpct;
+			if (ioctl(ffd, XFS_IOC_FSGROWFSDATA, &in) < 0) {
+				if (errno == EWOULDBLOCK)
+					fprintf(stderr,
+				 "%s: growfs operation in progress already\n",
+						progname);
+				else
+					fprintf(stderr,
+				"%s: ioctl failed - XFS_IOC_FSGROWFSDATA: %s\n",
+						progname, strerror(errno));
+				error = 1;
+			}
+		}
+	}
+
+	if (!error && (rflag | aflag)) {
+		xfs_growfs_rt_t	in;
+
+		if (!esize)
+			esize = (__u32)geo.rtextsize;
+		if (!rsize)
+			rsize = drsize / (geo.blocksize / BBSIZE);
+		else if (rsize > drsize / (geo.blocksize / BBSIZE)) {
+			fprintf(stderr,
+			"realtime size %lld too large, maximum is %lld\n",
+				rsize, drsize / (geo.blocksize / BBSIZE));
+			error = 1;
+		}
+		if (!error && rsize < geo.rtblocks) {
+			fprintf(stderr,
+			"realtime size %lld too small, old size is %lld\n",
+				rsize, geo.rtblocks);
+			error = 1;
+		} else if (!error && rsize == geo.rtblocks) {
+			if (rflag)
+				fprintf(stderr,
+					"realtime size unchanged, skipping\n");
+		} else if (!error && !nflag) {
+			in.newblocks = (__u64)rsize;
+			in.extsize = (__u32)esize;
+			if (ioctl(ffd, XFS_IOC_FSGROWFSRT, &in) < 0) {
+				if (errno == EWOULDBLOCK)
+					fprintf(stderr,
+				"%s: growfs operation in progress already\n",
+						progname);
+				else if (errno == ENOSYS)
+					fprintf(stderr,
+				"%s: realtime growth not implemented\n",
+						progname);
+				else
+					fprintf(stderr,
+				"%s: ioctl failed - XFS_IOC_FSGROWFSRT: %s\n",
+						progname, strerror(errno));
+				error = 1;
+			}
+		}
+	}
+
+	if (!error && (lflag | aflag)) {
+		xfs_growfs_log_t	in;
+
+		if (!lsize)
+			lsize = dlsize / (geo.blocksize / BBSIZE);
+		if (iflag)
+			in.isint = 1;
+		else if (xflag)
+			in.isint = 0;
+		else
+			in.isint = xi.logBBsize == 0;
+		if (lsize == geo.logblocks && (in.isint == isint)) {
+			if (lflag)
+				fprintf(stderr,
+					"log size unchanged, skipping\n");
+		} else if (!nflag) {
+			in.newblocks = (__u32)lsize;
+			if (ioctl(ffd, XFS_IOC_FSGROWFSLOG, &in) < 0) {
+				if (errno == EWOULDBLOCK)
+					fprintf(stderr,
+				"%s: growfs operation in progress already\n",
+						progname);
+				else if (errno == ENOSYS)
+					fprintf(stderr,
+				"%s: log growth not supported yet\n", progname);
+				else
+					fprintf(stderr,
+				"%s: ioctl failed - XFS_IOC_FSGROWFSLOG: %s\n",
+						progname, strerror(errno));
+				error = 1;
+			}
+		}
+	}
+
+	if (ioctl(ffd, XFS_IOC_FSGEOMETRY, &ngeo) < 0) {
+		fprintf(stderr, "%s: ioctl failed - XFS_IOC_FSGEOMETRY: %s\n",
+			progname, strerror(errno));
+		exit(1);
+	}
+	if (geo.datablocks != ngeo.datablocks)
+		printf("data blocks changed from %lld to %lld\n",
+			geo.datablocks, ngeo.datablocks);
+	if (geo.imaxpct != ngeo.imaxpct)
+		printf("inode max percent changed from %d to %d\n",
+			geo.imaxpct, ngeo.imaxpct);
+	if (geo.logblocks != ngeo.logblocks)
+		printf("log blocks changed from %d to %d\n",
+			geo.logblocks, ngeo.logblocks);
+	if ((geo.logstart == 0) != (ngeo.logstart == 0))
+		printf("log changed from %s to %s\n",
+			geo.logstart ? "internal" : "external",
+			ngeo.logstart ? "internal" : "external");
+	if (geo.rtblocks != ngeo.rtblocks)
+		printf("realtime blocks changed from %lld to %lld\n",
+			geo.rtblocks, ngeo.rtblocks);
+	if (geo.rtextsize != ngeo.rtextsize)
+		printf("realtime extent size changed from %d to %d\n",
+			geo.rtextsize, ngeo.rtextsize);
+	exit(error);	/* 1 if a size check or grow ioctl failed above */
+}
diff --git a/growfs/xfs_info.sh b/growfs/xfs_info.sh
new file mode 100755
index 000000000..2b1316f81
--- /dev/null
+++ b/growfs/xfs_info.sh
@@ -0,0 +1,56 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# xfs_info: print the geometry of a mounted XFS filesystem by running
+# xfs_growfs in report-only mode (-n) under the name xfs_info (-p).
+OPTS=""
+USAGE="Usage: xfs_info [-t mtab] mountpoint"
+
+while getopts "t:" c
+do
+	case $c in
+	t)	OPTS="-t $OPTARG" ;;
+	*)	echo "$USAGE" 1>&2; exit 2 ;;
+	esac
+done
+# Quote "$@" so a mount point containing blanks stays one word; the dummy
+# "extra" argument makes "shift $OPTIND" drop exactly the parsed options.
+set -- extra "$@"
+shift $OPTIND
+case $# in
+	1)	xfs_growfs -p xfs_info -n $OPTS "$1"
+		status=$?
+		;;
+	*)	echo "$USAGE" 1>&2; exit 2 ;;
+esac
+exit $status
diff --git a/include/Makefile b/include/Makefile
new file mode 100644
index 000000000..60d0a28bd
--- /dev/null
+++ b/include/Makefile
@@ -0,0 +1,52 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+# Nothing is compiled here; LSRCFILES lists the files for the source manifest.
+LSRCFILES = libxfs.h acl.h arch.h attributes.h handle.h jdm.h \
+	platform_defs.h.in builddefs.in buildrules \
+	xfs_ag.h xfs_alloc.h xfs_alloc_btree.h xfs_arch.h xfs_attr_leaf.h \
+	xfs_attr_sf.h xfs_bit.h xfs_bmap.h xfs_bmap_btree.h xfs_btree.h \
+	xfs_buf_item.h xfs_cred.h xfs_da_btree.h xfs_dfrag.h xfs_dinode.h \
+	xfs_dir.h xfs_dir2.h xfs_dir2_block.h xfs_dir2_data.h xfs_dir2_leaf.h \
+	xfs_dir2_node.h xfs_dir2_sf.h xfs_dir_leaf.h xfs_dir_sf.h xfs_dqblk.h \
+	xfs_dquot_item.h xfs_extfree_item.h xfs_fs.h xfs_ialloc.h \
+	xfs_ialloc_btree.h xfs_imap.h xfs_inode.h xfs_inode_item.h xfs_inum.h \
+	xfs_log.h xfs_log_priv.h xfs_log_recover.h xfs_mount.h xfs_quota.h \
+	xfs_rtalloc.h xfs_sb.h xfs_trans.h xfs_trans_space.h xfs_types.h
+
+default :
+
+include $(BUILDRULES)
+
+install : default
diff --git a/include/arch.h b/include/arch.h
new file mode 100644
index 000000000..12ce1c5b1
--- /dev/null
+++ b/include/arch.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_ARCH_H__
+#define __XFS_SUPPORT_ARCH_H__
+
+#ifdef __KERNEL__
+
+#include <asm/byteorder.h>
+
+#ifdef __LITTLE_ENDIAN
+# define __BYTE_ORDER	__LITTLE_ENDIAN
+#endif
+#ifdef __BIG_ENDIAN
+# define __BYTE_ORDER	__BIG_ENDIAN
+#endif
+
+#else
+
+#include <linux/byteorder/swab.h>
+
+#endif	/* __KERNEL__ */
+
+/* do we need conversion? */
+
+#define ARCH_NOCONVERT 1
+#if __BYTE_ORDER == __LITTLE_ENDIAN 
+#define ARCH_CONVERT   0
+#else
+#define ARCH_CONVERT   ARCH_NOCONVERT
+#endif
+
+/* generic swapping macros: byte-swap A and cast back to A's own type */
+
+#define INT_SWAP16(A) ((typeof(A))(__swab16((__u16)(A))))
+#define INT_SWAP32(A) ((typeof(A))(__swab32((__u32)(A))))
+#define INT_SWAP64(A) ((typeof(A))(__swab64((__u64)(A))))
+/* swap according to sizeof(type); sizes other than 8, 4, 2 are unchanged */
+#define INT_SWAP(type, var) \
+    ((sizeof(type) == 8) ? INT_SWAP64(var) : \
+    ((sizeof(type) == 4) ? INT_SWAP32(var) : \
+    ((sizeof(type) == 2) ? INT_SWAP16(var) : \
+    (var))))
+  
+/* copy 4 (or 8) bytes from 'from' to 'to' in reverse order; expands to a { } block */
+#define INT_SWAP_UNALIGNED_32(from,to) \
+    { \
+        ((__u8*)(to))[0] = ((__u8*)(from))[3]; \
+        ((__u8*)(to))[1] = ((__u8*)(from))[2]; \
+        ((__u8*)(to))[2] = ((__u8*)(from))[1]; \
+        ((__u8*)(to))[3] = ((__u8*)(from))[0]; \
+    }
+
+#define INT_SWAP_UNALIGNED_64(from,to) \
+    { \
+        INT_SWAP_UNALIGNED_32( ((__u8*)(from)) + 4, ((__u8*)(to))); \
+        INT_SWAP_UNALIGNED_32( ((__u8*)(from)), ((__u8*)(to)) + 4); \
+    }
+
+/* 
+ * get and set integers from potentially unaligned locations
+ */
+        
+#define INT_GET_UNALIGNED_16_LE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0]      ) | (((__u8*)(pointer))[1] << 8 )))
+#define INT_GET_UNALIGNED_16_BE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
+#define INT_SET_UNALIGNED_16_LE(pointer,value) \
+    { \
+        ((__u8*)(pointer))[0] = (((value)     ) & 0xff); \
+        ((__u8*)(pointer))[1] = (((value) >> 8) & 0xff); \
+    }
+#define INT_SET_UNALIGNED_16_BE(pointer,value) \
+    { \
+        ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
+        ((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
+    }
+/* bytes are widened to __u32 before shifting so "<< 24" cannot overflow int */
+#define INT_GET_UNALIGNED_32_LE(pointer) \
+   (((__u32)((__u8*)(pointer))[0]      ) | ((__u32)((__u8*)(pointer))[1] << 8 ) \
+   |((__u32)((__u8*)(pointer))[2] << 16) | ((__u32)((__u8*)(pointer))[3] << 24))
+#define INT_GET_UNALIGNED_32_BE(pointer) \
+   (((__u32)((__u8*)(pointer))[0] << 24) | ((__u32)((__u8*)(pointer))[1] << 16) \
+   |((__u32)((__u8*)(pointer))[2] << 8)  | ((__u32)((__u8*)(pointer))[3]      ))
+
+#define INT_GET_UNALIGNED_64_LE(pointer) \
+   (((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))+4)) << 32 ) \
+   |((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))  ))       ))
+#define INT_GET_UNALIGNED_64_BE(pointer) \
+   (((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))  )) << 32  ) \
+   |((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))+4))        ))
+   
+/*
+ * now pick the right ones for our MACHINE ARCHITECTURE
+ */
+   
+#if __BYTE_ORDER == __LITTLE_ENDIAN 
+#define INT_GET_UNALIGNED_16(pointer)       INT_GET_UNALIGNED_16_LE(pointer)
+#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_LE(pointer,value)
+#define INT_GET_UNALIGNED_32(pointer)       INT_GET_UNALIGNED_32_LE(pointer)
+#define INT_GET_UNALIGNED_64(pointer)       INT_GET_UNALIGNED_64_LE(pointer)
+#else
+#define INT_GET_UNALIGNED_16(pointer)       INT_GET_UNALIGNED_16_BE(pointer)
+#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_BE(pointer,value)
+#define INT_GET_UNALIGNED_32(pointer)       INT_GET_UNALIGNED_32_BE(pointer)
+#define INT_GET_UNALIGNED_64(pointer)       INT_GET_UNALIGNED_64_BE(pointer)
+#endif
+
+/* define generic INT_ macros */
+
+#define INT_GET(reference,arch) \
+    (((arch) == ARCH_NOCONVERT) \
+        ? \
+            (reference) \
+        : \
+            INT_SWAP((reference),(reference)) \
+    )
+
+/* does not return a value */   
+#define INT_SET(reference,arch,valueref) \
+    (void)( \
+        ((reference) = (valueref)), \
+        ( \
+           ((arch) != ARCH_NOCONVERT) ? \
+               (reference) = INT_SWAP((reference),(reference)) \
+           : 0 \
+        ) \
+    )
+
+/* does not return a value */   
+#define INT_MOD_EXPR(reference,arch,code) \
+    (void)(((arch) == ARCH_NOCONVERT) \
+        ? \
+            ((reference) code) \
+        : \
+            ( \
+                (reference) = INT_GET((reference),arch) , \
+                ((reference) code), \
+                INT_SET(reference, arch, reference) \
+            ) \
+    )
+    
+/* does not return a value */   
+#define INT_MOD(reference,arch,delta) \
+    (void)( \
+        INT_MOD_EXPR(reference,arch,+=(delta)) \
+    )
+    
+/*
+ * INT_COPY - copy a value between two locations with the
+ *            _same architecture_ but _potentially different sizes_
+ *
+ *          if the types of the two parameters are equal or they are
+ *              in native architecture, a simple copy is done
+ *
+ *          otherwise, architecture conversions are done
+ *
+ */
+    
+/* does not return a value */   
+#define INT_COPY(dst,src,arch) \
+    (void)( \
+        ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \
+            ? \
+                ((dst) = (src)) \
+            : \
+                INT_SET(dst, arch, INT_GET(src, arch)) \
+    )
+    
+/*
+ * INT_XLATE - copy a value in either direction between two locations 
+ *             with different architectures 
+ *
+ *                  dir < 0     - copy from memory to buffer (native to arch)
+ *                  dir > 0     - copy from buffer to memory (arch to native)
+ */
+    
+/* does not return a value */   
+#define INT_XLATE(buf,mem,dir,arch) {\
+    ASSERT(dir); \
+    if (dir>0) { \
+        (mem)=INT_GET(buf, arch); \
+    } else { \
+        INT_SET(buf, arch, mem); \
+    } \
+}
+
+#define INT_ISZERO(reference,arch) \
+    ((reference) == 0)
+    
+#define INT_ZERO(reference,arch) \
+    ((reference) = 0)
+/* ARCH_NOCONVERT: native byte order, else big-endian; SET expands to a bare if/else */
+#define INT_GET_UNALIGNED_16_ARCH(pointer,arch) \
+    ( ((arch) == ARCH_NOCONVERT) \
+        ? \
+            (INT_GET_UNALIGNED_16(pointer)) \
+        : \
+            (INT_GET_UNALIGNED_16_BE(pointer)) \
+    )
+#define INT_SET_UNALIGNED_16_ARCH(pointer,value,arch) \
+    if ((arch) == ARCH_NOCONVERT) { \
+        INT_SET_UNALIGNED_16(pointer,value); \
+    } else { \
+        INT_SET_UNALIGNED_16_BE(pointer,value); \
+    }
+
+#endif	/* __XFS_SUPPORT_ARCH_H__ */
diff --git a/include/builddefs.in b/include/builddefs.in
new file mode 100644
index 000000000..0f10b8aaf
--- /dev/null
+++ b/include/builddefs.in
@@ -0,0 +1,173 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+# @configure_input@
+#
+
+ifndef _BUILDDEFS_INCLUDED_
+_BUILDDEFS_INCLUDED_ = 1
+
+DEBUG = @debug_build@
+OPTIMIZER = @opt_build@
+MALLOCLIB = @malloc_lib@
+
+LIBXFS = $(TOPDIR)/libxfs/libxfs.a
+LIBATTR = $(TOPDIR)/libattr/libattr.a
+LIBHANDLE = $(TOPDIR)/handle/libhandle.a
+LIBUUID = /usr/lib/libuuid.a
+LIBLVM = @liblvm@
+
+BUILDRULES = $(TOPDIR)/include/buildrules
+
+# General package information
+TARGET_OS = @host_platform@
+PACKAGE_NAME = @package_name@
+PACKAGE_RELEASE = @package_release@
+PACKAGE_VERSION = @package_version@
+PACKAGE_DISTRIBUTION = @package_distribution@
+PACKAGE_BUILDER	= @package_builder@
+XFS_CMDS_SBIN_DIR = @xfs_cmds_sbin_dir@
+XFS_CMDS_BIN_DIR = @xfs_cmds_bin_dir@
+XFS_CMDS_LIB_DIR = @xfs_cmds_lib_dir@
+XFS_CMDS_SHARE_DIR = @xfs_cmds_share_dir@
+XFS_CMDS_INC_DIR = @xfs_cmds_inc_dir@
+XFS_CMDS_MAN_DIR = @xfs_cmds_man_dir@
+XFS_CMDS_TMP_DIR = @xfs_cmds_tmp_dir@
+XFS_CMDS_DOC_DIR = @xfs_cmds_doc_dir@
+
+# LCFLAGS, LLDFLAGS, LLDLIBS, LSRCFILES and LDIRT may be specified in
+# user Makefiles. Note: LSRCFILES is anything other than Makefile, $(CFILES)
+# $(CXXFILES), or $(HFILES) and is used to construct the manifest list
+# during the "dist" phase (packaging).
+
+CFLAGS += $(OPTIMIZER) $(DEBUG) -funsigned-char -Wall -Wno-parentheses \
+	$(LCFLAGS) -I$(TOPDIR)/include '-DVERSION="$(PACKAGE_VERSION)"' \
+	-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+	-DXFS_BIG_FILES=1 -DXFS_BIG_FILESYSTEMS=1 -DHAVE_LIBLVM=@have_liblvm@
+
+LDFLAGS = $(LLDFLAGS)
+LDLIBS = $(LLDLIBS) $(MALLOCLIB)
+
+MAKEOPTS = --no-print-directory
+SRCFILES = Makefile $(HFILES) $(CFILES) $(LSRCFILES) $(LFILES) $(YFILES)
+DIRT = $(LDIRT) dep dep.bak $(OBJECTS) $(CMDTARGET) $(LIBTARGET) \
+	$(STATICLIBTARGET) *.[1-9].gz
+
+OBJECTS = $(ASFILES:.s=.o) \
+          $(CFILES:.c=.o) \
+          $(LFILES:.l=.o) \
+          $(YFILES:%.y=%.tab.o)
+
+MAKE	= @make@
+CC	= @cc@
+LD	= @ld@
+AWK	= @awk@
+SED	= @sed@
+INSTALL	= $(TOPDIR)/install-sh -o root -g root
+ECHO	= @echo@
+LN_S	= @LN_S@
+
+CCF	= $(CC) $(CFLAGS)
+MAKEF	= $(MAKE) $(MAKEOPTS)
+CXXF	= $(CXX) $(CXXFLAGS)
+LDF	= $(LD) $(LDFLAGS)
+MAKEDEPEND  = @makedepend@
+
+ZIP	= @zip@
+TAR	= @tar@
+RPM	= @rpm@
+RPM_VERSION = @rpm_version@
+
+HAVE_ZIPPED_MANPAGES = @have_zipped_manpages@
+
+SHELL = /bin/sh
+IMAGES_DIR = $(TOPDIR)/all-images
+DIST_DIR = $(TOPDIR)/dist
+# Run the current target ($@) in each existing directory of $(SUBDIRS); stop on failure.
+SUBDIRS_MAKERULE = \
+	@for d in $(SUBDIRS) ""; do \
+	    if test -d "$$d" -a ! -z "$$d"; then \
+		$(ECHO) === $$d ===; \
+		$(MAKEF) -C $$d $@ || exit $$?; \
+	    fi; \
+	done
+
+MAN_MAKERULE = \
+    @for f in *.[12345678] ""; do \
+	if test ! -z "$$f"; then \
+	    $(ZIP) --best -c < $$f > $$f.gz; \
+	fi; \
+    done
+# Install each page of $(MAN_PAGES) under every name on the line after ".SH NAME" (gzipped if HAVE_ZIPPED_MANPAGES).
+INSTALL_MAN = \
+    @for d in $(MAN_PAGES); do \
+	first=true; \
+	for m in `$(AWK) '/^\.SH NAME/ {ok=1; next} ok {print; exit}' $$d \
+	| sed -e 's/,/ /g' -e 's/\\-.*//' -e 's/\\\f[0-9]//g' -e 's/  / /g;q'`; \
+	do \
+	    [ -z "$$m" -o "$$m" = "\\" ] && continue; \
+	    t=$(MAN_DEST)/$$m.$(MAN_SECTION); \
+	    if $$first; then \
+		if $(HAVE_ZIPPED_MANPAGES); then \
+		    $(ZIP) --best -c $$d > $$d.gz; _sfx=.gz; \
+		fi; \
+		u=$$m.$(MAN_SECTION)$$_sfx; \
+		echo $(INSTALL) -m 644 $${d}$$_sfx $${t}$$_sfx; \
+		$(INSTALL) -m 644 $${d}$$_sfx $${t}$$_sfx; \
+	    else \
+		echo $(INSTALL) -S $$u $${t}$$_sfx; \
+		$(INSTALL) -S $$u $${t}$$_sfx; \
+	    fi; \
+	    first=false; \
+	done; \
+    done
+
+DIST_MAKERULE = \
+	$(MAKEF) -C build dist
+
+SOURCE_MAKERULE = \
+	@test -z "$$DIR" && DIR="."; \
+	for f in $(SRCFILES) ""; do \
+	    if test ! -z "$$f"; then $(ECHO) $$DIR/$$f; fi;\
+	done; \
+	for d in `echo $(SUBDIRS)` ; do \
+	    if test -d "$$d" -a ! -z "$$d"; then \
+		$(MAKEF) DIR=$$DIR/$$d -C $$d $@ || exit $$?; \
+	    fi; \
+	done
+
+endif
+
+#
+# For targets that should always be rebuilt,
+# define a target that is never up-to-date.
+# Targets needing this should depend on $(_FORCE)
+_FORCE = __force_build
diff --git a/include/buildrules b/include/buildrules
new file mode 100644
index 000000000..af2a7caca
--- /dev/null
+++ b/include/buildrules
@@ -0,0 +1,76 @@
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as published
+# by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  Further, any license provided herein,
+# whether implied or otherwise, is limited to this program in accordance with
+# the express provisions of the GNU General Public License.  Patent licenses,
+# if any, provided herein do not apply to combinations of this program with
+# other product or programs, or any other product whatsoever.  This program is
+# distributed without any warranty that the program is delivered free of the
+# rightful claim of any third person by way of infringement or the like.  See
+# the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write the Free Software Foundation, Inc., 59 Temple
+# Place - Suite 330, Boston MA 02111-1307, USA.
+# Common build rules for gmake
+#
+ifndef _BUILDRULES_INCLUDED_
+_BUILDRULES_INCLUDED_ = 1
+
+include $(TOPDIR)/include/builddefs
+
+#
+# Standard targets
+#
+ifdef CMDTARGET
+$(CMDTARGET) : $(SUBDIRS) $(OBJECTS) $(CMDDEPS)
+	$(CCF) -o $(CMDTARGET) $(LDFLAGS) $(OBJECTS) $(LDLIBS) 
+endif
+
+ifdef LIBTARGET
+$(LIBTARGET) : $(SUBDIRS) $(OBJECTS)
+	$(CC) $(LDFLAGS) -shared -Wl,-soname,$(LIBTARGET) -o $(LIBTARGET) \
+		$(OBJECTS) $(LDLIBS) $(LIB_FOR_DLOPEN) $(LIB_FOR_BASENAME)
+endif
+
+ifdef STATICLIBTARGET
+$(STATICLIBTARGET) : $(SUBDIRS) $(OBJECTS)
+	$(AR) crf $(STATICLIBTARGET) $?
+endif
+
+clean clobber : $(SUBDIRS)
+	rm -f $(DIRT)
+	$(SUBDIRS_MAKERULE)
+
+# Never blow away subdirs
+ifdef SUBDIRS
+.PRECIOUS: $(SUBDIRS)
+$(SUBDIRS):
+	$(SUBDIRS_MAKERULE)
+endif
+
+source :
+	$(SOURCE_MAKERULE)
+
+endif
+
+$(_FORCE):
+# These targets name actions, not files; a stray file of the same name must not mask them.
+.PHONY : depend clean clobber source
+
+depend : $(CFILES) $(HFILES)
+	$(SUBDIRS_MAKERULE)
+	touch dep
+	$(MAKEDEPEND) -fdep -- $(CFLAGS) -- $(CFILES)
+
+# Include dep, but only if it exists
+ifeq ($(shell test -f dep && echo dep), dep)
+include dep
+endif
diff --git a/include/handle.h b/include/handle.h
new file mode 100644
index 000000000..0400a7046
--- /dev/null
+++ b/include/handle.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __HANDLE_H__
+#define __HANDLE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int  path_to_handle (char *__path, void **__hanp, size_t *__hlen);
+extern int  path_to_fshandle (char *__path, void **__hanp, size_t *__hlen);
+extern int  fd_to_handle (int __fd, void **__hanp, size_t *__hlen);
+extern int  handle_to_fshandle (void *__hanp, size_t __hlen, void **__fshanp,
+				size_t *__fshlen);
+extern void free_handle (void *__hanp, size_t __hlen);
+extern int  open_by_handle (void *__hanp, size_t __hlen, int __rw);
+extern int  readlink_by_handle (void *__hanp, size_t __hlen, void *__buf,
+				size_t __bs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* __HANDLE_H__ */
diff --git a/include/jdm.h b/include/jdm.h
new file mode 100644
index 000000000..3d2012018
--- /dev/null
+++ b/include/jdm.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __JDM_H__
+#define __JDM_H__
+
+typedef int	intgen_t;
+typedef void	jdm_fshandle_t;
+
+struct xfs_bstat;
+extern jdm_fshandle_t *jdm_getfshandle	(char *mntpnt);
+extern intgen_t jdm_open	(jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+				 intgen_t oflags);
+extern intgen_t jdm_readlink	(jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+				 char *bufp, size_t bufsz);
+
+#ifdef EXTATTR
+
+struct attrlist_cursor;
+extern intgen_t jdm_attr_multi	(jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+				 char *bufp, int rtrvcnt, int flags);
+extern intgen_t	jdm_attr_list	(jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+				 char *bufp, size_t bufsz, int flags, 
+				 struct attrlist_cursor *cursor);
+#endif	/* EXTATTR */
+
+/* macro for determining the size of a structure member */
+#define sizeofmember( t, m )	sizeof( ( ( t * )0 )->m )
+
+/* macro for calculating the offset of a structure member */
+#define offsetofmember( t, m )	( ( size_t )( char * )&( ( ( t * )0 )->m ) )
+
+#endif	/* __JDM_H__ */
diff --git a/include/libxfs.h b/include/libxfs.h
new file mode 100644
index 000000000..78e597846
--- /dev/null
+++ b/include/libxfs.h
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __LIBXFS_H__
+#define __LIBXFS_H__
+
+#include "platform_defs.h"
+
+#include <uuid/uuid.h>
+#include <xfs_fs.h>
+#include <xfs_types.h>
+#include <arch.h>
+#include <xfs_arch.h>
+#include <xfs_sb.h>
+#include <xfs_bit.h>
+#include <xfs_inum.h>
+#include <xfs_ag.h>
+#include <xfs_da_btree.h>
+#include <xfs_bmap_btree.h>
+#include <xfs_alloc_btree.h>
+#include <xfs_ialloc_btree.h>
+#include <xfs_alloc.h>
+#include <xfs_ialloc.h>
+#include <xfs_rtalloc.h>
+#include <xfs_btree.h>
+#include <xfs_dir.h>
+#include <xfs_dir_sf.h>
+#include <xfs_dir_leaf.h>
+#include <xfs_dir2.h>
+#include <xfs_dir2_data.h>
+#include <xfs_dir2_leaf.h>
+#include <xfs_dir2_block.h>
+#include <xfs_dir2_node.h>
+#include <xfs_dir2_sf.h>
+#include <xfs_attr_sf.h>
+#include <xfs_dinode.h>
+#include <xfs_attr_leaf.h>
+#include <xfs_quota.h>
+#include <xfs_dqblk.h>
+#include <xfs_mount.h>
+#include <xfs_trans_space.h>
+#include <xfs_inode.h>
+#include <xfs_buf_item.h>
+#include <xfs_inode_item.h>
+#include <xfs_cred.h>
+#include <xfs_bmap.h>
+#include <xfs_imap.h>
+#include <xfs_log.h>
+#include <xfs_log_priv.h>
+
+/*
+ * Argument structure for libxfs_init().
+ */
+typedef struct {
+                                /* input parameters */
+        char            *volname;       /* pathname of volume */
+        char            *dname;         /* pathname of data "subvolume" */
+        char            *logname;       /* pathname of log "subvolume" */
+        char            *rtname;        /* pathname of realtime "subvolume" */
+        int             isreadonly;     /* filesystem is only read in applic */
+        int             disfile;        /* data "subvolume" is a regular file */        int             dcreat;         /* try to create data subvolume */
+        int             lisfile;        /* log "subvolume" is a regular file */
+        int             lcreat;         /* try to create log subvolume */
+        int             risfile;        /* realtime "subvolume" is a reg file */        int             rcreat;         /* try to create realtime subvolume */
+        char            *notvolmsg;     /* format string for not XLV message */
+        int             notvolok;       /* set if not XLV => try data */
+                                /* output results */
+        dev_t           ddev;           /* device for data subvolume */
+        dev_t           logdev;         /* device for log subvolume */
+        dev_t           rtdev;          /* device for realtime subvolume */
+        long long       dsize;          /* size of data subvolume (BBs) */
+        long long       logBBsize;      /* size of log subvolume (BBs) */
+                                        /* (blocks allocated for use as 
+                                         * log is stored in mount structure) */
+        long long       logBBstart;     /* start block of log subvolume (BBs) */        long long       rtsize;         /* size of realtime subvolume (BBs) */
+        int             dfd;            /* data subvolume file descriptor */
+        int             logfd;          /* log subvolume file descriptor */
+        int             rtfd;           /* realtime subvolume file descriptor */
+} libxfs_init_t;
+
+#define LIBXFS_ISREADONLY	0x0069	/* disallow all mounted filesystems */
+#define LIBXFS_ISINACTIVE	0x6900	/* allow mounted only if mounted ro */
+
+extern char	*progname;
+extern int	libxfs_init (libxfs_init_t *);
+extern int	libxfs_device_to_fd (dev_t);
+extern dev_t	libxfs_device_open (char *, int, int);
+extern void	libxfs_device_zero (dev_t, xfs_daddr_t, uint);
+extern void	libxfs_device_close (dev_t);
+
+/* check or write log footer: specify device, log size in blocks & uuid */
+extern int	libxfs_log_clear (dev_t, xfs_daddr_t, uint, uuid_t *, int);
+
+/* 
+ * Define a user-level mount structure with all we need
+ * in order to make use of the numerous XFS_* macros.
+ */
+struct xfs_inode;
+typedef struct xfs_mount {
+	xfs_sb_t		m_sb;		/* copy of fs superblock */
+	int			m_bsize;	/* fs logical block size */
+	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
+	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
+	uint			m_rsumlevels;	/* rt summary levels */
+	uint			m_rsumsize;	/* size of rt summary, bytes */
+	struct xfs_inode	*m_rbmip;	/* pointer to bitmap inode */
+	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */
+	struct xfs_inode	*m_rootip;	/* pointer to root directory */
+	dev_t			m_dev;
+	dev_t			m_logdev;
+	dev_t			m_rtdev;
+	__uint8_t		m_dircook_elog;	/* log d-cookie entry bits */
+	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
+	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
+	__uint8_t		m_agno_log;	/* log #ag's */
+	__uint8_t		m_agino_log;	/* #bits for agino in inum */
+	__uint16_t		m_inode_cluster_size;/* min inode buf size */
+	uint			m_blockmask;	/* sb_blocksize-1 */
+	uint			m_blockwsize;	/* sb_blocksize in words */
+	uint			m_blockwmask;	/* blockwsize-1 */
+	uint			m_alloc_mxr[2];	/* XFS_ALLOC_BLOCK_MAXRECS */
+	uint			m_alloc_mnr[2];	/* XFS_ALLOC_BLOCK_MINRECS */
+	uint			m_bmap_dmxr[2];	/* XFS_BMAP_BLOCK_DMAXRECS */
+	uint			m_bmap_dmnr[2];	/* XFS_BMAP_BLOCK_DMINRECS */
+	uint			m_inobt_mxr[2];	/* XFS_INOBT_BLOCK_MAXRECS */
+	uint			m_inobt_mnr[2];	/* XFS_INOBT_BLOCK_MINRECS */
+	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
+	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
+	uint			m_in_maxlevels;	/* XFS_IN_MAXLEVELS */
+	xfs_perag_t		*m_perag;	/* per-ag accounting info */
+	uint			m_flags;	/* global mount flags */
+	uint			m_qflags;	/* quota status flags */
+	uint			m_attroffset;	/* inode attribute offset */
+	int			m_da_node_ents;	/* how many entries in danode */
+	int			m_ialloc_inos;	/* inodes in inode allocation */
+	int			m_ialloc_blks;	/* blocks in inode allocation */
+	int			m_litino;	/* size of inode union area */
+	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
+	xfs_trans_reservations_t m_reservations;/* precomputed res values */
+	__uint64_t		m_maxicount;	/* maximum inode count */
+	int			m_dalign;	/* stripe unit */
+	int			m_swidth;	/* stripe width */
+	int			m_sinoalign;	/* stripe unit inode alignmnt */
+	int			m_dir_magicpct;	/* 37% of the dir blocksize */
+	__uint8_t		m_dirversion;	/* 1 or 2 */
+	int			m_dirblksize;	/* directory block sz--bytes */
+	int			m_dirblkfsbs;	/* directory block sz--fsbs */
+	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */
+	xfs_dablk_t		m_dirleafblk;	/* blockno of dir non-data v2 */
+	xfs_dablk_t		m_dirfreeblk;	/* blockno of dirfreeindex v2 */
+} xfs_mount_t;
+
+
+extern xfs_mount_t	*libxfs_mount (xfs_mount_t *, xfs_sb_t *,
+				dev_t, dev_t, dev_t, int);
+extern void	libxfs_mount_common (xfs_mount_t *, xfs_sb_t *);
+extern void	libxfs_umount (xfs_mount_t *);
+extern int	libxfs_rtmount_init (xfs_mount_t *);
+extern void	libxfs_alloc_compute_maxlevels (xfs_mount_t *);
+extern void	libxfs_bmap_compute_maxlevels (xfs_mount_t *, int);
+extern void	libxfs_ialloc_compute_maxlevels (xfs_mount_t *);
+extern void	libxfs_trans_init (xfs_mount_t *);
+
+
+/*
+ * Simple I/O interface
+ */
+typedef struct xfs_buf {
+	xfs_daddr_t	b_blkno;	/* buffer address: XFS_BUF_ADDR() */
+	unsigned	b_bcount;	/* count: XFS_BUF_COUNT(); presumably bytes - confirm */
+	dev_t		b_dev;		/* target device: XFS_BUF_TARGET() */
+	void		*b_fsprivate;	/* caller data: XFS_BUF_FSPRIVATE() */
+	void		*b_fsprivate2;	/* caller data: XFS_BUF_FSPRIVATE2() */
+	void		*b_fsprivate3;	/* caller data: XFS_BUF_FSPRIVATE3() */
+	char		*b_addr;	/* data pointer: XFS_BUF_PTR() */
+	/* b_addr must be the last field */
+} xfs_buf_t;
+#define XFS_BUF_PTR(bp)			((bp)->b_addr)
+#define xfs_buf_offset(bp, offset)	(XFS_BUF_PTR(bp) + (offset))
+#define XFS_BUF_ADDR(bp)		((bp)->b_blkno)
+#define XFS_BUF_COUNT(bp)		((bp)->b_bcount)
+#define XFS_BUF_TARGET(bp)		((bp)->b_dev)
+#define XFS_BUF_SET_PTR(bp,p,cnt)	((bp)->b_addr = (char *)(p), \
+						XFS_BUF_SETCOUNT(bp,cnt)) /* one expression: safe in unbraced if/else */
+#define XFS_BUF_SET_ADDR(bp,blk)	((bp)->b_blkno = (blk))
+#define XFS_BUF_SETCOUNT(bp,cnt)	((bp)->b_bcount = (cnt))
+/* Typed access to the three private pointers; setters are parenthesized so each stays one expression. */
+#define XFS_BUF_FSPRIVATE(bp,type)	((type)(bp)->b_fsprivate)
+#define XFS_BUF_SET_FSPRIVATE(bp,val)	((bp)->b_fsprivate = (void *)(val))
+#define XFS_BUF_FSPRIVATE2(bp,type)	((type)(bp)->b_fsprivate2)
+#define XFS_BUF_SET_FSPRIVATE2(bp,val)	((bp)->b_fsprivate2 = (void *)(val))
+#define XFS_BUF_FSPRIVATE3(bp,type)	((type)(bp)->b_fsprivate3)
+#define XFS_BUF_SET_FSPRIVATE3(bp,val)	((bp)->b_fsprivate3 = (void *)(val))
+
+extern xfs_buf_t	*libxfs_getbuf (dev_t, xfs_daddr_t, int);
+extern xfs_buf_t	*libxfs_readbuf (dev_t, xfs_daddr_t, int, int);
+extern xfs_buf_t	*libxfs_getsb (xfs_mount_t *, int);
+extern int	libxfs_readbufr (dev_t, xfs_daddr_t, xfs_buf_t *, int, int);
+extern int	libxfs_writebuf (xfs_buf_t *, int);
+extern int	libxfs_writebuf_int (xfs_buf_t *, int);
+extern void	libxfs_putbuf (xfs_buf_t *);
+
+
+/*
+ * Transaction interface
+ */
+
+typedef struct xfs_log_item {
+	struct xfs_log_item_desc	*li_desc;	/* ptr to current desc*/
+	struct xfs_mount		*li_mountp;	/* ptr to fs mount */
+	uint				li_type;	/* item type */
+} xfs_log_item_t;
+
+typedef struct xfs_inode_log_item {
+	xfs_log_item_t		ili_item;		/* common portion */
+	struct xfs_inode	*ili_inode;		/* inode pointer */
+	unsigned short		ili_flags;		/* misc flags */
+	unsigned int		ili_last_fields;	/* fields when flushed*/
+	xfs_inode_log_format_t	ili_format;		/* logged structure */
+} xfs_inode_log_item_t;
+
+typedef struct xfs_buf_log_item {
+	xfs_log_item_t		bli_item;	/* common item structure */
+	struct xfs_buf		*bli_buf;	/* real buffer pointer */
+	unsigned int		bli_flags;	/* misc flags */
+	unsigned int		bli_recur;	/* recursion count */
+	xfs_buf_log_format_t	bli_format;	/* in-log header */
+} xfs_buf_log_item_t;
+
+#include <xfs_trans.h>
+
+typedef struct xfs_trans {
+	unsigned int	t_type;			/* transaction type */
+	xfs_mount_t	*t_mountp;		/* ptr to fs mount struct */
+	unsigned int	t_flags;		/* misc flags */
+	long		t_icount_delta;		/* superblock icount change */
+	long		t_ifree_delta;		/* superblock ifree change */
+	long		t_fdblocks_delta;	/* superblock fdblocks chg */
+	long		t_frextents_delta;	/* superblock freextents chg */
+	unsigned int	t_items_free;		/* log item descs free */
+	xfs_log_item_chunk_t	t_items;	/* first log item desc chunk */
+} xfs_trans_t;
+
+extern xfs_trans_t	*libxfs_trans_alloc (xfs_mount_t *, int);
+extern xfs_trans_t	*libxfs_trans_dup (xfs_trans_t *);
+extern int	libxfs_trans_reserve (xfs_trans_t *, uint,uint,uint,uint,uint);
+extern int	libxfs_trans_commit (xfs_trans_t *, uint, xfs_lsn_t *);
+extern void	libxfs_trans_cancel (xfs_trans_t *, int);
+extern void	libxfs_mod_sb (xfs_trans_t *, __int64_t);
+
+extern int	libxfs_trans_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
+				uint, struct xfs_inode **);
+extern void	libxfs_trans_iput(xfs_trans_t *, struct xfs_inode *, uint);
+extern void	libxfs_trans_ijoin (xfs_trans_t *, struct xfs_inode *, uint);
+extern void	libxfs_trans_ihold (xfs_trans_t *, struct xfs_inode *);
+extern void	libxfs_trans_log_inode (xfs_trans_t *, struct xfs_inode *,
+				uint);
+
+extern void	libxfs_trans_brelse (xfs_trans_t *, struct xfs_buf *);
+extern void	libxfs_trans_binval (xfs_trans_t *, struct xfs_buf *);
+extern void	libxfs_trans_bjoin (xfs_trans_t *, struct xfs_buf *);
+extern void	libxfs_trans_bhold (xfs_trans_t *, struct xfs_buf *);
+extern void	libxfs_trans_log_buf (xfs_trans_t *, struct xfs_buf *,
+				uint, uint);
+extern xfs_buf_t	*libxfs_trans_get_buf (xfs_trans_t *, dev_t,
+				xfs_daddr_t, int, uint);
+extern int	libxfs_trans_read_buf (xfs_mount_t *, xfs_trans_t *, dev_t,
+				xfs_daddr_t, int, uint, struct xfs_buf **);
+
+
+/*
+ * Simple memory interface
+ */
+typedef struct xfs_zone {
+	int	zone_unitsize;  /* Size in bytes of zone unit           */
+	char	*zone_name;     /* tag name                             */
+        int     allocated;      /* debug: How many currently allocated  */
+} xfs_zone_t;
+
+extern xfs_zone_t	*libxfs_zone_init (int, char *);
+extern void	*libxfs_zone_zalloc (xfs_zone_t *);
+extern void	libxfs_zone_free (xfs_zone_t *, void *);
+extern void	*libxfs_malloc (size_t);
+extern void	libxfs_free (void *);
+extern void	*libxfs_realloc (void *, size_t);
+
+
+/*
+ * Inode interface
+ */
+struct xfs_inode_log_item;
+typedef struct xfs_inode {
+	xfs_mount_t		*i_mount;	/* fs mount struct ptr */
+	xfs_ino_t		i_ino;		/* inode number (agno/agino) */
+	xfs_daddr_t		i_blkno;	/* blkno of inode buffer */
+	dev_t			i_dev;		/* dev for this inode */
+	ushort			i_len;		/* len of inode buffer */
+	ushort			i_boffset;	/* off of inode in buffer */
+	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
+	xfs_ifork_t		i_df;		/* data fork */
+	struct xfs_trans	*i_transp;	/* ptr to owning transaction */
+	struct xfs_inode_log_item *i_itemp;	/* logging information */
+	unsigned int		i_delayed_blks;	/* count of delay alloc blks */
+	xfs_dinode_core_t	i_d;		/* most of ondisk inode */
+} xfs_inode_t;
+
+extern int	libxfs_inode_alloc (xfs_trans_t **, xfs_inode_t *, mode_t,
+				ushort, dev_t, cred_t *, xfs_inode_t **);
+extern void	libxfs_trans_inode_alloc_buf (xfs_trans_t *, xfs_buf_t *);
+
+extern void	libxfs_idata_realloc (xfs_inode_t *, int, int);
+extern int	libxfs_iread (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
+				xfs_inode_t **, xfs_daddr_t);
+extern void	libxfs_ichgtime (xfs_inode_t *, int);
+extern int	libxfs_iflush_int (xfs_inode_t *, xfs_buf_t *);
+extern int	libxfs_itobp (xfs_mount_t *, xfs_trans_t *, xfs_inode_t *,
+				xfs_dinode_t **, xfs_buf_t **, xfs_daddr_t);
+extern int	libxfs_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
+				uint, xfs_inode_t **, xfs_daddr_t);
+extern void	libxfs_iput (xfs_inode_t *, uint);
+
+
+/*
+ * Directory interface
+ */
+extern void	libxfs_dir_mount (xfs_mount_t *);
+extern void	libxfs_dir2_mount (xfs_mount_t *);
+extern int	libxfs_dir_init (xfs_trans_t *, xfs_inode_t *, xfs_inode_t *);
+extern int	libxfs_dir2_init (xfs_trans_t *, xfs_inode_t *, xfs_inode_t *);
+extern int	libxfs_dir_createname (xfs_trans_t *, xfs_inode_t *, char *,
+				int, xfs_ino_t, xfs_fsblock_t *,
+				xfs_bmap_free_t *, xfs_extlen_t);
+extern int	libxfs_dir2_createname (xfs_trans_t *, xfs_inode_t *, char *,
+				int, xfs_ino_t, xfs_fsblock_t *,
+				xfs_bmap_free_t *, xfs_extlen_t);
+extern int	libxfs_dir_lookup (xfs_trans_t *, xfs_inode_t *,
+				char *, int, xfs_ino_t *);
+extern int	libxfs_dir2_lookup (xfs_trans_t *, xfs_inode_t *,
+				char *, int, xfs_ino_t *);
+extern int	libxfs_dir_replace (xfs_trans_t *, xfs_inode_t *,
+				char *, int, xfs_ino_t, xfs_fsblock_t *,
+				xfs_bmap_free_t *, xfs_extlen_t);
+extern int	libxfs_dir2_replace (xfs_trans_t *, xfs_inode_t *,
+				char *, int, xfs_ino_t, xfs_fsblock_t *,
+				xfs_bmap_free_t *, xfs_extlen_t);
+extern int	libxfs_dir_removename (xfs_trans_t *, xfs_inode_t *,
+				char *, int, xfs_ino_t, xfs_fsblock_t *,
+				xfs_bmap_free_t *, xfs_extlen_t);
+extern int	libxfs_dir2_removename (xfs_trans_t *, xfs_inode_t *,
+				char *, int, xfs_ino_t, xfs_fsblock_t *,
+				xfs_bmap_free_t *, xfs_extlen_t);
+extern int	libxfs_dir_bogus_removename (xfs_trans_t *, xfs_inode_t *,
+				char *, xfs_fsblock_t *, xfs_bmap_free_t *,
+				xfs_extlen_t, xfs_dahash_t, int);
+extern int	libxfs_dir2_bogus_removename (xfs_trans_t *, xfs_inode_t *,
+				char *, xfs_fsblock_t *, xfs_bmap_free_t *,
+				xfs_extlen_t, xfs_dahash_t, int);
+
+
+/*
+ * Block map interface
+ */
+extern int	libxfs_bmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+				xfs_filblks_t, int, xfs_fsblock_t *,
+				xfs_extlen_t, xfs_bmbt_irec_t *, int *,
+				xfs_bmap_free_t *);
+extern int	libxfs_bmap_finish (xfs_trans_t **, xfs_bmap_free_t *,
+				xfs_fsblock_t, int *);
+extern int	libxfs_bmap_next_offset (xfs_trans_t *, xfs_inode_t *,
+				xfs_fileoff_t *, int);
+extern int	libxfs_bunmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+				xfs_filblks_t, int, xfs_extnum_t,
+				xfs_fsblock_t *, xfs_bmap_free_t *, int *);
+extern void	libxfs_bmap_del_free (xfs_bmap_free_t *,
+				xfs_bmap_free_item_t *, xfs_bmap_free_item_t *);
+
+
+/*
+ * All other routines we want to keep common...
+ */
+
+extern int	libxfs_highbit32 (__uint32_t);
+extern int	libxfs_highbit64 (__uint64_t);
+extern uint	libxfs_da_log2_roundup (uint);
+
+extern void	libxfs_xlate_sb (void *, xfs_sb_t *, int, xfs_arch_t,
+				__int64_t);
+extern void	libxfs_xlate_dinode_core (xfs_caddr_t buf,
+				xfs_dinode_core_t *, int, xfs_arch_t);
+
+extern int	libxfs_alloc_fix_freelist (xfs_alloc_arg_t *, int);
+extern int	libxfs_alloc_file_space (xfs_inode_t *, xfs_off_t,
+				xfs_off_t, int, int);
+
+extern xfs_dahash_t	libxfs_da_hashname (char *, int);
+extern int	libxfs_attr_leaf_newentsize (xfs_da_args_t *, int, int *);
+
+extern xfs_filblks_t	libxfs_bmbt_get_blockcount (xfs_bmbt_rec_t *);
+extern xfs_fileoff_t	libxfs_bmbt_get_startoff (xfs_bmbt_rec_t *);
+extern void	libxfs_bmbt_get_all (xfs_bmbt_rec_t *, xfs_bmbt_irec_t *);
+
+extern int	libxfs_free_extent (xfs_trans_t *, xfs_fsblock_t, xfs_extlen_t);
+extern int	libxfs_rtfree_extent (xfs_trans_t *, xfs_rtblock_t,
+				xfs_extlen_t);
+
+/* Directory/Attribute routines used by xfs_repair */
+extern void	libxfs_da_bjoin (xfs_trans_t *, xfs_dabuf_t *);
+extern int	libxfs_da_shrink_inode (xfs_da_args_t *, xfs_dablk_t,
+				xfs_dabuf_t *);
+extern int	libxfs_da_grow_inode (xfs_da_args_t *, xfs_dablk_t *);
+extern void	libxfs_da_bhold (xfs_trans_t *, xfs_dabuf_t *);
+extern void	libxfs_da_brelse (xfs_trans_t *, xfs_dabuf_t *);
+extern int	libxfs_da_read_bufr (xfs_trans_t *, xfs_inode_t *, xfs_dablk_t,
+				xfs_daddr_t, xfs_dabuf_t **, int);
+extern int	libxfs_da_read_buf (xfs_trans_t *, xfs_inode_t *,
+				xfs_dablk_t, xfs_daddr_t, xfs_dabuf_t **, int);
+extern int	libxfs_da_get_buf (xfs_trans_t *, xfs_inode_t *,
+				xfs_dablk_t, xfs_daddr_t, xfs_dabuf_t **, int);
+extern void	libxfs_da_log_buf (xfs_trans_t *, xfs_dabuf_t *, uint, uint);
+extern int	libxfs_dir2_shrink_inode (xfs_da_args_t *, xfs_dir2_db_t,
+				xfs_dabuf_t *);
+extern int	libxfs_dir2_grow_inode (xfs_da_args_t *, int, xfs_dir2_db_t *);
+extern int	libxfs_dir2_isleaf (xfs_trans_t *, xfs_inode_t *, int *);
+extern int	libxfs_dir2_isblock (xfs_trans_t *, xfs_inode_t *, int *);
+extern void	libxfs_dir2_data_use_free (xfs_trans_t *, xfs_dabuf_t *,
+				xfs_dir2_data_unused_t *, xfs_dir2_data_aoff_t,
+				xfs_dir2_data_aoff_t, int *, int *);
+extern void	libxfs_dir2_data_make_free (xfs_trans_t *, xfs_dabuf_t *,
+				xfs_dir2_data_aoff_t, xfs_dir2_data_aoff_t,
+				int *, int *);
+extern void	libxfs_dir2_data_log_entry (xfs_trans_t *, xfs_dabuf_t *,
+				xfs_dir2_data_entry_t *);
+extern void	libxfs_dir2_data_log_header (xfs_trans_t *, xfs_dabuf_t *);
+extern void	libxfs_dir2_data_freescan (xfs_mount_t *, xfs_dir2_data_t *,
+				int *, char *);
+extern void	libxfs_dir2_free_log_bests (xfs_trans_t *, xfs_dabuf_t *,
+				int, int);
+
+/* Shared utility routines */
+extern unsigned int	libxfs_log2_roundup(unsigned int i);
+
+
+/* ick */
+extern __inline__ __const__ __u64 __fswab64 (__u64 x);
+
+#endif	/* __LIBXFS_H__ */
diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in
new file mode 100644
index 000000000..9f3437529
--- /dev/null
+++ b/include/platform_defs.h.in
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *
+ * @configure_input@
+ */
+#ifndef __XFS_PLATFORM_DEFS_H__
+#define __XFS_PLATFORM_DEFS_H__
+
+#include <stdio.h>
+#include <assert.h>
+#include <endian.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <sys/types.h>
+
+#if (__GLIBC__ <= 2) && (__GLIBC_MINOR__ <= 1)
+# define constpp	const char * const *
+#else
+# define constpp	char * const *
+#endif
+
+typedef loff_t		xfs_off_t;
+typedef __uint64_t	xfs_ino_t;
+typedef __uint32_t	xfs_dev_t;
+typedef __int64_t	xfs_daddr_t;
+typedef char*		xfs_caddr_t;
+
+/* long and pointer must be either 32 bit or 64 bit */
+#undef HAVE_64BIT_LONG
+#undef HAVE_32BIT_LONG
+#undef HAVE_32BIT_PTR
+#undef HAVE_64BIT_PTR
+
+/* Check if __psint_t is set to something meaningful */
+#undef HAVE___PSINT_T
+#ifndef HAVE___PSINT_T
+# ifdef HAVE_32BIT_PTR
+typedef int __psint_t;
+# elif defined HAVE_64BIT_PTR
+#  ifdef HAVE_64BIT_LONG
+typedef long __psint_t;
+#  else
+/* This is a very strange architecture, which has 64 bit pointers but
+ * not 64 bit longs. So, I'd just punt here and assume long long is Ok */
+typedef long long __psint_t;
+#  endif
+# else
+#  error Unknown pointer size
+# endif
+#endif
+
+/* Define __psunsigned_t as an unsigned integer of pointer width unless HAVE___PSUNSIGNED_T is set */
+#undef HAVE___PSUNSIGNED_T
+#ifndef HAVE___PSUNSIGNED_T
+# ifdef HAVE_32BIT_PTR
+typedef unsigned int __psunsigned_t;
+# elif defined HAVE_64BIT_PTR
+#  ifdef HAVE_64BIT_LONG
+typedef unsigned long __psunsigned_t;	/* was signed long: must be unsigned */
+#  else
+/* This is a very strange architecture, which has 64 bit pointers but
+ * not 64 bit longs. So, I'd just punt here and assume long long is Ok */
+typedef unsigned long long __psunsigned_t;
+#  endif
+# else
+#  error Unknown pointer size
+# endif
+#endif
+
+#ifdef DEBUG
+# define ASSERT		assert
+#else
+# define ASSERT(EX)	((void) 0)
+#endif
+
+#endif	/* __XFS_PLATFORM_DEFS_H__ */
diff --git a/include/xfs_ag.h b/include/xfs_ag.h
new file mode 100644
index 000000000..86e4095f8
--- /dev/null
+++ b/include/xfs_ag.h
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_AG_H__
+#define	__XFS_AG_H__
+
+/*
+ * Allocation group header
+ * This is divided into three structures, placed in sequential 512-byte 
+ * buffers after a copy of the superblock (also in a 512-byte buffer).
+ */
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define	XFS_AGF_MAGIC	0x58414746	/* 'XAGF' */
+#define	XFS_AGI_MAGIC	0x58414749	/* 'XAGI' */
+#define	XFS_AGF_VERSION	1
+#define	XFS_AGI_VERSION	1
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_GOOD_VERSION)
+int xfs_agf_good_version(unsigned v);
+#define	XFS_AGF_GOOD_VERSION(v)	xfs_agf_good_version(v)
+#else
+#define XFS_AGF_GOOD_VERSION(v)		((v) == XFS_AGF_VERSION)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_GOOD_VERSION)
+int xfs_agi_good_version(unsigned v);
+#define	XFS_AGI_GOOD_VERSION(v)	xfs_agi_good_version(v)
+#else
+#define XFS_AGI_GOOD_VERSION(v)		((v) == XFS_AGI_VERSION)
+#endif
+
+/*
+ * Btree number 0 is bno, 1 is cnt.  This value gives the size of the
+ * arrays below.
+ */
+#define	XFS_BTNUM_AGF	((int)XFS_BTNUM_CNTi + 1)
+
+/*
+ * The second word of agf_levels in the first a.g. overlaps the EFS
+ * superblock's magic number.  Since the magic numbers valid for EFS
+ * are > 64k, our value cannot be confused for an EFS superblock's.
+ */
+
+typedef struct xfs_agf
+{
+	/*
+	 * Common allocation group header information
+	 */
+	__uint32_t	agf_magicnum;	/* magic number == XFS_AGF_MAGIC */
+	__uint32_t	agf_versionnum;	/* header version == XFS_AGF_VERSION */
+	xfs_agnumber_t	agf_seqno;	/* sequence # starting from 0 */
+	xfs_agblock_t	agf_length;	/* size in blocks of a.g. */
+	/*
+	 * Freespace information
+	 */
+	xfs_agblock_t	agf_roots[XFS_BTNUM_AGF];	/* root blocks */
+	__uint32_t	agf_spare0;	/* spare field */
+	__uint32_t	agf_levels[XFS_BTNUM_AGF];	/* btree levels */
+	__uint32_t	agf_spare1;	/* spare field */
+	__uint32_t	agf_flfirst;	/* first freelist block's index */
+	__uint32_t	agf_fllast;	/* last freelist block's index */
+	__uint32_t	agf_flcount;	/* count of blocks in freelist */
+	xfs_extlen_t	agf_freeblks;	/* total free blocks */
+	xfs_extlen_t	agf_longest;	/* longest free space */
+} xfs_agf_t;
+
+#define	XFS_AGF_MAGICNUM	0x00000001
+#define	XFS_AGF_VERSIONNUM	0x00000002
+#define	XFS_AGF_SEQNO		0x00000004
+#define	XFS_AGF_LENGTH		0x00000008
+#define	XFS_AGF_ROOTS		0x00000010
+#define	XFS_AGF_LEVELS		0x00000020
+#define	XFS_AGF_FLFIRST		0x00000040
+#define	XFS_AGF_FLLAST		0x00000080
+#define	XFS_AGF_FLCOUNT		0x00000100
+#define	XFS_AGF_FREEBLKS	0x00000200
+#define	XFS_AGF_LONGEST		0x00000400
+#define	XFS_AGF_NUM_BITS	11
+#define	XFS_AGF_ALL_BITS	((1 << XFS_AGF_NUM_BITS) - 1)
+
+/* disk block (xfs_daddr_t) in the AG */
+#define	XFS_AGF_DADDR		((xfs_daddr_t)1)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_BLOCK)
+xfs_agblock_t xfs_agf_block(struct xfs_mount *mp);
+#define	XFS_AGF_BLOCK(mp)	xfs_agf_block(mp)
+#else
+#define	XFS_AGF_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGF_DADDR)
+#endif
+
+/*
+ * Size of the unlinked inode hash table in the agi.
+ */
+#define	XFS_AGI_UNLINKED_BUCKETS	64
+
+typedef struct xfs_agi
+{
+	/*
+	 * Common allocation group header information
+	 */
+	__uint32_t	agi_magicnum;	/* magic number == XFS_AGI_MAGIC */
+	__uint32_t	agi_versionnum;	/* header version == XFS_AGI_VERSION */
+	xfs_agnumber_t	agi_seqno;	/* sequence # starting from 0 */
+	xfs_agblock_t	agi_length;	/* size in blocks of a.g. */
+	/*
+	 * Inode information
+	 * Inodes are mapped by interpreting the inode number, so no
+	 * mapping data is needed here.
+	 */
+	xfs_agino_t	agi_count;	/* count of allocated inodes */
+	xfs_agblock_t	agi_root;	/* root of inode btree */
+	__uint32_t	agi_level;	/* levels in inode btree */
+	xfs_agino_t	agi_freecount;	/* number of free inodes */
+	xfs_agino_t	agi_newino;	/* new inode just allocated */
+	xfs_agino_t	agi_dirino;	/* last directory inode chunk */
+	/*
+	 * Hash table of inodes which have been unlinked but are
+	 * still being referenced.
+	 */
+	xfs_agino_t	agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
+} xfs_agi_t;
+
+#define	XFS_AGI_MAGICNUM	0x00000001
+#define	XFS_AGI_VERSIONNUM	0x00000002
+#define	XFS_AGI_SEQNO		0x00000004
+#define	XFS_AGI_LENGTH		0x00000008
+#define	XFS_AGI_COUNT		0x00000010
+#define	XFS_AGI_ROOT		0x00000020
+#define	XFS_AGI_LEVEL		0x00000040
+#define	XFS_AGI_FREECOUNT	0x00000080
+#define	XFS_AGI_NEWINO		0x00000100
+#define	XFS_AGI_DIRINO		0x00000200
+#define	XFS_AGI_UNLINKED	0x00000400
+#define	XFS_AGI_NUM_BITS	11
+#define	XFS_AGI_ALL_BITS	((1 << XFS_AGI_NUM_BITS) - 1)
+
+/* disk block (xfs_daddr_t) in the AG */
+#define	XFS_AGI_DADDR		((xfs_daddr_t)2)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_BLOCK)
+xfs_agblock_t xfs_agi_block(struct xfs_mount *mp);
+#define	XFS_AGI_BLOCK(mp)	xfs_agi_block(mp)
+#else
+#define	XFS_AGI_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGI_DADDR)
+#endif
+
+/*
+ * Basic block 3 of the a.g. (after the SB, AGF and AGI) holds the a.g.
+ * freelist, an array of pointers to blocks owned by the allocation btree code.
+ */
+#define	XFS_AGFL_DADDR		((xfs_daddr_t)3)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGFL_BLOCK)
+xfs_agblock_t xfs_agfl_block(struct xfs_mount *mp);
+#define	XFS_AGFL_BLOCK(mp)	xfs_agfl_block(mp)
+#else
+#define	XFS_AGFL_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR)
+#endif
+#define	XFS_AGFL_SIZE		(BBSIZE / sizeof(xfs_agblock_t))
+typedef	struct xfs_agfl
+{
+	xfs_agblock_t	agfl_bno[XFS_AGFL_SIZE];
+} xfs_agfl_t;
+
+/*
+ * Per-ag incore structure, copies of information in agf and agi,
+ * to improve the performance of allocation group selection.
+ */
+typedef struct xfs_perag
+{
+	char		pagf_init;	/* this agf's entry is initialized */
+	char		pagi_init;	/* this agi's entry is initialized */
+	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
+					/* # of levels in bno & cnt btree */
+	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
+	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
+	xfs_extlen_t	pagf_longest;	/* longest free space */
+	xfs_agino_t	pagi_freecount;	/* number of free inodes */
+} xfs_perag_t;
+
+#define	XFS_AG_MIN_BYTES	(1LL << 24)	/* 16 MB */
+#define	XFS_AG_BEST_BYTES	(1LL << 30)	/*  1 GB */
+#define	XFS_AG_MAX_BYTES	(1LL << 32)	/*  4 GB */
+/* Byte limits above shifted right by bl (presumably log2 of the block size) */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MIN_BLOCKS)
+xfs_extlen_t xfs_ag_min_blocks(int bl);
+#define	XFS_AG_MIN_BLOCKS(bl)		xfs_ag_min_blocks(bl)
+#else
+#define	XFS_AG_MIN_BLOCKS(bl)	((xfs_extlen_t)(XFS_AG_MIN_BYTES >> (bl)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_BEST_BLOCKS)
+xfs_extlen_t xfs_ag_best_blocks(int bl);
+#define	XFS_AG_BEST_BLOCKS(bl)		xfs_ag_best_blocks(bl)
+#else
+#define	XFS_AG_BEST_BLOCKS(bl)	((xfs_extlen_t)(XFS_AG_BEST_BYTES >> (bl)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAX_BLOCKS)
+xfs_extlen_t xfs_ag_max_blocks(int bl);
+#define	XFS_AG_MAX_BLOCKS(bl)		xfs_ag_max_blocks(bl)
+#else
+#define	XFS_AG_MAX_BLOCKS(bl)	((xfs_extlen_t)(XFS_AG_MAX_BYTES >> (bl)))
+#endif
+
+#define	XFS_MAX_AGNUMBER	((xfs_agnumber_t)(NULLAGNUMBER - 1))
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAXLEVELS)
+int xfs_ag_maxlevels(struct xfs_mount *mp);
+#define	XFS_AG_MAXLEVELS(mp)		xfs_ag_maxlevels(mp)
+#else
+#define	XFS_AG_MAXLEVELS(mp)	((mp)->m_ag_maxlevels)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST)
+int xfs_min_freelist(xfs_agf_t *a, struct xfs_mount *mp);
+#define	XFS_MIN_FREELIST(a,mp)		xfs_min_freelist(a,mp)
+#else
+#define	XFS_MIN_FREELIST(a,mp)	\
+	XFS_MIN_FREELIST_RAW(	\
+		INT_GET((a)->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT), \
+		INT_GET((a)->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT), mp)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_PAG)
+int xfs_min_freelist_pag(xfs_perag_t *pag, struct xfs_mount *mp);
+#define	XFS_MIN_FREELIST_PAG(pag,mp)	xfs_min_freelist_pag(pag,mp)
+#else
+#define	XFS_MIN_FREELIST_PAG(pag,mp)	\
+	XFS_MIN_FREELIST_RAW((pag)->pagf_levels[XFS_BTNUM_BNOi], \
+			     (pag)->pagf_levels[XFS_BTNUM_CNTi], mp)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_RAW)
+int xfs_min_freelist_raw(int bl, int cl, struct xfs_mount *mp);
+#define	XFS_MIN_FREELIST_RAW(bl,cl,mp)	xfs_min_freelist_raw(bl,cl,mp)
+#else	/* sum over both level counts of MIN(levels + 1, XFS_AG_MAXLEVELS(mp)) */
+#define	XFS_MIN_FREELIST_RAW(bl,cl,mp)	\
+	(MIN((bl) + 1, XFS_AG_MAXLEVELS(mp)) + \
+	 MIN((cl) + 1, XFS_AG_MAXLEVELS(mp)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_FSB)
+xfs_fsblock_t xfs_agb_to_fsb(struct xfs_mount *mp, xfs_agnumber_t agno,
+			     xfs_agblock_t agbno);
+#define XFS_AGB_TO_FSB(mp,agno,agbno)	xfs_agb_to_fsb(mp,agno,agbno)
+#else
+#define	XFS_AGB_TO_FSB(mp,agno,agbno) \
+	(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGNO)
+xfs_agnumber_t xfs_fsb_to_agno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+#define	XFS_FSB_TO_AGNO(mp,fsbno)	xfs_fsb_to_agno(mp,fsbno)
+#else
+#define	XFS_FSB_TO_AGNO(mp,fsbno) \
+	((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGBNO)
+xfs_agblock_t xfs_fsb_to_agbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+#define	XFS_FSB_TO_AGBNO(mp,fsbno)	xfs_fsb_to_agbno(mp,fsbno)
+#else
+#define	XFS_FSB_TO_AGBNO(mp,fsbno) \
+	((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_DADDR)
+xfs_daddr_t xfs_agb_to_daddr(struct xfs_mount *mp, xfs_agnumber_t agno,
+			 xfs_agblock_t agbno);
+#define	XFS_AGB_TO_DADDR(mp,agno,agbno)	xfs_agb_to_daddr(mp,agno,agbno)
+#else
+#define	XFS_AGB_TO_DADDR(mp,agno,agbno) \
+	((xfs_daddr_t)(XFS_FSB_TO_BB(mp, \
+		(xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))))
+#endif
+/*
+ * XFS_DADDR_TO_AGNO and XFS_DADDR_TO_AGBNO moved to xfs_mount.h
+ * to avoid header file ordering change
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_DADDR)
+xfs_daddr_t xfs_ag_daddr(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_daddr_t d);
+#define	XFS_AG_DADDR(mp,agno,d)		xfs_ag_daddr(mp,agno,d)
+#else
+#define	XFS_AG_DADDR(mp,agno,d)	(XFS_AGB_TO_DADDR(mp, agno, 0) + (d))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGF)
+xfs_agf_t *xfs_buf_to_agf(struct xfs_buf *bp);
+#define	XFS_BUF_TO_AGF(bp)		xfs_buf_to_agf(bp)
+#else
+#define	XFS_BUF_TO_AGF(bp)	((xfs_agf_t *)XFS_BUF_PTR(bp))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGI)
+xfs_agi_t *xfs_buf_to_agi(struct xfs_buf *bp);
+#define	XFS_BUF_TO_AGI(bp)		xfs_buf_to_agi(bp)
+#else
+#define	XFS_BUF_TO_AGI(bp)	((xfs_agi_t *)XFS_BUF_PTR(bp))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGFL)
+xfs_agfl_t *xfs_buf_to_agfl(struct xfs_buf *bp);
+#define	XFS_BUF_TO_AGFL(bp)		xfs_buf_to_agfl(bp)
+#else
+#define	XFS_BUF_TO_AGFL(bp)	((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#endif
+
+/*
+ * For checking for bad ranges of xfs_daddr_t's, covering multiple
+ * allocation groups or a single xfs_daddr_t that's a superblock copy.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_CHECK_DADDR)
+void xfs_ag_check_daddr(struct xfs_mount *mp, xfs_daddr_t d, xfs_extlen_t len);
+#define	XFS_AG_CHECK_DADDR(mp,d,len)	xfs_ag_check_daddr(mp,d,len)
+#else
+#define	XFS_AG_CHECK_DADDR(mp,d,len)	\
+	((len) == 1 ? \
+	    ASSERT((d) == XFS_SB_DADDR || \
+		   XFS_DADDR_TO_AGBNO(mp, d) != XFS_SB_DADDR) : \
+	    ASSERT(XFS_DADDR_TO_AGNO(mp, d) == \
+	           XFS_DADDR_TO_AGNO(mp, (d) + (len) - 1)))
+#endif
+
+#endif	/* __XFS_AG_H__ */
diff --git a/include/xfs_alloc.h b/include/xfs_alloc.h
new file mode 100644
index 000000000..55a2efa59
--- /dev/null
+++ b/include/xfs_alloc.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ALLOC_H__
+#define	__XFS_ALLOC_H__
+
+struct xfs_buf;
+struct xfs_mount;
+struct xfs_perag;
+struct xfs_trans;
+
+/*
+ * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
+ */
+typedef enum xfs_alloctype
+{
+	XFS_ALLOCTYPE_ANY_AG,		/* allocate anywhere, use rotor */
+	XFS_ALLOCTYPE_FIRST_AG,		/* ... start at ag 0 */
+	XFS_ALLOCTYPE_START_AG,		/* anywhere, start in this a.g. */
+	XFS_ALLOCTYPE_THIS_AG,		/* anywhere in this a.g. */
+	XFS_ALLOCTYPE_START_BNO,	/* near this block else anywhere */
+	XFS_ALLOCTYPE_NEAR_BNO,		/* in this a.g. and near this block */
+	XFS_ALLOCTYPE_THIS_BNO		/* at exactly this block */
+} xfs_alloctype_t;
+
+/*
+ * Flags for xfs_alloc_fix_freelist.
+ */
+#define	XFS_ALLOC_FLAG_TRYLOCK	0x00000001  /* use trylock for buffer locking */
+
+/*
+ * Argument structure for xfs_alloc routines.
+ * This is turned into a structure to avoid having 20 arguments passed
+ * down several levels of the stack.
+ */
+typedef struct xfs_alloc_arg {
+	struct xfs_trans *tp;		/* transaction pointer */
+	struct xfs_mount *mp;		/* file system mount point */
+	struct xfs_buf	*agbp;		/* buffer for a.g. freelist header */
+	struct xfs_perag *pag;		/* per-ag struct for this agno */
+	xfs_fsblock_t	fsbno;		/* file system block number */
+	xfs_agnumber_t	agno;		/* allocation group number */
+	xfs_agblock_t	agbno;		/* allocation group-relative block # */
+	xfs_extlen_t	minlen;		/* minimum size of extent */
+	xfs_extlen_t	maxlen;		/* maximum size of extent */
+	xfs_extlen_t	mod;		/* mod value for extent size */
+	xfs_extlen_t	prod;		/* prod value for extent size */
+	xfs_extlen_t	minleft;	/* min blocks must be left after us */
+	xfs_extlen_t	total;		/* total blocks needed in xaction */
+	xfs_extlen_t	alignment;	/* align answer to multiple of this */
+	xfs_extlen_t	minalignslop;	/* slop for minlen+alignment calcs */
+	xfs_extlen_t	len;		/* output: actual size of extent */
+	xfs_alloctype_t	type;		/* allocation type XFS_ALLOCTYPE_... */
+	xfs_alloctype_t	otype;		/* original allocation type */
+	char		wasdel;		/* set if allocation was prev delayed */
+	char		wasfromfl;	/* set if allocation is from freelist */
+	char		isfl;		/* set if is freelist blocks - !actg */
+	char		userdata;	/* set if this is user data */
+} xfs_alloc_arg_t;
+
+
+#ifdef __KERNEL__
+
+/*
+ * Types for alloc tracing.
+ */
+#define	XFS_ALLOC_KTRACE_ALLOC	1
+#define	XFS_ALLOC_KTRACE_FREE	2
+#define	XFS_ALLOC_KTRACE_MODAGF	3
+/*
+ * Allocation tracing buffer size.
+ */
+#define	XFS_ALLOC_TRACE_SIZE	4096
+
+#ifdef	XFS_ALL_TRACE
+#define	XFS_ALLOC_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef	XFS_ALLOC_TRACE
+#endif
+
+/*
+ * Prototypes for visible xfs_alloc.c routines
+ */
+
+/*
+ * Compute and fill in value of m_ag_maxlevels.
+ */
+void
+xfs_alloc_compute_maxlevels(
+	struct xfs_mount	*mp);	/* file system mount structure */
+
+/*
+ * Decide whether to use this allocation group for this allocation.
+ * If so, fix up the btree freelist's size.
+ * This is external so mkfs can call it, too.
+ */
+int				/* error */
+xfs_alloc_fix_freelist(
+	xfs_alloc_arg_t	*args,	/* allocation argument structure */
+	int		flags);	/* XFS_ALLOC_FLAG_... */
+
+/*
+ * Get a block from the freelist.
+ * The address of the block gotten is returned through bnop.
+ */
+int				/* error */
+xfs_alloc_get_freelist(
+	struct xfs_trans *tp,	/* transaction pointer */
+	struct xfs_buf	*agbp,	/* buffer containing the agf structure */
+	xfs_agblock_t	*bnop);	/* block address retrieved from freelist */
+
+/*
+ * Log the given fields from the agf structure.
+ */
+void
+xfs_alloc_log_agf(
+	struct xfs_trans *tp,	/* transaction pointer */
+	struct xfs_buf	*bp,	/* buffer for a.g. freelist header */
+	int		fields);/* mask of fields to be logged (XFS_AGF_...) */
+
+/*
+ * Interface for inode allocation to force the pag data to be initialized.
+ */
+int				/* error */
+xfs_alloc_pagf_init(
+	struct xfs_mount *mp,	/* file system mount structure */
+	struct xfs_trans *tp,	/* transaction pointer */
+	xfs_agnumber_t	agno,	/* allocation group number */
+	int		flags);	/* XFS_ALLOC_FLAG_... */
+
+/*
+ * Put the block on the freelist for the allocation group.
+ */
+int				/* error */
+xfs_alloc_put_freelist(
+	struct xfs_trans *tp,	/* transaction pointer */
+	struct xfs_buf	*agbp,	/* buffer for a.g. freelist header */
+	struct xfs_buf	*agflbp,/* buffer for a.g. free block array */
+	xfs_agblock_t	bno);	/* block being freed */
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int					/* error  */
+xfs_alloc_read_agf(
+	struct xfs_mount *mp,		/* mount point structure */
+	struct xfs_trans *tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	int		flags,		/* XFS_ALLOC_FLAG_... */
+	struct xfs_buf	**bpp);		/* buffer for the ag freelist header */
+
+/*
+ * Allocate an extent (variable-size).
+ */
+int				/* error */
+xfs_alloc_vextent(
+	xfs_alloc_arg_t	*args);	/* allocation argument structure */
+
+/*
+ * Free an extent.
+ */
+int				/* error */
+xfs_free_extent(
+	struct xfs_trans *tp,	/* transaction pointer */
+	xfs_fsblock_t	bno,	/* starting block number of extent */
+	xfs_extlen_t	len);	/* length of extent */
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_ALLOC_H__ */
diff --git a/include/xfs_alloc_btree.h b/include/xfs_alloc_btree.h
new file mode 100644
index 000000000..7cd1a8737
--- /dev/null
+++ b/include/xfs_alloc_btree.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ALLOC_BTREE_H__
+#define	__XFS_ALLOC_BTREE_H__
+
+/*
+ * Freespace on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_btree_sblock;
+struct xfs_mount;
+
+/*
+ * There are two on-disk btrees, one sorted by blockno and one sorted
+ * by blockcount and blockno.  All blocks look the same to make the code
+ * simpler; if we have time later, we'll make the optimizations.
+ */
+#define	XFS_ABTB_MAGIC	0x41425442	/* 'ABTB' for bno tree */
+#define	XFS_ABTC_MAGIC	0x41425443	/* 'ABTC' for cnt tree */
+
+/*
+ * Data record/key structure
+ */
+typedef struct xfs_alloc_rec
+{
+	xfs_agblock_t	ar_startblock;	/* starting block number */
+	xfs_extlen_t	ar_blockcount;	/* count of free blocks */
+} xfs_alloc_rec_t, xfs_alloc_key_t;
+
+typedef xfs_agblock_t xfs_alloc_ptr_t;	/* btree pointer type */
+					/* btree block header type */
+typedef	struct xfs_btree_sblock xfs_alloc_block_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_ALLOC_BLOCK)
+xfs_alloc_block_t *xfs_buf_to_alloc_block(struct xfs_buf *bp);
+#define	XFS_BUF_TO_ALLOC_BLOCK(bp)	xfs_buf_to_alloc_block(bp)
+#else
+#define	XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+/*
+ * Real block structures have a size equal to the disk block size.
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_SIZE)
+int xfs_alloc_block_size(int lev, struct xfs_btree_cur *cur);
+#define	XFS_ALLOC_BLOCK_SIZE(lev,cur)	xfs_alloc_block_size(lev,cur)
+#else
+#define	XFS_ALLOC_BLOCK_SIZE(lev,cur)	(1 << (cur)->bc_blocklog)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MAXRECS)
+int xfs_alloc_block_maxrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_ALLOC_BLOCK_MAXRECS(lev,cur)	xfs_alloc_block_maxrecs(lev,cur)
+#else	/* m_alloc_mxr[0] for level 0, m_alloc_mxr[1] for any other level */
+#define	XFS_ALLOC_BLOCK_MAXRECS(lev,cur)	\
+	((cur)->bc_mp->m_alloc_mxr[(lev) != 0])
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MINRECS)
+int xfs_alloc_block_minrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_ALLOC_BLOCK_MINRECS(lev,cur)	xfs_alloc_block_minrecs(lev,cur)
+#else	/* m_alloc_mnr[0] for level 0, m_alloc_mnr[1] for any other level */
+#define	XFS_ALLOC_BLOCK_MINRECS(lev,cur)	\
+	((cur)->bc_mp->m_alloc_mnr[(lev) != 0])
+#endif
+
+/*
+ * Minimum and maximum blocksize.
+ * The blocksize upper limit is pretty much arbitrary.
+ */
+#define	XFS_MIN_BLOCKSIZE_LOG	9	/* i.e. 512 bytes */
+#define	XFS_MAX_BLOCKSIZE_LOG	16	/* i.e. 65536 bytes */
+#define	XFS_MIN_BLOCKSIZE	(1 << XFS_MIN_BLOCKSIZE_LOG)
+#define	XFS_MAX_BLOCKSIZE	(1 << XFS_MAX_BLOCKSIZE_LOG)
+
+/*
+ * block numbers in the AG; SB is BB 0, AGF is BB 1, AGI is BB 2, AGFL is BB 3
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BNO_BLOCK)
+xfs_agblock_t xfs_bno_block(struct xfs_mount *mp);
+#define	XFS_BNO_BLOCK(mp)	xfs_bno_block(mp)
+#else
+#define	XFS_BNO_BLOCK(mp)	((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CNT_BLOCK)
+xfs_agblock_t xfs_cnt_block(struct xfs_mount *mp);
+#define	XFS_CNT_BLOCK(mp)	xfs_cnt_block(mp)
+#else
+#define	XFS_CNT_BLOCK(mp)	((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
+#endif
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_REC_ADDR)
+xfs_alloc_rec_t *xfs_alloc_rec_addr(xfs_alloc_block_t *bb, int i,
+				    struct xfs_btree_cur *cur);
+#define	XFS_ALLOC_REC_ADDR(bb,i,cur)	xfs_alloc_rec_addr(bb,i,cur)
+#else
+#define	XFS_ALLOC_REC_ADDR(bb,i,cur)	\
+	XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, bb, i, \
+		XFS_ALLOC_BLOCK_MAXRECS(0, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_KEY_ADDR)
+xfs_alloc_key_t *xfs_alloc_key_addr(xfs_alloc_block_t *bb, int i,
+				    struct xfs_btree_cur *cur);
+#define	XFS_ALLOC_KEY_ADDR(bb,i,cur)	xfs_alloc_key_addr(bb,i,cur)
+#else
+#define	XFS_ALLOC_KEY_ADDR(bb,i,cur)	\
+	XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
+		XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_PTR_ADDR)
+xfs_alloc_ptr_t *xfs_alloc_ptr_addr(xfs_alloc_block_t *bb, int i,
+				    struct xfs_btree_cur *cur);
+#define	XFS_ALLOC_PTR_ADDR(bb,i,cur)	xfs_alloc_ptr_addr(bb,i,cur)
+#else
+#define	XFS_ALLOC_PTR_ADDR(bb,i,cur)	\
+	XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
+		XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+#endif
+
+/*
+ * Prototypes for externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_alloc_decrement(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat);	/* success/failure */
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int					/* error */
+xfs_alloc_delete(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*stat);	/* success/failure */
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int					/* error */
+xfs_alloc_get_rec(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		*bno,	/* output: starting block of extent */
+	xfs_extlen_t		*len,	/* output: length of extent */
+	int			*stat);	/* output: success/failure */
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_alloc_increment(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat);	/* success/failure */
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int					/* error */
+xfs_alloc_insert(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*stat);	/* success/failure */
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+int					/* error */
+xfs_alloc_lookup_eq(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat);	/* success/failure */
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int					/* error */
+xfs_alloc_lookup_ge(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat);	/* success/failure */
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int					/* error */
+xfs_alloc_lookup_le(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat);	/* success/failure */
+ 
+/*
+ * Update the record referred to by cur, to the value given by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int					/* error */
+xfs_alloc_update(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len);	/* length of extent */
+
+#endif	/* __XFS_ALLOC_BTREE_H__ */
diff --git a/include/xfs_arch.h b/include/xfs_arch.h
new file mode 100644
index 000000000..9a013819a
--- /dev/null
+++ b/include/xfs_arch.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ARCH_H__
+#define __XFS_ARCH_H__
+
+#ifndef XFS_BIG_FILESYSTEMS
+#error XFS_BIG_FILESYSTEMS must be defined true or false
+#endif
+    
+#define DIRINO4_GET_ARCH(pointer,arch) \
+    ( ((arch) == ARCH_NOCONVERT) \
+        ? \
+            (INT_GET_UNALIGNED_32(pointer)) \
+        : \
+            (INT_GET_UNALIGNED_32_BE(pointer)) \
+    )
+    
+#if XFS_BIG_FILESYSTEMS
+#define DIRINO_GET_ARCH(pointer,arch) \
+    ( ((arch) == ARCH_NOCONVERT) \
+        ? \
+            (INT_GET_UNALIGNED_64(pointer)) \
+        : \
+            (INT_GET_UNALIGNED_64_BE(pointer)) \
+    )
+#else
+/* MACHINE ARCHITECTURE dependent: read 4 bytes at offset 4 on little-endian hosts, at offset 0 otherwise */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define DIRINO_GET_ARCH(pointer,arch) \
+    DIRINO4_GET_ARCH((((__u8*)(pointer))+4),arch)
+#else
+#define DIRINO_GET_ARCH(pointer,arch) \
+    DIRINO4_GET_ARCH(pointer,arch)
+#endif
+#endif
+
+#define DIRINO_COPY_ARCH(from,to,arch) \
+    if ((arch) == ARCH_NOCONVERT) { \
+        bcopy(from,to,sizeof(xfs_ino_t)); \
+    } else { \
+        INT_SWAP_UNALIGNED_64(from,to); \
+    }
+#define DIRINO4_COPY_ARCH(from,to,arch) \
+    if ((arch) == ARCH_NOCONVERT) { \
+        bcopy((((__u8*)(from))+4),to,sizeof(xfs_dir2_ino4_t)); \
+    } else { \
+        INT_SWAP_UNALIGNED_32(from,to); \
+    }
+
+#endif	/* __XFS_ARCH_H__ */
diff --git a/include/xfs_attr_leaf.h b/include/xfs_attr_leaf.h
new file mode 100644
index 000000000..41d63b526
--- /dev/null
+++ b/include/xfs_attr_leaf.h
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ATTR_LEAF_H__
+#define	__XFS_ATTR_LEAF_H__
+
+/*
+ * Attribute storage layout, internal structure, access macros, etc.
+ *
+ * Attribute lists are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Attribute names are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of an attribute name may not be unique, we may have duplicate keys.  The
+ * internal links in the Btree are logical block offsets into the file.
+ */
+
+struct attrlist;
+struct attrlist_cursor_kern;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_inode;
+struct xfs_trans;
+
+/*========================================================================
+ * Attribute structure when equal to XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This is the structure of the leaf nodes in the Btree.
+ *
+ * Struct leaf_entry's are packed from the top.  Name/values grow from the
+ * bottom but are not packed.  The freemap contains run-length-encoded entries
+ * for the free bytes after the leaf_entry's, but only the N largest such,
+ * smaller runs are dropped.  When the freemap doesn't show enough space
+ * for an allocation, we compact the name/value area and try again.  If we
+ * still don't have enough space, then we have to split the block.  The
+ * name/value structs (both local and remote versions) must be 32bit aligned.
+ *
+ * Since we have duplicate hash keys, for each key that matches, compare
+ * the actual name string.  The root and intermediate node search always
+ * takes the first-in-the-block key match found, so we should only have
+ * to work "forw"ard.  If none matches, continue with the "forw"ard leaf
+ * nodes until the hash key changes or the attribute name is found.
+ *
+ * We store the fact that an attribute is a ROOT versus USER attribute in
+ * the leaf_entry.  The namespaces are independent only because we also look
+ * at the root/user bit when we are looking for a matching attribute name.
+ *
+ * We also store a "incomplete" bit in the leaf_entry.  It shows that an
+ * attribute is in the middle of being created and should not be shown to
+ * the user if we crash during the time that the bit is set.  We clear the
+ * bit when we have finished setting up the attribute.  We do this because
+ * we cannot create some large attributes inside a single transaction, and we
+ * need some indication that we weren't finished if we crash in the middle.
+ */
+#define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
+
+typedef struct xfs_attr_leafblock {
+	struct xfs_attr_leaf_hdr {	/* constant-structure header block */
+		xfs_da_blkinfo_t info;	/* block type, links, etc. */
+		__uint16_t count;	/* count of active leaf_entry's */
+		__uint16_t usedbytes;	/* num bytes of names/values stored */
+		__uint16_t firstused;	/* first used byte in name area */
+		__uint8_t  holes;	/* != 0 if blk needs compaction */
+		__uint8_t  pad1;	/* NOTE(review): presumably unused pad */
+		struct xfs_attr_leaf_map {	  /* RLE map of free bytes */
+			__uint16_t base;	  /* base of free region */
+			__uint16_t size;	  /* length of free region */
+		} freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
+	} hdr;
+	struct xfs_attr_leaf_entry {	/* sorted on hashval, not name */
+		xfs_dahash_t hashval;	/* hash value of name */
+		__uint16_t nameidx;	/* byte offset in block of name/value */
+		__uint8_t flags;	/* XFS_ATTR_LOCAL, _ROOT, _INCOMPLETE */
+		__uint8_t pad2;		/* unused pad byte */
+	} entries[1];			/* variable sized array */
+	struct xfs_attr_leaf_name_local {
+		__uint16_t valuelen;	/* number of bytes in value */
+		__uint8_t namelen;	/* length of name bytes */
+		__uint8_t nameval[1];	/* name/value bytes (variable length) */
+	} namelist;			/* grows from bottom of buf */
+	struct xfs_attr_leaf_name_remote {
+		xfs_dablk_t valueblk;	/* block number of value bytes */
+		__uint32_t valuelen;	/* number of bytes in value */
+		__uint8_t namelen;	/* length of name bytes */
+		__uint8_t name[1];	/* name bytes (variable length) */
+	} valuelist;			/* grows from bottom of buf */
+} xfs_attr_leafblock_t;
+typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
+typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
+typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
+typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
+typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
+
+/*
+ * Flags used in the leaf_entry[i].flags field.
+ * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
+ * on the system call, they are "or"ed together for various operations.
+ */
+#define	XFS_ATTR_LOCAL_BIT	0	/* attr is stored locally */
+#define	XFS_ATTR_ROOT_BIT	1	/* limit access to attr to userid 0 */
+#define	XFS_ATTR_INCOMPLETE_BIT	7	/* attr in middle of create/delete */
+#define XFS_ATTR_LOCAL		(1 << XFS_ATTR_LOCAL_BIT)	/* 0x01 */
+#define XFS_ATTR_ROOT		(1 << XFS_ATTR_ROOT_BIT)	/* 0x02 */
+#define XFS_ATTR_INCOMPLETE	(1 << XFS_ATTR_INCOMPLETE_BIT)	/* 0x80 */
+
+/*
+ * Alignment for namelist and valuelist entries (since they are mixed
+ * there can be only one alignment value): the size of an xfs_dablk_t.
+ */
+#define	XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t))
+
+/*
+ * Cast typed pointers for "local" and "remote" name/value structs.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_REMOTE)
+xfs_attr_leaf_name_remote_t *
+xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx);
+#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx)	/* function form */ \
+	xfs_attr_leaf_name_remote(leafp,idx)
+#else	/* block base plus entries[idx].nameidx bytes, typed as remote */
+#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx)	/* remote name struct ptr */ \
+	((xfs_attr_leaf_name_remote_t *)((char *)(leafp) + \
+	 INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_LOCAL)
+xfs_attr_leaf_name_local_t *
+xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx);
+#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)	/* function form */ \
+	xfs_attr_leaf_name_local(leafp,idx)
+#else	/* block base plus entries[idx].nameidx bytes, typed as local */
+#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)	/* local name struct ptr */ \
+	((xfs_attr_leaf_name_local_t *)((char *)(leafp) + \
+	 INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME)
+char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx);
+#define XFS_ATTR_LEAF_NAME(leafp,idx)		xfs_attr_leaf_name(leafp,idx)
+#else	/* untyped variant: char pointer entries[idx].nameidx bytes into block */
+#define XFS_ATTR_LEAF_NAME(leafp,idx)		/* generic name struct ptr */ \
+	((char *)(leafp) + INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT))
+#endif
+
+/*
+ * Calculate total bytes used (including trailing pad for alignment) for
+ * a "local" name/value structure, a "remote" name/value structure, and
+ * a pointer which might be either.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_REMOTE)
+int xfs_attr_leaf_entsize_remote(int nlen);
+#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen)	\
+	xfs_attr_leaf_entsize_remote(nlen)
+#else	/* "- 1" drops the name[1] stub; the mask assumes ALIGN is a power of 2 */
+#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen)	/* remote struct + name, padded */ \
+	(((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
+	  XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL)
+int xfs_attr_leaf_entsize_local(int nlen, int vlen);
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen)	\
+	xfs_attr_leaf_entsize_local(nlen,vlen)
+#else	/* "- 1" drops the nameval[1] stub; the mask assumes ALIGN is a power of 2 */
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen)	/* local struct + name + value, padded */ \
+	(((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + \
+	  XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX)
+int xfs_attr_leaf_entsize_local_max(int bsize);
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize)	\
+	xfs_attr_leaf_entsize_local_max(bsize)
+#else	/* bsize/2 + bsize/4, each computed with a truncating shift */
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize)	/* max local struct size */ \
+	(((bsize) >> 1) + ((bsize) >> 2))
+#endif
+
+
+/*========================================================================
+ * Structure used to pass context around among the routines.
+ *========================================================================*/
+
+typedef struct xfs_attr_list_context {
+	struct xfs_inode		*dp;	/* inode */
+	struct attrlist_cursor_kern	*cursor;/* position in list */
+	struct attrlist			*alist;	/* output buffer */
+	int				count;	/* number of used entries */
+	int				dupcnt;	/* count of duplicate hashvals seen */
+	int				bufsize;/* total buffer size */
+	int				firstu;	/* first used byte in buffer */
+	int				flags;	/* from VOP call */
+	int				resynch;/* true/false: resynch with cursor */
+} xfs_attr_list_context_t;
+
+/*
+ * Used to keep a list of "remote value" extents when unlinking an inode.
+ */
+typedef struct xfs_attr_inactive_list {
+	xfs_dablk_t	valueblk;	/* block number of value bytes */
+	int		valuelen;	/* bytes in value (int; leaf struct uses __uint32_t) */
+} xfs_attr_inactive_list_t;
+
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Internal routines for shortform attribute lists (size < XFS_LITINO(mp)).
+ */
+int	xfs_attr_shortform_create(struct xfs_da_args *args);
+int	xfs_attr_shortform_add(struct xfs_da_args *add);
+int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
+int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
+int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int	xfs_attr_shortform_remove(struct xfs_da_args *remove);
+int	xfs_attr_shortform_list(struct xfs_attr_list_context *context);
+int	xfs_attr_shortform_replace(struct xfs_da_args *args);
+int	xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp);
+
+/*
+ * Internal routines for leaf-form attribute lists (size == XFS_LBSIZE(mp)).
+ */
+int	xfs_attr_leaf_to_node(struct xfs_da_args *args);
+int	xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp,
+					  struct xfs_da_args *args);
+int	xfs_attr_leaf_clearflag(struct xfs_da_args *args);
+int	xfs_attr_leaf_setflag(struct xfs_da_args *args);
+int	xfs_attr_leaf_flipflags(struct xfs_da_args *args);
+
+/*
+ * Routines used for growing the Btree (plus lookup, getvalue, remove, list).
+ */
+int	xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
+				    struct xfs_dabuf **bpp);
+int	xfs_attr_leaf_split(struct xfs_da_state *state,
+				   struct xfs_da_state_blk *oldblk,
+				   struct xfs_da_state_blk *newblk);
+int	xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf,
+					struct xfs_da_args *args);
+int	xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args);
+int	xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer,
+				 struct xfs_da_args *args);
+int	xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer,
+				    struct xfs_da_args *args);
+int	xfs_attr_leaf_list_int(struct xfs_dabuf *bp,
+				      struct xfs_attr_list_context *context);
+
+/*
+ * Routines used for shrinking the Btree (plus the *_inactive and freextent ones).
+ */
+int	xfs_attr_leaf_toosmall(struct xfs_da_state *state, int *retval);
+void	xfs_attr_leaf_unbalance(struct xfs_da_state *state,
+				       struct xfs_da_state_blk *drop_blk,
+				       struct xfs_da_state_blk *save_blk);
+int	xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
+int	xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
+				      struct xfs_dabuf *bp, int level);
+int	xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
+				      struct xfs_dabuf *bp);
+int	xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp,
+				       xfs_dablk_t blkno, int blkcnt);
+
+/*
+ * Utility routines: hash/order/size helpers, list output, transaction roll.
+ */
+xfs_dahash_t	xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count);
+int	xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
+				   struct xfs_dabuf *leaf2_bp);
+int	xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize,
+					int *local);
+int	xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index);
+int	xfs_attr_put_listent(struct xfs_attr_list_context *context,
+				    char *name, int namelen, int valuelen);
+int	xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
+
+#endif	/* __XFS_ATTR_LEAF_H__ */
diff --git a/include/xfs_attr_sf.h b/include/xfs_attr_sf.h
new file mode 100644
index 000000000..c5106f87c
--- /dev/null
+++ b/include/xfs_attr_sf.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ATTR_SF_H__
+#define	__XFS_ATTR_SF_H__
+
+/*
+ * Attribute storage when stored inside the inode.
+ *
+ * Small attribute lists are packed as tightly as possible so as
+ * to fit into the literal area of the inode.
+ */
+
+struct xfs_inode;
+
+/*
+ * Entries are packed toward the top as tightly as possible.
+ */
+typedef struct xfs_attr_shortform {
+	struct xfs_attr_sf_hdr {	/* constant-structure header block */
+		__uint16_t totsize;	/* total bytes in shortform list */
+		__uint8_t count;	/* count of active entries */
+	} hdr;
+	struct xfs_attr_sf_entry {
+		__uint8_t namelen;	/* actual length of name (no NUL) */
+		__uint8_t valuelen;	/* actual length of value (no NUL) */
+		__uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
+		__uint8_t nameval[1];	/* name & value bytes concatenated */
+	} list[1];			/* variable sized array of entries */
+} xfs_attr_shortform_t;
+typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
+typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
+
+/*
+ * We generate this, then sort it: attr_list() must return things in hash-order.
+ */
+typedef struct xfs_attr_sf_sort {
+	__uint8_t	entno;		/* entry number in original list */
+	__uint8_t	namelen;	/* length of name value (no NUL) */
+	__uint8_t	valuelen;	/* length of value */
+	xfs_dahash_t	hash;		/* this entry's hash value */
+	char		*name;		/* name value, pointer into buffer */
+} xfs_attr_sf_sort_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE_BYNAME)
+int xfs_attr_sf_entsize_byname(int nlen, int vlen);
+#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	\
+	xfs_attr_sf_entsize_byname(nlen,vlen)
+#else	/* entry struct minus its nameval[1] stub, plus nlen and vlen bytes */
+#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	/* space name/value uses */ \
+	((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen))
+#endif
+#define XFS_ATTR_SF_ENTSIZE_MAX	/* max space for name&value; 255 if NBBY is 8 */ \
+	((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE)
+int xfs_attr_sf_entsize(xfs_attr_sf_entry_t *sfep);
+#define XFS_ATTR_SF_ENTSIZE(sfep)	xfs_attr_sf_entsize(sfep)
+#else	/* entry struct minus its nameval[1] stub, plus the stored name/value lengths */
+#define XFS_ATTR_SF_ENTSIZE(sfep)		/* space an entry uses */ \
+	((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_NEXTENTRY)
+xfs_attr_sf_entry_t *xfs_attr_sf_nextentry(xfs_attr_sf_entry_t *sfep);
+#define XFS_ATTR_SF_NEXTENTRY(sfep)	xfs_attr_sf_nextentry(sfep)
+#else	/* step XFS_ATTR_SF_ENTSIZE(sfep) bytes past the start of sfep */
+#define XFS_ATTR_SF_NEXTENTRY(sfep)		/* entry following sfep */ \
+	((xfs_attr_sf_entry_t *) \
+		&((char *)(sfep))[XFS_ATTR_SF_ENTSIZE(sfep)])
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_TOTSIZE)
+int xfs_attr_sf_totsize(struct xfs_inode *dp);
+#define XFS_ATTR_SF_TOTSIZE(dp)		xfs_attr_sf_totsize(dp)
+#else	/* reads hdr.totsize from the shortform data at dp->i_afp->if_u1.if_data */
+#define XFS_ATTR_SF_TOTSIZE(dp)			/* total space in use */ \
+	(INT_GET(((xfs_attr_shortform_t *)((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT))
+#endif
+
+#ifdef XFS_ALL_TRACE
+#define	XFS_ATTR_TRACE
+#endif	/* XFS_ALL_TRACE switches attr tracing on ... */
+
+#if !defined(DEBUG)
+#undef	XFS_ATTR_TRACE
+#endif	/* ... but XFS_ATTR_TRACE never survives without DEBUG */
+
+/*
+ * Kernel tracing support for attribute lists
+ */
+struct xfs_attr_list_context;
+struct xfs_da_intnode;
+struct xfs_da_node_entry;
+struct xfs_attr_leafblock;
+
+#define	XFS_ATTR_TRACE_SIZE	4096	/* size of global trace buffer */     
+
+/*
+ * Trace record types.
+ */
+#define	XFS_ATTR_KTRACE_L_C	1	/* context */
+#define	XFS_ATTR_KTRACE_L_CN	2	/* context, node */
+#define	XFS_ATTR_KTRACE_L_CB	3	/* context, btree */
+#define	XFS_ATTR_KTRACE_L_CL	4	/* context, leaf */
+
+#if defined(XFS_ATTR_TRACE)
+
+void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context);
+void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
+			      struct xfs_da_intnode *node);
+void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
+			      struct xfs_da_node_entry *btree);
+void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
+			      struct xfs_attr_leafblock *leaf);
+void xfs_attr_trace_enter(int type, char *where,
+			     __psunsigned_t a2, __psunsigned_t a3,
+			     __psunsigned_t a4, __psunsigned_t a5,
+			     __psunsigned_t a6, __psunsigned_t a7,
+			     __psunsigned_t a8, __psunsigned_t a9,
+			     __psunsigned_t a10, __psunsigned_t a11,
+			     __psunsigned_t a12, __psunsigned_t a13,
+			     __psunsigned_t a14, __psunsigned_t a15);
+#else	/* empty stubs; NOTE(review): there is none for xfs_attr_trace_enter */
+#define	xfs_attr_trace_l_c(w,c)
+#define	xfs_attr_trace_l_cn(w,c,n)
+#define	xfs_attr_trace_l_cb(w,c,b)
+#define	xfs_attr_trace_l_cl(w,c,l)
+#endif /* XFS_ATTR_TRACE */
+
+#endif	/* __XFS_ATTR_SF_H__ */
diff --git a/include/xfs_bit.h b/include/xfs_bit.h
new file mode 100644
index 000000000..80eccc5e5
--- /dev/null
+++ b/include/xfs_bit.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BIT_H__
+#define	__XFS_BIT_H__
+
+/*
+ * XFS bit manipulation routines.
+ */
+
+/*
+ * masks with n high/low bits set, 32-bit values & 64-bit values
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32HI)
+__uint32_t xfs_mask32hi(int n);
+#define	XFS_MASK32HI(n)		xfs_mask32hi(n)
+#else	/* NOTE(review): n == 0 shifts by 32, undefined if __uint32_t is 32 bits */
+#define	XFS_MASK32HI(n)		((__uint32_t)-1 << (32 - (n)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64HI)
+__uint64_t xfs_mask64hi(int n);
+#define	XFS_MASK64HI(n)		xfs_mask64hi(n)
+#else	/* NOTE(review): n == 0 shifts by 64, undefined if __uint64_t is 64 bits */
+#define	XFS_MASK64HI(n)		((__uint64_t)-1 << (64 - (n)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32LO)
+__uint32_t xfs_mask32lo(int n);
+#define	XFS_MASK32LO(n)		xfs_mask32lo(n)
+#else	/* NOTE(review): n == 32 shifts by 32, undefined if __uint32_t is 32 bits */
+#define	XFS_MASK32LO(n)		(((__uint32_t)1 << (n)) - 1)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64LO)
+__uint64_t xfs_mask64lo(int n);
+#define	XFS_MASK64LO(n)		xfs_mask64lo(n)
+#else	/* NOTE(review): n == 64 shifts by 64, undefined if __uint64_t is 64 bits */
+#define	XFS_MASK64LO(n)		(((__uint64_t)1 << (n)) - 1)
+#endif
+
+/*
+ * Index of low bit number in byte, -1 for none set, 0..7 otherwise.
+ */
+extern const char xfs_lowbit[256];	/* NOTE(review): -1 needs signed char */
+
+/*
+ * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
+ */
+extern const char xfs_highbit[256];	/* NOTE(review): -1 needs signed char */
+
+/*
+ * Count of bits set in byte, 0..8.
+ */
+extern const char xfs_countbit[256];	/* presumably indexed by byte value */
+
+/*
+ * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
+ */
+extern int xfs_lowbit32(__uint32_t v);
+
+/*
+ * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
+ */
+extern int xfs_highbit32(__uint32_t v);
+
+/*
+ * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
+ */
+extern int xfs_lowbit64(__uint64_t v);
+
+/*
+ * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
+ */
+extern int xfs_highbit64(__uint64_t v);
+
+#endif	/* __XFS_BIT_H__ */
diff --git a/include/xfs_bmap.h b/include/xfs_bmap.h
new file mode 100644
index 000000000..fa1a9e45c
--- /dev/null
+++ b/include/xfs_bmap.h
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BMAP_H__
+#define	__XFS_BMAP_H__
+
+struct getbmap;
+struct xfs_bmbt_irec;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * List of extents to be freed "later".
+ * The list is kept sorted on xbfi_startblock.
+ */
+typedef struct xfs_bmap_free_item
+{
+	xfs_fsblock_t		xbfi_startblock;/* starting fs block number */
+	xfs_extlen_t		xbfi_blockcount;/* number of blocks in extent */
+	struct xfs_bmap_free_item *xbfi_next;	/* link to next entry */
+} xfs_bmap_free_item_t;
+
+/*
+ * Header for the list of extents to be freed (xfs_bmap_free_item_t entries).
+ */
+typedef	struct xfs_bmap_free
+{
+	xfs_bmap_free_item_t	*xbf_first;	/* list of to-be-freed extents */
+	int			xbf_count;	/* count of items on list */
+	int			xbf_low;	/* kludge: alloc in low mode */
+} xfs_bmap_free_t;
+
+#define	XFS_BMAP_MAX_NMAP	4	/* NOTE(review): undocumented; presumably max maps per call */
+
+/*
+ * Flags for xfs_bmapi (distinct bits, OR-able into its "flags" argument)
+ */
+#define	XFS_BMAPI_WRITE		0x001	/* write operation: allocate space */
+#define XFS_BMAPI_DELAY		0x002	/* delayed write operation */
+#define XFS_BMAPI_ENTIRE	0x004	/* return entire extent, not trimmed */
+#define XFS_BMAPI_METADATA	0x008	/* mapping metadata not user data */
+#define XFS_BMAPI_EXACT		0x010	/* allocate only to spec'd bounds */
+#define XFS_BMAPI_ATTRFORK	0x020	/* use attribute fork not data */
+#define XFS_BMAPI_ASYNC		0x040	/* bunmapi xactions can be async */
+#define XFS_BMAPI_RSVBLOCKS	0x080	/* OK to alloc. reserved data blocks */
+#define	XFS_BMAPI_PREALLOC	0x100	/* preallocation op: unwritten space */
+#define	XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
+					/* combine contig. space */
+#define	XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
+#define XFS_BMAPI_DIRECT_IO	0x800	/* Flag from cxfs client, not used
+					 * by xfs directly. Indicates alloc
+					 * request is for direct I/O not
+					 * extent conversion by server */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAPI_AFLAG)
+int xfs_bmapi_aflag(int w);	/* function form, defined elsewhere */
+#define	XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
+#else	/* XFS_BMAPI_ATTRFORK when w is XFS_ATTR_FORK, 0 for anything else */
+#define	XFS_BMAPI_AFLAG(w)	((w) != XFS_ATTR_FORK ? 0 : XFS_BMAPI_ATTRFORK)
+#endif
+
+/*
+ * Special values for xfs_bmbt_irec_t br_startblock field.
+ */
+#define	DELAYSTARTBLOCK		((xfs_fsblock_t)-1LL)	/* presumably: delayed alloc */
+#define	HOLESTARTBLOCK		((xfs_fsblock_t)-2LL)	/* presumably: hole */
+
+/*
+ * Trace operations for bmap extent tracing
+ */
+#define	XFS_BMAP_KTRACE_DELETE	1
+#define	XFS_BMAP_KTRACE_INSERT	2
+#define	XFS_BMAP_KTRACE_PRE_UP	3
+#define	XFS_BMAP_KTRACE_POST_UP	4
+
+#define	XFS_BMAP_TRACE_SIZE	4096	/* size of global trace buffer */
+#define	XFS_BMAP_KTRACE_SIZE	32	/* size of per-inode trace buffer */
+
+#if defined(XFS_ALL_TRACE)
+#define	XFS_BMAP_TRACE
+#endif	/* XFS_ALL_TRACE switches bmap tracing on ... */
+
+#if !defined(DEBUG)
+#undef	XFS_BMAP_TRACE
+#endif	/* ... but XFS_BMAP_TRACE never survives without DEBUG */
+
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_INIT)
+void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp);
+#define	XFS_BMAP_INIT(flp,fbp)	xfs_bmap_init(flp,fbp)
+#else	/* empties the free list, clears xbf_low, sets *fbp to NULLFSBLOCK */
+#define	XFS_BMAP_INIT(flp,fbp)	\
+	((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
+	 (flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK)
+#endif
+
+/*
+ * Argument structure for xfs_bmap_alloc ("i/o" marks in/out fields).
+ */
+typedef struct xfs_bmalloca {
+	xfs_fsblock_t		firstblock; /* i/o first block allocated */
+	xfs_fsblock_t		rval;	/* starting block of new extent */
+	xfs_fileoff_t		off;	/* offset in file filling in */
+	struct xfs_trans	*tp;	/* transaction pointer */
+	struct xfs_inode	*ip;	/* incore inode pointer */
+	struct xfs_bmbt_irec	*prevp;	/* extent before the new one */
+	struct xfs_bmbt_irec	*gotp;	/* extent after, or delayed */
+	xfs_extlen_t		alen;	/* i/o length asked/allocated */
+	xfs_extlen_t		total;	/* total blocks needed for xaction */
+	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
+	xfs_extlen_t		minleft; /* amount must be left after alloc */
+	int			eof;	/* set if allocating past last extent */
+	int			wasdel;	/* replacing a delayed allocation */
+	int			userdata;/* set if is user data */
+	int			low;	/* low on space, using seq'l ags */
+	int			aeof;   /* allocated space at eof */
+} xfs_bmalloca_t;
+
+#ifdef __KERNEL__
+/*
+ * Convert inode from non-attributed to attributed.
+ * Must not be in a transaction, ip must not be locked.
+ */
+int					/* error code */
+xfs_bmap_add_attrfork(
+	struct xfs_inode	*ip,	/* incore inode pointer */
+	int					rsvd);	/* flag for reserved block allocation */
+
+/*
+ * Add the extent to the list of extents to be freed at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+void
+xfs_bmap_add_free(
+	xfs_fsblock_t		bno,		/* fs block number of extent */
+	xfs_filblks_t		len,		/* length of extent */
+	xfs_bmap_free_t		*flist,		/* list of extents */
+	struct xfs_mount	*mp);		/* mount point structure */
+
+/*
+ * Routine to clean up the free list data structure when
+ * an error occurs during a transaction.
+ */
+void
+xfs_bmap_cancel(
+	xfs_bmap_free_t		*flist);	/* free list to clean up */
+
+/*
+ * Routine to check if a specified inode is swap capable.
+ */
+int		/* NOTE(review): meaning of the return value is not documented */
+xfs_bmap_check_swappable(
+	struct xfs_inode	*ip);		/* incore inode */
+
+/* 
+ * Compute and fill in the value of the maximum depth of a bmap btree
+ * in this filesystem.  Done once, during mount.
+ */
+void
+xfs_bmap_compute_maxlevels(
+	struct xfs_mount	*mp,	/* file system mount structure */
+	int			whichfork);	/* data or attr fork */
+
+/*
+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 
+ * caller.  Frees all the extents that need freeing, which must be done
+ * last due to locking considerations.
+ *
+ * Presumably sets *committed to 1 if the given transaction was committed and
+ * a new one allocated, and 0 otherwise; the return value is an error code.
+ */
+int						/* error */
+xfs_bmap_finish(
+	struct xfs_trans	**tp,		/* transaction pointer addr */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	xfs_fsblock_t		firstblock,	/* controlled a.g. for allocs */
+	int			*committed);	/* xact committed or not */
+
+/*
+ * Finds the file-relative block number of the first unused block in the file
+ * (presumably stored in *unused; the int result is an error code).  This is the
+ * lowest-address hole if the file has holes, else the first block past eof.
+ */
+int						/* error */
+xfs_bmap_first_unused(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_extlen_t		len,		/* size of hole to find */
+	xfs_fileoff_t		*unused,	/* unused block num */
+	int			whichfork);	/* data or attr fork */
+
+/*
+ * Returns the file-relative block number of the last block + 1 before
+ * last_block (input value) in the file; presumably written to *last_block.
+ * This is not based on i_size, it is based on the extent list.
+ * Returns 0 for local files, as they do not have an extent list.
+ */
+int						/* error */
+xfs_bmap_last_before(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_fileoff_t		*last_block,	/* last block */
+	int			whichfork);	/* data or attr fork */
+
+/*
+ * Returns the file-relative block number of the first block past eof in
+ * the file.  This is not based on i_size, it is based on the extent list.
+ * Returns 0 for local files, as they do not have an extent list.
+ */
+int						/* error */
+xfs_bmap_last_offset(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_fileoff_t		*unused,	/* last block num (presumably the result) */
+	int			whichfork);	/* data or attr fork */
+
+/*
+ * Returns whether the selected fork of the inode has exactly one
+ * block or not.  For the data fork we check this matches di_size,
+ * implying the file's range is 0..bsize-1.
+ */
+int
+xfs_bmap_one_block(
+	struct xfs_inode	*ip,		/* incore inode */
+	int			whichfork);	/* data or attr fork */
+
+/*
+ * Read in the extents to iu_extents.
+ * All inode fields are set up by caller, we just traverse the btree
+ * and copy the records in.
+ */
+int						/* error */
+xfs_bmap_read_extents(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	int			whichfork);	/* data or attr fork */
+
+#if defined(XFS_BMAP_TRACE)
+/*
+ * Add bmap trace insert entries for all the contents of the extent list.
+ */
+void
+xfs_bmap_trace_exlist(
+	char			*fname,		/* function name */
+	struct xfs_inode	*ip,		/* incore inode pointer */
+	xfs_extnum_t		cnt,		/* count of entries in list */
+	int			whichfork);	/* data or attr fork */
+#else	/* !XFS_BMAP_TRACE: calls expand to nothing */
+#define	xfs_bmap_trace_exlist(f,ip,c,w)
+#endif	/* XFS_BMAP_TRACE */
+
+/*
+ * Map file blocks to filesystem blocks.
+ * File range is given by the bno/len pair.
+ * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
+ * into a hole or past eof.
+ * Only allocates blocks from a single allocation group,
+ * to avoid locking problems.
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). 
+ */
+int						/* error */
+xfs_bmapi(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_fileoff_t		bno,		/* starting file offs. mapped */
+	xfs_filblks_t		len,		/* length to map in file */
+	int			flags,		/* XFS_BMAPI_... */
+	xfs_fsblock_t		*firstblock,	/* first allocated block
+						   controls a.g. for allocs */
+	xfs_extlen_t		total,		/* total blocks needed */
+	struct xfs_bmbt_irec	*mval,		/* output: map values */
+	int			*nmap,		/* i/o: mval size/count */
+	xfs_bmap_free_t		*flist);	/* i/o: list extents to free */
+
+/*
+ * Map file blocks to filesystem blocks, simple version.
+ * One block only, read-only.
+ * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
+ * For the other flag values, the effect is as if XFS_BMAPI_METADATA
+ * was set and all the others were clear.
+ */
+int						/* error */
+xfs_bmapi_single(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	int			whichfork,	/* data or attr fork */
+	xfs_fsblock_t		*fsb,		/* output: mapped block */
+	xfs_fileoff_t		bno);		/* starting file offs. mapped */
+
+/*
+ * Unmap (remove) blocks from a file.
+ * If nexts is nonzero then the number of extents to remove is limited to
+ * that value.  If not all extents in the block range can be removed then
+ * *done is set.
+ */
+int						/* error */
+xfs_bunmapi(
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_fileoff_t		bno,		/* starting offset to unmap */
+	xfs_filblks_t		len,		/* length to unmap in file */
+	int			flags,		/* XFS_BMAPI_... */
+	xfs_extnum_t		nexts,		/* number of extents max */
+	xfs_fsblock_t		*firstblock,	/* first allocated block
+						   controls a.g. for allocs */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	int			*done);		/* set if not done yet */
+
+/*
+ * Fcntl interface to xfs_bmapi.
+ */
+int						/* error code */
+xfs_getbmap(
+	bhv_desc_t		*bdp,		/* XFS behavior descriptor*/
+	struct getbmap		*bmv,		/* user bmap structure */
+	void			*ap,		/* pointer to user's array */
+	int			iflags);	/* interface flags */
+
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ */
+int
+xfs_bmap_isaeof(
+        struct xfs_inode	*ip,
+        xfs_fileoff_t   	off,
+        int             	whichfork,
+        int             	*aeof);
+
+/*
+ * Check if the endoff is outside the last extent.  If so, the caller will
+ * grow the allocation to a stripe unit boundary.
+ */
+int
+xfs_bmap_eof(
+        struct xfs_inode        *ip,
+        xfs_fileoff_t           endoff,
+        int                     whichfork,
+        int                     *eof);
+
+/*
+ * Count fsblocks of the given fork.
+ */
+int
+xfs_bmap_count_blocks(
+	xfs_trans_t		*tp,
+	xfs_inode_t		*ip,
+	int			whichfork,
+	int			*count);
+
+/*
+ * Check an extent list, which has just been read, for
+ * any bit in the extent flag field.
+ */
+int
+xfs_check_nostate_extents(
+	xfs_bmbt_rec_t		*ep,
+	xfs_extnum_t		num);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_BMAP_H__ */
diff --git a/include/xfs_bmap_btree.h b/include/xfs_bmap_btree.h
new file mode 100644
index 000000000..af8ac671b
--- /dev/null
+++ b/include/xfs_bmap_btree.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BMAP_BTREE_H__
+#define	__XFS_BMAP_BTREE_H__
+
+#define	XFS_BMAP_MAGIC	0x424d4150	/* 'BMAP' */
+
+struct xfs_btree_cur;
+struct xfs_btree_lblock;
+struct xfs_mount;
+struct xfs_inode;
+
+/*
+ * Bmap root header, on-disk form only.
+ */
+typedef struct xfs_bmdr_block
+{
+	__uint16_t	bb_level;	/* 0 is a leaf */
+	__uint16_t	bb_numrecs;	/* current # of data records */
+} xfs_bmdr_block_t;
+
+/*
+ * Bmap btree record and extent descriptor.
+ * For 32-bit kernels,
+ *  l0:31 is an extent flag (value 1 indicates non-normal).
+ *  l0:0-30 and l1:9-31 are startoff.
+ *  l1:0-8, l2:0-31, and l3:21-31 are startblock.
+ *  l3:0-20 are blockcount.
+ * For 64-bit kernels,
+ *  l0:63 is an extent flag (value 1 indicates non-normal).
+ *  l0:9-62 are startoff.
+ *  l0:0-8 and l1:21-63 are startblock.
+ *  l1:0-20 are blockcount.
+ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+#define	BMBT_TOTAL_BITLEN	128	/* 128 bits, 16 bytes */
+#define	BMBT_EXNTFLAG_BITOFF	0	/* extent flag comes first */
+#define	BMBT_EXNTFLAG_BITLEN	1
+#define	BMBT_STARTOFF_BITOFF	(BMBT_EXNTFLAG_BITOFF + BMBT_EXNTFLAG_BITLEN)
+#define	BMBT_STARTOFF_BITLEN	54	/* startoff: offset 1, 54 bits */
+#define	BMBT_STARTBLOCK_BITOFF	(BMBT_STARTOFF_BITOFF + BMBT_STARTOFF_BITLEN)
+#define	BMBT_STARTBLOCK_BITLEN	52	/* startblock: offset 55, 52 bits */
+#define	BMBT_BLOCKCOUNT_BITOFF	\
+	(BMBT_STARTBLOCK_BITOFF + BMBT_STARTBLOCK_BITLEN)	/* = 107 */
+#define	BMBT_BLOCKCOUNT_BITLEN	(BMBT_TOTAL_BITLEN - BMBT_BLOCKCOUNT_BITOFF)
+
+#else	/* __BYTE_ORDER != __LITTLE_ENDIAN */
+
+#define	BMBT_TOTAL_BITLEN	128	/* 128 bits, 16 bytes */
+#define	BMBT_EXNTFLAG_BITOFF	63
+#define	BMBT_EXNTFLAG_BITLEN	1
+#define	BMBT_STARTOFF_BITOFF	(BMBT_EXNTFLAG_BITOFF - BMBT_STARTOFF_BITLEN)
+#define	BMBT_STARTOFF_BITLEN	54	/* so STARTOFF_BITOFF = 63 - 54 = 9 */
+#define	BMBT_STARTBLOCK_BITOFF	85 /* 128 - 43 (other 9 is in first word) */
+#define	BMBT_STARTBLOCK_BITLEN	52
+#define	BMBT_BLOCKCOUNT_BITOFF	64 /* Start of second 64 bit container */
+#define	BMBT_BLOCKCOUNT_BITLEN	21	/* same width as 128 - 107 above */
+
+#endif	/* __BYTE_ORDER == __LITTLE_ENDIAN */
+
+
+#define	BMBT_USE_64	1
+
+typedef struct xfs_bmbt_rec_32
+{
+	__uint32_t		l0, l1, l2, l3;
+} xfs_bmbt_rec_32_t;
+typedef struct xfs_bmbt_rec_64
+{
+	__uint64_t		l0, l1;
+} xfs_bmbt_rec_64_t;
+
+#if BMBT_USE_64
+typedef	__uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
+typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t;
+#else	/* !BMBT_USE_64 */
+typedef	__uint32_t	xfs_bmbt_rec_base_t;	/* use this for casts */
+typedef xfs_bmbt_rec_32_t xfs_bmbt_rec_t, xfs_bmdr_rec_t;
+#endif	/* BMBT_USE_64 */
+
+/*
+ * Delayed-allocation startblock fields: mask bits above a small value field.
+ */
+#define	STARTBLOCKVALBITS	17	/* width of the low value field */
+#define	STARTBLOCKMASKBITS	(15 + XFS_BIG_FILESYSTEMS * 20)
+#define	DSTARTBLOCKMASKBITS	(15 + 20)	/* = 35, xfs_dfsbno_t form */
+#define	STARTBLOCKMASK		\
+	(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
+#define	DSTARTBLOCKMASK		\
+	(((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLSTARTBLOCK)
+int isnullstartblock(xfs_fsblock_t x);
+#define	ISNULLSTARTBLOCK(x)	isnullstartblock(x)
+#else
+#define	ISNULLSTARTBLOCK(x)	(((x) & STARTBLOCKMASK) == STARTBLOCKMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLDSTARTBLOCK)
+int isnulldstartblock(xfs_dfsbno_t x);
+#define	ISNULLDSTARTBLOCK(x)	isnulldstartblock(x)
+#else
+#define	ISNULLDSTARTBLOCK(x)	(((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_NULLSTARTBLOCK)
+xfs_fsblock_t nullstartblock(int k);
+#define	NULLSTARTBLOCK(k)	nullstartblock(k)
+#else	/* macro form: k may be evaluated more than once */
+#define	NULLSTARTBLOCK(k)	\
+	((ASSERT((k) < (1 << STARTBLOCKVALBITS))), (STARTBLOCKMASK | (k)))
+#endif	/* yields STARTBLOCKMASK with k (< 2^STARTBLOCKVALBITS) OR'd in */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_STARTBLOCKVAL)
+xfs_filblks_t startblockval(xfs_fsblock_t x);
+#define	STARTBLOCKVAL(x)	startblockval(x)
+#else
+#define	STARTBLOCKVAL(x)	((xfs_filblks_t)((x) & ~STARTBLOCKMASK))
+#endif
+
+/*
+ * Possible extent formats.
+ */
+typedef	enum {
+	XFS_EXTFMT_NOSTATE = 0,
+	XFS_EXTFMT_HASSTATE
+} xfs_exntfmt_t;
+
+/*
+ * Possible extent states.
+ */
+typedef	enum {
+	XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
+	XFS_EXT_DMAPI_OFFLINE
+} xfs_exntst_t;
+
+/*
+ * Extent state and extent format macros.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTFMT_INODE )
+xfs_exntfmt_t xfs_extfmt_inode(struct xfs_inode *ip);
+#define	XFS_EXTFMT_INODE(x)	xfs_extfmt_inode(x)
+#else
+#define	XFS_EXTFMT_INODE(x) \
+  (XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \
+	XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
+#endif
+#define	ISUNWRITTEN(x)		((x) == XFS_EXT_UNWRITTEN)
+
+/*
+ * Incore (unpacked) form of a bmap btree record (xfs_bmbt_rec_t).
+ */
+typedef struct xfs_bmbt_irec
+{
+	xfs_fileoff_t	br_startoff;	/* starting file offset */
+	xfs_fsblock_t	br_startblock;	/* starting block number */
+	xfs_filblks_t	br_blockcount;	/* number of blocks */
+	xfs_exntst_t	br_state;	/* extent state (xfs_exntst_t value) */
+} xfs_bmbt_irec_t;
+
+/*
+ * Key structure for non-leaf levels of the tree.
+ */
+typedef struct xfs_bmbt_key
+{
+	xfs_dfiloff_t	br_startoff;	/* starting file offset */
+} xfs_bmbt_key_t, xfs_bmdr_key_t;
+
+typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;	/* btree pointer type */
+					/* btree block header type */
+typedef	struct xfs_btree_lblock xfs_bmbt_block_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BMBT_BLOCK)
+xfs_bmbt_block_t *xfs_buf_to_bmbt_block(struct xfs_buf *bp);
+#define	XFS_BUF_TO_BMBT_BLOCK(bp)		xfs_buf_to_bmbt_block(bp)
+#else
+#define	XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_DSIZE)
+int xfs_bmap_rblock_dsize(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_RBLOCK_DSIZE(lev,cur)		xfs_bmap_rblock_dsize(lev,cur)
+#else
+#define	XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_ISIZE)
+int xfs_bmap_rblock_isize(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_RBLOCK_ISIZE(lev,cur)		xfs_bmap_rblock_isize(lev,cur)
+#else
+#define	XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
+	((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
+			    (cur)->bc_private.b.whichfork)->if_broot_bytes)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_IBLOCK_SIZE)
+int xfs_bmap_iblock_size(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_IBLOCK_SIZE(lev,cur) 		xfs_bmap_iblock_size(lev,cur)
+#else
+#define	XFS_BMAP_IBLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DSIZE)
+int xfs_bmap_block_dsize(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_BLOCK_DSIZE(lev,cur)		xfs_bmap_block_dsize(lev,cur)
+#else
+#define	XFS_BMAP_BLOCK_DSIZE(lev,cur) \
+	((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BMAP_RBLOCK_DSIZE(lev,cur) : \
+		XFS_BMAP_IBLOCK_SIZE(lev,cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_ISIZE)
+int xfs_bmap_block_isize(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_BLOCK_ISIZE(lev,cur)		xfs_bmap_block_isize(lev,cur)
+#else
+#define	XFS_BMAP_BLOCK_ISIZE(lev,cur) \
+	((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BMAP_RBLOCK_ISIZE(lev,cur) : \
+		XFS_BMAP_IBLOCK_SIZE(lev,cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMAXRECS)
+int xfs_bmap_block_dmaxrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_BLOCK_DMAXRECS(lev,cur)	xfs_bmap_block_dmaxrecs(lev,cur)
+#else
+#define	XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
+	((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
+			xfs_bmdr, (lev) == 0) : \
+		((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMAXRECS)
+int xfs_bmap_block_imaxrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_BLOCK_IMAXRECS(lev,cur)	xfs_bmap_block_imaxrecs(lev,cur)
+#else
+#define	XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \
+	((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \
+			xfs_bmbt, (lev) == 0) : \
+		((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMINRECS)
+int xfs_bmap_block_dminrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_BLOCK_DMINRECS(lev,cur)	xfs_bmap_block_dminrecs(lev,cur)
+#else
+#define	XFS_BMAP_BLOCK_DMINRECS(lev,cur) \
+	((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
+			xfs_bmdr, (lev) == 0) : \
+		((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMINRECS)
+int xfs_bmap_block_iminrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_BLOCK_IMINRECS(lev,cur)	xfs_bmap_block_iminrecs(lev,cur)
+#else
+#define	XFS_BMAP_BLOCK_IMINRECS(lev,cur) \
+	((lev) == (cur)->bc_nlevels - 1 ? \
+		XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \
+			xfs_bmbt, (lev) == 0) : \
+		((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_DADDR)
+xfs_bmbt_rec_t *
+xfs_bmap_rec_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_REC_DADDR(bb,i,cur)		xfs_bmap_rec_daddr(bb,i,cur)
+#else
+#define	XFS_BMAP_REC_DADDR(bb,i,cur) \
+	XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE(		\
+		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
+		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
+			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_IADDR)
+xfs_bmbt_rec_t *
+xfs_bmap_rec_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_REC_IADDR(bb,i,cur)		xfs_bmap_rec_iaddr(bb,i,cur)
+#else
+#define	XFS_BMAP_REC_IADDR(bb,i,cur) \
+	XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE(		\
+		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
+		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
+			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_DADDR)
+xfs_bmbt_key_t *
+xfs_bmap_key_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_KEY_DADDR(bb,i,cur)		xfs_bmap_key_daddr(bb,i,cur)
+#else
+#define	XFS_BMAP_KEY_DADDR(bb,i,cur) \
+	XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE(		\
+		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
+		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
+			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_IADDR)
+xfs_bmbt_key_t *
+xfs_bmap_key_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_KEY_IADDR(bb,i,cur)		xfs_bmap_key_iaddr(bb,i,cur)
+#else
+#define	XFS_BMAP_KEY_IADDR(bb,i,cur) \
+	XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE(		\
+		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
+		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
+			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_DADDR)
+xfs_bmbt_ptr_t *
+xfs_bmap_ptr_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_PTR_DADDR(bb,i,cur)		xfs_bmap_ptr_daddr(bb,i,cur)
+#else
+#define	XFS_BMAP_PTR_DADDR(bb,i,cur) \
+	XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE(		\
+		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
+		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(	\
+			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_IADDR)
+xfs_bmbt_ptr_t *
+xfs_bmap_ptr_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_BMAP_PTR_IADDR(bb,i,cur)		xfs_bmap_ptr_iaddr(bb,i,cur)
+#else
+#define	XFS_BMAP_PTR_IADDR(bb,i,cur) \
+	XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE(		\
+		INT_GET((bb)->bb_level, ARCH_CONVERT), cur),	\
+		xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(	\
+			INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+
+/*
+ * These are to be used when we know the size of the block and
+ * we don't have a cursor.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_REC_ADDR)
+xfs_bmbt_rec_t *xfs_bmap_broot_rec_addr(xfs_bmbt_block_t *bb, int i, int sz);
+#define	XFS_BMAP_BROOT_REC_ADDR(bb,i,sz)	xfs_bmap_broot_rec_addr(bb,i,sz)
+#else
+#define	XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
+	XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_KEY_ADDR)
+xfs_bmbt_key_t *xfs_bmap_broot_key_addr(xfs_bmbt_block_t *bb, int i, int sz);
+#define	XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz)	xfs_bmap_broot_key_addr(bb,i,sz)
+#else
+#define	XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
+	XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_PTR_ADDR)
+xfs_bmbt_ptr_t *xfs_bmap_broot_ptr_addr(xfs_bmbt_block_t *bb, int i, int sz);
+#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz)	xfs_bmap_broot_ptr_addr(bb,i,sz)
+#else
+#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
+	XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_NUMRECS)
+int xfs_bmap_broot_numrecs(xfs_bmdr_block_t *bb);
+#define	XFS_BMAP_BROOT_NUMRECS(bb)		xfs_bmap_broot_numrecs(bb)
+#else
+#define	XFS_BMAP_BROOT_NUMRECS(bb) (INT_GET((bb)->bb_numrecs, ARCH_CONVERT))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_MAXRECS)
+int xfs_bmap_broot_maxrecs(int sz);
+#define	XFS_BMAP_BROOT_MAXRECS(sz)		xfs_bmap_broot_maxrecs(sz)
+#else
+#define	XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE_CALC)
+int xfs_bmap_broot_space_calc(int nrecs);
+#define	XFS_BMAP_BROOT_SPACE_CALC(nrecs)	xfs_bmap_broot_space_calc(nrecs)
+#else
+#define	XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
+	((int)(sizeof(xfs_bmbt_block_t) + \
+	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE)
+int xfs_bmap_broot_space(xfs_bmdr_block_t *bb);
+#define	XFS_BMAP_BROOT_SPACE(bb)		xfs_bmap_broot_space(bb)
+#else
+#define	XFS_BMAP_BROOT_SPACE(bb) \
+	XFS_BMAP_BROOT_SPACE_CALC(INT_GET((bb)->bb_numrecs, ARCH_CONVERT))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMDR_SPACE_CALC)
+int xfs_bmdr_space_calc(int nrecs);
+#define	XFS_BMDR_SPACE_CALC(nrecs)		xfs_bmdr_space_calc(nrecs)
+#else
+#define	XFS_BMDR_SPACE_CALC(nrecs)	\
+	((int)(sizeof(xfs_bmdr_block_t) + \
+	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))))
+#endif
+
+/*
+ * Maximum number of bmap btree levels.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BM_MAXLEVELS)
+int xfs_bm_maxlevels(struct xfs_mount *mp, int w);
+#define	XFS_BM_MAXLEVELS(mp,w)			xfs_bm_maxlevels(mp,w)
+#else
+#define	XFS_BM_MAXLEVELS(mp,w)		((mp)->m_bm_maxlevels[w])
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_SANITY_CHECK)
+int xfs_bmap_sanity_check(struct xfs_mount *mp, xfs_bmbt_block_t *bb,
+	int level);
+#define	XFS_BMAP_SANITY_CHECK(mp,bb,level)	\
+	xfs_bmap_sanity_check(mp,bb,level)
+#else	/* macro form: nonzero iff magic, level and numrecs all check out */
+#define	XFS_BMAP_SANITY_CHECK(mp,bb,level)	\
+	(INT_GET((bb)->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC && \
+	 INT_GET((bb)->bb_level, ARCH_CONVERT) == (level) && \
+	 INT_GET((bb)->bb_numrecs, ARCH_CONVERT) > 0 &&	\
+	 INT_GET((bb)->bb_numrecs, ARCH_CONVERT) <= (mp)->m_bmap_dmxr[(level) != 0])
+#endif	/* numrecs must be in 1..m_bmap_dmxr[leaf ? 0 : 1] */
+
+/*
+ * Trace buffer entry types.
+ */
+#define	XFS_BMBT_KTRACE_ARGBI	1
+#define	XFS_BMBT_KTRACE_ARGBII	2
+#define	XFS_BMBT_KTRACE_ARGFFFI	3
+#define	XFS_BMBT_KTRACE_ARGI	4
+#define	XFS_BMBT_KTRACE_ARGIFK	5
+#define	XFS_BMBT_KTRACE_ARGIFR	6
+#define	XFS_BMBT_KTRACE_ARGIK	7
+#define	XFS_BMBT_KTRACE_CUR	8
+
+#define	XFS_BMBT_TRACE_SIZE	4096	/* size of global trace buffer */     
+#define	XFS_BMBT_KTRACE_SIZE	32	/* size of per-inode trace buffer */
+
+#if defined(XFS_ALL_TRACE)
+#define	XFS_BMBT_TRACE
+#endif	/* XFS_ALL_TRACE */
+
+#if !defined(DEBUG)
+#undef XFS_BMBT_TRACE	/* never left defined in non-DEBUG builds */
+#endif	/* !DEBUG */
+
+
+/*
+ * Prototypes for xfs_bmap.c to call.
+ */
+
+void
+xfs_bmdr_to_bmbt(
+	xfs_bmdr_block_t *,
+	int,
+	xfs_bmbt_block_t *,
+	int);
+
+int
+xfs_bmbt_decrement(
+	struct xfs_btree_cur *,
+	int,
+	int *);
+
+int
+xfs_bmbt_delete(
+	struct xfs_btree_cur *,
+	int,
+	int *);	       
+
+void
+xfs_bmbt_get_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t	*s);
+
+xfs_bmbt_block_t *
+xfs_bmbt_get_block(
+	struct xfs_btree_cur	*cur,
+	int			level,
+	struct xfs_buf		**bpp);
+
+xfs_filblks_t
+xfs_bmbt_get_blockcount(
+	xfs_bmbt_rec_t	*r);
+
+xfs_fsblock_t
+xfs_bmbt_get_startblock(
+	xfs_bmbt_rec_t	*r);
+
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+	xfs_bmbt_rec_t	*r);
+
+xfs_exntst_t
+xfs_bmbt_get_state(
+	xfs_bmbt_rec_t	*r);
+
+int
+xfs_bmbt_increment(
+	struct xfs_btree_cur *,
+	int,
+	int *);
+
+int
+xfs_bmbt_insert(
+	struct xfs_btree_cur *,
+	int *);	       
+
+int
+xfs_bmbt_insert_many(
+	struct xfs_btree_cur *,
+	int,
+	xfs_bmbt_rec_t *,
+	int *);	       
+
+void
+xfs_bmbt_log_block(
+	struct xfs_btree_cur *,
+	struct xfs_buf *,
+	int);
+
+void
+xfs_bmbt_log_recs(
+	struct xfs_btree_cur *,
+	struct xfs_buf *,
+	int,
+	int);
+
+int
+xfs_bmbt_lookup_eq(
+	struct xfs_btree_cur *,
+	xfs_fileoff_t,
+	xfs_fsblock_t,
+	xfs_filblks_t,
+	int *);
+
+int
+xfs_bmbt_lookup_ge(
+	struct xfs_btree_cur *,
+	xfs_fileoff_t,
+	xfs_fsblock_t,
+	xfs_filblks_t,
+	int *);
+
+int
+xfs_bmbt_lookup_le(
+	struct xfs_btree_cur *,
+	xfs_fileoff_t,
+	xfs_fsblock_t,
+	xfs_filblks_t,
+	int *);
+
+/*
+ * Give the bmap btree a new root block.  Copy the old broot contents
+ * down into a real block and make the broot point to it.
+ */
+int						/* error */
+xfs_bmbt_newroot(
+	struct xfs_btree_cur	*cur,		/* btree cursor */
+	int			*logflags,	/* logging flags for inode */
+	int			*stat);		/* return status - 0 fail */
+
+void
+xfs_bmbt_set_all(
+	xfs_bmbt_rec_t	*r,
+	xfs_bmbt_irec_t	*s);
+
+void
+xfs_bmbt_set_allf(
+	xfs_bmbt_rec_t	*r,	/* record to set -- NOTE(review): confirm */
+	xfs_fileoff_t	o,	/* presumably startoff -- confirm */
+	xfs_fsblock_t	b,	/* presumably startblock -- confirm */
+	xfs_filblks_t	c,	/* presumably blockcount -- confirm */
+	xfs_exntst_t	v);	/* presumably extent state -- confirm */
+
+void
+xfs_bmbt_set_blockcount(
+	xfs_bmbt_rec_t	*r,
+	xfs_filblks_t	v);
+
+void
+xfs_bmbt_set_startblock(
+	xfs_bmbt_rec_t	*r,
+	xfs_fsblock_t	v);
+
+void
+xfs_bmbt_set_startoff(
+	xfs_bmbt_rec_t	*r,
+	xfs_fileoff_t	v);
+
+void
+xfs_bmbt_set_state(
+	xfs_bmbt_rec_t	*r,
+	xfs_exntst_t	v);
+
+void
+xfs_bmbt_to_bmdr(
+	xfs_bmbt_block_t *,
+	int,
+	xfs_bmdr_block_t *,
+	int);
+
+int
+xfs_bmbt_update(
+	struct xfs_btree_cur *,
+	xfs_fileoff_t,
+	xfs_fsblock_t,
+	xfs_filblks_t,
+	xfs_exntst_t);
+
+#ifdef XFSDEBUG
+/* 
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_bmbt_get_rec(
+	struct xfs_btree_cur *,
+	xfs_fileoff_t *,
+	xfs_fsblock_t *,
+	xfs_filblks_t *,
+	xfs_exntst_t *,
+	int *);
+#endif
+
+
+/*
+ * Search an extent list for the extent which includes block
+ * bno.
+ */
+xfs_bmbt_rec_t *
+xfs_bmap_do_search_extents(
+        xfs_bmbt_rec_t *,
+        xfs_extnum_t,
+        xfs_extnum_t,
+        xfs_fileoff_t,
+        int *,
+        xfs_extnum_t *,
+        xfs_bmbt_irec_t	*,
+        xfs_bmbt_irec_t	*);
+
+
+#endif	/* __XFS_BMAP_BTREE_H__ */
diff --git a/include/xfs_btree.h b/include/xfs_btree.h
new file mode 100644
index 000000000..6f00a8c68
--- /dev/null
+++ b/include/xfs_btree.h
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BTREE_H__
+#define	__XFS_BTREE_H__
+
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Typed aliases of the ...i constants; the casts are to make -wlint happy.
+ */
+#define	XFS_LOOKUP_EQ	((xfs_lookup_t)XFS_LOOKUP_EQi)
+#define	XFS_LOOKUP_LE	((xfs_lookup_t)XFS_LOOKUP_LEi)
+#define	XFS_LOOKUP_GE	((xfs_lookup_t)XFS_LOOKUP_GEi)
+
+#define	XFS_BTNUM_BNO	((xfs_btnum_t)XFS_BTNUM_BNOi)
+#define	XFS_BTNUM_CNT	((xfs_btnum_t)XFS_BTNUM_CNTi)
+#define	XFS_BTNUM_BMAP	((xfs_btnum_t)XFS_BTNUM_BMAPi)
+#define	XFS_BTNUM_INO	((xfs_btnum_t)XFS_BTNUM_INOi)
+
+/*
+ * Short form header: space allocation btrees.
+ */
+typedef struct xfs_btree_sblock
+{
+	__uint32_t	bb_magic;	/* magic number for block type */
+	__uint16_t	bb_level;	/* 0 is a leaf */
+	__uint16_t	bb_numrecs;	/* current # of data records */
+	xfs_agblock_t	bb_leftsib;	/* left sibling block or NULLAGBLOCK */
+	xfs_agblock_t	bb_rightsib;	/* right sibling block or NULLAGBLOCK */
+} xfs_btree_sblock_t;
+
+/*
+ * Long form header: bmap btrees.
+ */
+typedef struct xfs_btree_lblock
+{
+	__uint32_t	bb_magic;	/* magic number for block type */
+	__uint16_t	bb_level;	/* 0 is a leaf */
+	__uint16_t	bb_numrecs;	/* current # of data records */
+	xfs_dfsbno_t	bb_leftsib;	/* left sibling block or NULLDFSBNO */
+	xfs_dfsbno_t	bb_rightsib;	/* right sibling block or NULLDFSBNO */
+} xfs_btree_lblock_t;
+
+/*
+ * Combined header and structure, used by common code.
+ */
+typedef struct xfs_btree_hdr
+{
+	__uint32_t	bb_magic;	/* magic number for block type */
+	__uint16_t	bb_level;	/* 0 is a leaf */
+	__uint16_t	bb_numrecs;	/* current # of data records */
+} xfs_btree_hdr_t;
+
+typedef struct xfs_btree_block
+{
+	xfs_btree_hdr_t	bb_h;		/* header */
+	union		{
+		struct	{
+			xfs_agblock_t	bb_leftsib;	/* left sibling */
+			xfs_agblock_t	bb_rightsib;	/* right sibling */
+		}	s;		/* short form pointers */
+		struct	{
+			xfs_dfsbno_t	bb_leftsib;	/* left sibling */
+			xfs_dfsbno_t	bb_rightsib;	/* right sibling */
+		}	l;		/* long form pointers */
+	}		bb_u;		/* sibling ptrs, short or long form */
+} xfs_btree_block_t;
+
+/*
+ * Bit flags naming the block header fields to log, one bit per field.
+ */
+#define	XFS_BB_MAGIC		0x01
+#define	XFS_BB_LEVEL		0x02
+#define	XFS_BB_NUMRECS		0x04
+#define	XFS_BB_LEFTSIB		0x08
+#define	XFS_BB_RIGHTSIB		0x10
+#define	XFS_BB_NUM_BITS		5	/* count of the flags above */
+#define	XFS_BB_ALL_BITS		((1 << XFS_BB_NUM_BITS) - 1)	/* 0x1f */
+
+/*
+ * True for btrees using the long form of bb_u (only XFS_BTNUM_BMAP).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BTREE_LONG_PTRS)
+int xfs_btree_long_ptrs(xfs_btnum_t btnum);
+#define	XFS_BTREE_LONG_PTRS(btnum)	((btnum) == XFS_BTNUM_BMAP)
+#else	/* same expansion as above; only the prototype differs */
+#define	XFS_BTREE_LONG_PTRS(btnum)	((btnum) == XFS_BTNUM_BMAP)
+#endif
+
+/*
+ * Magic numbers for btree blocks.
+ */
+extern const __uint32_t	xfs_magics[];
+
+/*
+ * Maximum and minimum records in a btree block.
+ * Given block size, type prefix, and leaf flag (0 or 1).
+ * The divisor is sizeof(rec) when lf is 1 and sizeof(key) + sizeof(ptr) when
+ * lf is 0; it is written arithmetically because ?: draws compiler warnings.
+ */
+#define	XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf)	\
+	((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \
+	 (((lf) * (uint)sizeof(t ## _rec_t)) + \
+	  ((1 - (lf)) * \
+	   ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t))))))
+#define	XFS_BTREE_BLOCK_MINRECS(bsz,t,lf)	\
+	(XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2)
+
+/*
+ * Record, key, and pointer address calculation macros.
+ * Given type prefix, block pointer, and 1-based index of the requested entry.
+ * bsz is unused; mxr is used only by PTR_ADDR, to skip mxr keys before ptrs.
+ */
+#define	XFS_BTREE_REC_ADDR(bsz,t,bb,i,mxr)	\
+	((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \
+	 ((i) - 1) * sizeof(t ## _rec_t)))
+#define	XFS_BTREE_KEY_ADDR(bsz,t,bb,i,mxr)	\
+	((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \
+	 ((i) - 1) * sizeof(t ## _key_t)))
+#define	XFS_BTREE_PTR_ADDR(bsz,t,bb,i,mxr)	\
+	((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \
+	 (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t)))
+
+#define	XFS_BTREE_MAXLEVELS	8	/* max of all btrees */
+
+/*
+ * Btree cursor structure.
+ * This collects all information needed by the btree code in one place.
+ */
+typedef struct xfs_btree_cur
+{
+	struct xfs_trans	*bc_tp;	/* transaction we're in, if any */
+	struct xfs_mount	*bc_mp;	/* file system mount struct */
+	union {
+		xfs_alloc_rec_t		a;
+		xfs_bmbt_irec_t		b;
+		xfs_inobt_rec_t		i;
+	}		bc_rec;		/* current insert/search record value */
+	struct xfs_buf	*bc_bufs[XFS_BTREE_MAXLEVELS];	/* buf ptr per level */
+	int		bc_ptrs[XFS_BTREE_MAXLEVELS];	/* key/record # */
+	__uint8_t	bc_ra[XFS_BTREE_MAXLEVELS];	/* readahead bits */
+#define	XFS_BTCUR_LEFTRA	1	/* left sibling has been read-ahead */
+#define	XFS_BTCUR_RIGHTRA	2	/* right sibling has been read-ahead */
+	__uint8_t	bc_nlevels;	/* number of levels in the tree */
+	__uint8_t	bc_blocklog;	/* log2(blocksize) of btree blocks */
+	xfs_btnum_t	bc_btnum;	/* identifies which btree type */
+	union {
+		struct {			/* needed for BNO, CNT */
+			struct xfs_buf	*agbp;	/* agf buffer pointer */
+			xfs_agnumber_t	agno;	/* ag number */
+		} a;
+		struct {			/* needed for BMAP */
+			struct xfs_inode *ip;	/* pointer to our inode */
+			struct xfs_bmap_free *flist;	/* list to free after */
+			xfs_fsblock_t	firstblock;	/* 1st blk allocated */
+			int		allocated;	/* count of alloced */
+			short		forksize;	/* fork's inode space */
+			char		whichfork;	/* data or attr fork */
+			char		flags;		/* flags */
+#define	XFS_BTCUR_BPRV_WASDEL	1			/* was delayed */
+		} b;
+		struct {			/* needed for INO */
+			struct xfs_buf	*agbp;	/* agi buffer pointer */
+			xfs_agnumber_t	agno;	/* ag number */
+		} i;
+	}		bc_private;	/* per-btree type data */
+} xfs_btree_cur_t;
+
+#define	XFS_BTREE_NOERROR	0
+#define	XFS_BTREE_ERROR		1
+
+/*
+ * Convert from buffer to btree block header.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BLOCK)
+xfs_btree_block_t *xfs_buf_to_block(struct xfs_buf *bp);
+#define	XFS_BUF_TO_BLOCK(bp)	xfs_buf_to_block(bp)
+#else
+#define	XFS_BUF_TO_BLOCK(bp)	((xfs_btree_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_LBLOCK)
+xfs_btree_lblock_t *xfs_buf_to_lblock(struct xfs_buf *bp);
+#define	XFS_BUF_TO_LBLOCK(bp)	xfs_buf_to_lblock(bp)
+#else
+#define	XFS_BUF_TO_LBLOCK(bp)	((xfs_btree_lblock_t *)(XFS_BUF_PTR(bp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBLOCK)
+xfs_btree_sblock_t *xfs_buf_to_sblock(struct xfs_buf *bp);
+#define	XFS_BUF_TO_SBLOCK(bp)	xfs_buf_to_sblock(bp)
+#else
+#define	XFS_BUF_TO_SBLOCK(bp)	((xfs_btree_sblock_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+#ifdef __KERNEL__
+
+#ifdef DEBUG
+/*
+ * Debug routine: check that block header is ok.
+ */
+void
+xfs_btree_check_block(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_block_t	*block,	/* generic btree block pointer */
+	int			level,	/* level of the btree block */
+	struct xfs_buf		*bp);	/* buffer containing block, if any */
+
+/*
+ * Debug routine: check that keys are in the right order.
+ */
+void
+xfs_btree_check_key(
+	xfs_btnum_t		btnum,	/* btree identifier */
+	void			*ak1,	/* pointer to left (lower) key */
+	void			*ak2);	/* pointer to right (higher) key */
+
+/*
+ * Debug routine: check that records are in the right order.
+ */
+void
+xfs_btree_check_rec(
+	xfs_btnum_t		btnum,	/* btree identifier */
+	void			*ar1,	/* pointer to left (lower) record */
+	void			*ar2);	/* pointer to right (higher) record */
+#else
+#define	xfs_btree_check_block(a,b,c,d)
+#define	xfs_btree_check_key(a,b,c)
+#define	xfs_btree_check_rec(a,b,c)
+#endif	/* DEBUG */
+
+/*
+ * Checking routine: check that long form block header is ok.
+ */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lblock(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_lblock_t	*block,	/* btree long form block pointer */
+	int			level,	/* level of the btree block */
+	struct xfs_buf		*bp);	/* buffer containing block, if any */
+
+/*
+ * Checking routine: check that (long) pointer is ok.
+ */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_dfsbno_t		ptr,	/* btree block disk address */
+	int			level);	/* btree block level */
+
+/*
+ * Checking routine: check that short form block header is ok.
+ */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sblock(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_sblock_t	*block,	/* btree short form block pointer */
+	int			level,	/* level of the btree block */
+	struct xfs_buf		*bp);	/* buffer containing block */
+
+/*
+ * Checking routine: check that (short) pointer is ok.
+ */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_agblock_t		ptr,	/* btree block disk address */
+	int			level);	/* btree block level */
+
+/*
+ * Delete the btree cursor.
+ */
+void
+xfs_btree_del_cursor(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			error);	/* del because of error */
+
+/*
+ * Duplicate the btree cursor.
+ * Allocate a new one, copy the record, re-get the buffers.
+ */
+int					/* error */
+xfs_btree_dup_cursor(
+	xfs_btree_cur_t		*cur,	/* input cursor */
+	xfs_btree_cur_t		**ncur);/* output cursor */
+
+/*
+ * Change the cursor to point to the first record in the current block
+ * at the given level.  Other levels are unaffected.
+ */
+int					/* success=1, failure=0 */
+xfs_btree_firstrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level);	/* level to change */
+
+/*
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be a bmap btree root or from a buffer.
+ */
+xfs_btree_block_t *			/* generic btree block pointer */
+xfs_btree_get_block(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree */
+	struct xfs_buf		**bpp);	/* buffer containing the block */
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Long-form addressing.
+ */
+struct xfs_buf *				/* buffer for fsbno */
+xfs_btree_get_bufl(
+	struct xfs_mount	*mp,	/* file system mount point */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	xfs_fsblock_t		fsbno,	/* file system block number */
+	uint			lock);	/* lock flags for get_buf */
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Short-form addressing.
+ */
+struct xfs_buf *				/* buffer for agno/agbno */
+xfs_btree_get_bufs(
+	struct xfs_mount	*mp,	/* file system mount point */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	xfs_agnumber_t		agno,	/* allocation group number */
+	xfs_agblock_t		agbno,	/* allocation group block number */
+	uint			lock);	/* lock flags for get_buf */
+
+/*
+ * Allocate a new btree cursor; (A) = allocation-only, (B) = bmap-only args.
+ * NOTE(review): use of agbp/agno for XFS_BTNUM_INO cursors - confirm.
+ */
+xfs_btree_cur_t *			/* new btree cursor */
+xfs_btree_init_cursor(
+	struct xfs_mount	*mp,	/* file system mount point */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	struct xfs_buf		*agbp,	/* (A only) buffer for agf structure */
+	xfs_agnumber_t		agno,	/* (A only) allocation group number */
+	xfs_btnum_t		btnum,	/* btree identifier */
+	struct xfs_inode	*ip,	/* (B only) inode owning the btree */
+	int			whichfork); /* (B only) data/attr fork */
+
+/*
+ * Check for the cursor referring to the last block at the given level.
+ */
+int					/* 1=is last block, 0=not last block */
+xfs_btree_islastblock(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level);	/* level to check */
+
+/*
+ * Change the cursor to point to the last record in the current block
+ * at the given level.  Other levels are unaffected.
+ */
+int					/* success=1, failure=0 */
+xfs_btree_lastrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level);	/* level to change */
+
+/*
+ * Compute first and last byte offsets for the fields given.
+ * Interprets the offsets table, which contains struct field offsets.
+ */
+void
+xfs_btree_offsets(
+	__int64_t		fields,	/* bitmask of fields */
+	const short		*offsets,/* table of field offsets */
+	int			nbits,	/* number of bits to inspect */
+	int			*first,	/* output: first byte offset */
+	int			*last);	/* output: last byte offset */
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Long-form addressing.
+ */
+int					/* error */
+xfs_btree_read_bufl(
+	struct xfs_mount	*mp,	/* file system mount point */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	xfs_fsblock_t		fsbno,	/* file system block number */
+	uint			lock,	/* lock flags for read_buf */
+	struct xfs_buf		**bpp,	/* buffer for fsbno */
+	int			refval);/* ref count value for buffer */
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Short-form addressing.
+ */
+int					/* error */
+xfs_btree_read_bufs(
+	struct xfs_mount	*mp,	/* file system mount point */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	xfs_agnumber_t		agno,	/* allocation group number */
+	xfs_agblock_t		agbno,	/* allocation group block number */
+	uint			lock,	/* lock flags for read_buf */
+	struct xfs_buf		**bpp,	/* buffer for agno/agbno */
+	int			refval);/* ref count value for buffer */
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Long-form addressing.
+ */
+void					/* nothing is returned */
+xfs_btree_reada_bufl(
+	struct xfs_mount	*mp,	/* file system mount point */
+	xfs_fsblock_t		fsbno,	/* file system block number */
+	xfs_extlen_t		count);	/* count of filesystem blocks */
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Short-form addressing.
+ */
+void					/* nothing is returned */
+xfs_btree_reada_bufs(
+	struct xfs_mount	*mp,	/* file system mount point */
+	xfs_agnumber_t		agno,	/* allocation group number */
+	xfs_agblock_t		agbno,	/* allocation group block number */
+	xfs_extlen_t		count);	/* count of filesystem blocks */
+
+/*
+ * Read-ahead btree blocks, at the given level.
+ * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
+ */
+int					/* readahead block count */
+xfs_btree_readahead(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			lev,	/* level in btree */
+	int			lr);	/* left/right bits */
+/*
+ * Set the buffer for level "lev" in the cursor to bp, releasing
+ * any previous buffer.
+ */
+void
+xfs_btree_setbuf(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			lev,	/* level in btree */
+	struct xfs_buf		*bp);	/* new buffer to set */
+
+#endif	/* __KERNEL__ */
+
+
+/*
+ * Min and max functions for extlen, agblock, fileoff, and filblks types.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MIN)
+xfs_extlen_t xfs_extlen_min(xfs_extlen_t a, xfs_extlen_t b);
+#define	XFS_EXTLEN_MIN(a,b)	xfs_extlen_min(a,b)
+#else	/* macro form: the selected argument is evaluated twice */
+#define	XFS_EXTLEN_MIN(a,b)	\
+	((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \
+	 (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MAX)
+xfs_extlen_t xfs_extlen_max(xfs_extlen_t a, xfs_extlen_t b);
+#define	XFS_EXTLEN_MAX(a,b)	xfs_extlen_max(a,b)
+#else	/* macro form: the selected argument is evaluated twice */
+#define	XFS_EXTLEN_MAX(a,b)	\
+	((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \
+	 (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MIN)
+xfs_agblock_t xfs_agblock_min(xfs_agblock_t a, xfs_agblock_t b);
+#define	XFS_AGBLOCK_MIN(a,b)	xfs_agblock_min(a,b)
+#else
+#define	XFS_AGBLOCK_MIN(a,b)	\
+	((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \
+	 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MAX)
+xfs_agblock_t xfs_agblock_max(xfs_agblock_t a, xfs_agblock_t b);
+#define	XFS_AGBLOCK_MAX(a,b)	xfs_agblock_max(a,b)
+#else
+#define	XFS_AGBLOCK_MAX(a,b)	\
+	((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
+	 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MIN)
+xfs_fileoff_t xfs_fileoff_min(xfs_fileoff_t a, xfs_fileoff_t b);
+#define	XFS_FILEOFF_MIN(a,b)	xfs_fileoff_min(a,b)
+#else
+#define	XFS_FILEOFF_MIN(a,b)	\
+	((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
+	 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MAX)
+xfs_fileoff_t xfs_fileoff_max(xfs_fileoff_t a, xfs_fileoff_t b);
+#define	XFS_FILEOFF_MAX(a,b)	xfs_fileoff_max(a,b)
+#else
+#define	XFS_FILEOFF_MAX(a,b)	\
+	((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
+	 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MIN)
+xfs_filblks_t xfs_filblks_min(xfs_filblks_t a, xfs_filblks_t b);
+#define	XFS_FILBLKS_MIN(a,b)	xfs_filblks_min(a,b)
+#else
+#define	XFS_FILBLKS_MIN(a,b)	\
+	((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
+	 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MAX)
+xfs_filblks_t xfs_filblks_max(xfs_filblks_t a, xfs_filblks_t b);
+#define	XFS_FILBLKS_MAX(a,b)	xfs_filblks_max(a,b)
+#else
+#define	XFS_FILBLKS_MAX(a,b)	\
+	((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
+	 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_SANITY_CHECK)
+int xfs_fsb_sanity_check(struct xfs_mount *mp, xfs_fsblock_t fsb);
+#define	XFS_FSB_SANITY_CHECK(mp,fsb)	xfs_fsb_sanity_check(mp,fsb)
+#else	/* true when fsb's AG number and AG block number are both in range */
+#define	XFS_FSB_SANITY_CHECK(mp,fsb)	\
+	(XFS_FSB_TO_AGNO(mp, fsb) < (mp)->m_sb.sb_agcount && \
+	 XFS_FSB_TO_AGBNO(mp, fsb) < (mp)->m_sb.sb_agblocks)
+#endif
+
+/*
+ * Set EFSCORRUPTED & return/branch; the GOTO form assigns to a local "error".
+ */
+#define	XFS_WANT_CORRUPTED_GOTO(x,l)	\
+	{ /* bare block, not do-while(0): don't use before an "else" */ \
+		int fs_is_ok = (x); \
+		ASSERT(fs_is_ok); \
+		if (!fs_is_ok) { \
+			error = XFS_ERROR(EFSCORRUPTED); \
+			goto l; \
+		} \
+	}
+
+#define	XFS_WANT_CORRUPTED_RETURN(x)	\
+	{ /* bare block, not do-while(0): don't use before an "else" */ \
+		int fs_is_ok = (x); \
+		ASSERT(fs_is_ok); \
+		if (!fs_is_ok) \
+			return XFS_ERROR(EFSCORRUPTED); \
+	}
+
+#endif	/* __XFS_BTREE_H__ */
diff --git a/include/xfs_buf_item.h b/include/xfs_buf_item.h
new file mode 100644
index 000000000..5d097f8e3
--- /dev/null
+++ b/include/xfs_buf_item.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_BUF_ITEM_H__
+#define	__XFS_BUF_ITEM_H__
+
+/*
+ * This is the structure used to lay out a buf log item in the
+ * log.  The data map describes which 128 byte chunks of the buffer
+ * have been logged.  This structure works only on buffers that
+ * reside up to the first TB in the filesystem.  These buffers are
+ * generated only by pre-6.2 systems and are known as XFS_LI_6_1_BUF.
+ */
+typedef struct xfs_buf_log_format_v1 {
+	unsigned short	blf_type;	/* buf log item type indicator */
+	unsigned short	blf_size;	/* size of this item */
+	__int32_t	blf_blkno;	/* starting blkno of this buf */
+	ushort		blf_flags;	/* misc state */
+	ushort		blf_len;	/* number of blocks in this buf */
+	unsigned int	blf_map_size;	/* size of data bitmap in words */
+	unsigned int	blf_data_map[1];/* variable size bitmap of */
+					/*   regions of buffer in this item */
+} xfs_buf_log_format_v1_t;
+
+/*
+ * This is a form of the above structure with a 64 bit blkno field.
+ * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
+ */
+typedef struct xfs_buf_log_format_t {
+	unsigned short	blf_type;	/* buf log item type indicator */
+	unsigned short	blf_size;	/* size of this item */
+	ushort		blf_flags;	/* misc state */
+	ushort		blf_len;	/* number of blocks in this buf */
+	__int64_t	blf_blkno;	/* starting blkno of this buf */
+	unsigned int	blf_map_size;	/* size of data bitmap in words */
+	unsigned int	blf_data_map[1];/* variable size bitmap of */
+					/*   regions of buffer in this item */
+} xfs_buf_log_format_t;
+
+/*
+ * This flag indicates that the buffer contains on disk inodes
+ * and requires special recovery handling.
+ */
+#define	XFS_BLI_INODE_BUF	0x1
+/*
+ * This flag indicates that the buffer should not be replayed
+ * during recovery because its blocks are being freed.
+ */
+#define	XFS_BLI_CANCEL		0x2
+/*
+ * These flags indicate that the buffer contains on-disk user or
+ * project dquots and may require special recovery handling.
+ */
+#define	XFS_BLI_UDQUOT_BUF	0x4
+#define	XFS_BLI_PDQUOT_BUF	0x8
+
+#define	XFS_BLI_CHUNK		128	/* bytes per logged chunk */
+#define	XFS_BLI_SHIFT		7	/* log2(XFS_BLI_CHUNK) */
+#define	BIT_TO_WORD_SHIFT	5	/* log2(32); presumably bits per map word */
+#define	NBWORD			(NBBY * sizeof(unsigned int))
+
+/*
+ * buf log item flags
+ */
+#define	XFS_BLI_HOLD		0x01
+#define	XFS_BLI_DIRTY		0x02
+#define	XFS_BLI_STALE		0x04
+#define	XFS_BLI_LOGGED		0x08
+#define	XFS_BLI_INODE_ALLOC_BUF	0x10
+
+
+#ifdef __KERNEL__
+
+struct xfs_buf;
+struct ktrace;
+struct xfs_mount;
+
+/*
+ * This is the in core log item structure used to track information
+ * needed to log buffers.  It tracks how many times the lock has been
+ * locked, and which 128 byte chunks of the buffer are dirty.
+ */
+typedef struct xfs_buf_log_item {
+	xfs_log_item_t		bli_item;	/* common item structure */
+	struct xfs_buf		*bli_buf;	/* real buffer pointer */
+	unsigned int		bli_flags;	/* misc flags */
+	unsigned int		bli_recur;	/* lock recursion count */
+	atomic_t		bli_refcount;	/* cnt of tp refs */
+#ifdef DEBUG
+	struct ktrace		*bli_trace;	/* event trace buf */
+#endif
+#ifdef XFS_TRANS_DEBUG
+	char			*bli_orig;	/* original buffer copy */
+	char			*bli_logged;	/* bytes logged (bitmap) */
+#endif
+	xfs_buf_log_format_t	bli_format;	/* in-log header */
+} xfs_buf_log_item_t;
+
+/*
+ * This structure is used during recovery to record the buf log
+ * items which have been canceled and should not be replayed.
+ */
+typedef struct xfs_buf_cancel {
+	xfs_daddr_t			bc_blkno;
+	uint			bc_len;
+	int			bc_refcount;
+	struct xfs_buf_cancel	*bc_next;
+} xfs_buf_cancel_t;
+
+#define	XFS_BLI_TRACE_SIZE	32
+
+
+#if defined(XFS_ALL_TRACE)
+#define	XFS_BLI_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_BLI_TRACE
+#endif
+
+#if defined(XFS_BLI_TRACE)
+void	xfs_buf_item_trace(char *, xfs_buf_log_item_t *);
+#else
+#define	xfs_buf_item_trace(id, bip)
+#endif
+
+void	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
+void	xfs_buf_item_relse(struct xfs_buf *);
+void	xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
+uint	xfs_buf_item_dirty(xfs_buf_log_item_t *);
+int	xfs_buf_item_bits(uint *, uint, uint);
+int	xfs_buf_item_contig_bits(uint *, uint, uint);
+int	xfs_buf_item_next_bit(uint *, uint, uint);
+void	xfs_buf_attach_iodone(struct xfs_buf *,
+			      void(*)(struct xfs_buf *, xfs_log_item_t *),
+			      xfs_log_item_t *);
+void	xfs_buf_iodone_callbacks(struct xfs_buf *);
+void	xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *);
+
+#ifdef XFS_TRANS_DEBUG
+void
+xfs_buf_item_flush_log_debug(
+	struct xfs_buf *bp,			     
+	uint	first,
+	uint	last);
+#else
+#define	xfs_buf_item_flush_log_debug(bp, first, last)
+#endif
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_BUF_ITEM_H__ */
diff --git a/include/xfs_cred.h b/include/xfs_cred.h
new file mode 100644
index 000000000..523dcddf6
--- /dev/null
+++ b/include/xfs_cred.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef __XFS_CRED_H__
+#define __XFS_CRED_H__
+
+#include <asm/param.h>		/* For NGROUPS */
+#ifdef __KERNEL__
+#include <linux/capability.h>
+#include <linux/sched.h>
+#endif
+
+/*
+ * Capabilities
+ */
+typedef __uint64_t cap_value_t;
+
+typedef struct cap_set {
+	cap_value_t	cap_effective;	/* use in capability checks */
+	cap_value_t	cap_permitted;	/* combined with file attrs */
+	cap_value_t	cap_inheritable;/* pass through exec */
+} cap_set_t;
+
+
+/*
+ * Mandatory Access Control
+ *
+ * Layout of a composite MAC label:
+ * ml_list contains the list of categories (MSEN) followed by the list of
+ * divisions (MINT). This is actually a header for the data structure which
+ * will have an ml_list with more than one element.
+ *
+ *      -------------------------------
+ *      | ml_msen_type | ml_mint_type |
+ *      -------------------------------
+ *      | ml_level     | ml_grade     |
+ *      -------------------------------
+ *      | ml_catcount                 |
+ *      -------------------------------
+ *      | ml_divcount                 |
+ *      -------------------------------
+ *      | category 1                  |
+ *      | . . .                       |
+ *      | category N                  | (where N = ml_catcount)
+ *      -------------------------------
+ *      | division 1                  |
+ *      | . . .                       |
+ *      | division M                  | (where M = ml_divcount)
+ *      -------------------------------
+ */
+#define MAC_MAX_SETS	250
+typedef struct mac_label {
+	unsigned char	ml_msen_type;	/* MSEN label type */
+	unsigned char	ml_mint_type;	/* MINT label type */
+	unsigned char	ml_level;	/* Hierarchical level  */
+	unsigned char	ml_grade;	/* Hierarchical grade  */
+	unsigned short	ml_catcount;	/* Category count */
+	unsigned short	ml_divcount;	/* Division count */
+					/* Category set, then Division set */
+	unsigned short	ml_list[MAC_MAX_SETS];
+} mac_label;
+
+/* Data types required by POSIX P1003.1eD15 */
+typedef struct mac_label * mac_t;
+
+#ifdef __KERNEL__
+extern int mac_enabled;
+extern mac_label *mac_high_low_lp;
+static __inline void mac_never(void) {}	/* empty body: does nothing */
+struct xfs_inode;
+extern int mac_xfs_iaccess(struct xfs_inode *, mode_t);
+#define _MAC_XFS_IACCESS(i,m)	/* 0 unless mac_enabled is nonzero */ \
+	(mac_enabled? (mac_never(), mac_xfs_iaccess(i,m)): 0)
+#endif	/* __KERNEL__ */
+
+#define MACWRITE	00200
+#define SGI_MAC_FILE "/dev/null"
+#define SGI_MAC_FILE_SIZE 10	/* equals sizeof(SGI_MAC_FILE), NUL included */
+#define SGI_CAP_FILE "/dev/null"
+#define SGI_CAP_FILE_SIZE 10	/* equals sizeof(SGI_CAP_FILE), NUL included */
+
+/* MSEN label type names. Choose an upper case ASCII character.  */
+#define MSEN_ADMIN_LABEL	'A'	/* Admin: low<admin != tcsec<high */
+#define MSEN_EQUAL_LABEL	'E'	/* Wildcard - always equal */
+#define MSEN_HIGH_LABEL		'H'	/* System High - always dominates */
+#define MSEN_MLD_HIGH_LABEL	'I'	/* System High, multi-level dir */
+#define MSEN_LOW_LABEL		'L'	/* System Low - always dominated */
+#define MSEN_MLD_LABEL		'M'	/* TCSEC label on a multi-level dir */
+#define MSEN_MLD_LOW_LABEL	'N'	/* System Low, multi-level dir */
+#define MSEN_TCSEC_LABEL	'T'	/* TCSEC label */
+#define MSEN_UNKNOWN_LABEL	'U'	/* unknown label */
+
+/* MINT label type names. Choose a lower case ASCII character.  */
+#define MINT_BIBA_LABEL		'b'	/* Dual of a TCSEC label */
+#define MINT_EQUAL_LABEL	'e'	/* Wildcard - always equal */
+#define MINT_HIGH_LABEL		'h'	/* High Grade - always dominates */
+#define MINT_LOW_LABEL		'l'	/* Low Grade - always dominated */
+
+
+/*
+ * Credentials
+ */
+typedef struct cred {
+	int	cr_ref;			/* reference count */
+	ushort	cr_ngroups;		/* number of groups in cr_groups */
+	uid_t	cr_uid;			/* effective user id */
+	gid_t	cr_gid;		 	/* effective group id */
+	uid_t	cr_ruid;		/* real user id */
+	gid_t	cr_rgid;		/* real group id */
+	uid_t	cr_suid;		/* "saved" user id (from exec) */
+	gid_t	cr_sgid;		/* "saved" group id (from exec) */
+	struct mac_label *cr_mac;	/* MAC label for B1 and beyond */
+	cap_set_t	  cr_cap;	/* capability (privilege) sets */
+	gid_t	cr_groups[NGROUPS];	/* supplementary group list */
+} cred_t;
+
+#ifdef __KERNEL__
+extern void cred_init(void);
+static __inline cred_t *get_current_cred(void) { return NULL; } /* stub: always NULL */
+extern struct cred *sys_cred;
+#endif	/* __KERNEL__ */
+
+#endif  /* __XFS_CRED_H__ */
diff --git a/include/xfs_da_btree.h b/include/xfs_da_btree.h
new file mode 100644
index 000000000..a9d2a1502
--- /dev/null
+++ b/include/xfs_da_btree.h
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DA_BTREE_H__
+#define	__XFS_DA_BTREE_H__
+
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+struct zone;
+
+/*========================================================================
+ * Directory Structure when greater than XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
+ *
+ * It is used to manage a doubly linked list of all blocks at the same
+ * level in the Btree, and to identify which type of block this is.
+ */
+#define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */
+#define XFS_DIR_LEAF_MAGIC	0xfeeb	/* magic number: directory leaf blks */
+#define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */
+#define	XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */
+#define	XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */
+
+#define	XFS_DIRX_LEAF_MAGIC(mp)	\
+	(XFS_DIR_IS_V1(mp) ? XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC)
+
+typedef struct xfs_da_blkinfo {
+	xfs_dablk_t forw;			/* following block in list */
+	xfs_dablk_t back;			/* previous block in list */
+	__uint16_t magic;			/* validity check on block */
+	__uint16_t pad;				/* unused */
+} xfs_da_blkinfo_t;
+
+/*
+ * This is the structure of the root and intermediate nodes in the Btree.
+ * The leaf nodes are defined above.
+ *
+ * Entries are not packed.
+ *
+ * Since we have duplicate keys, use a binary search but always follow
+ * all matches in the block, not just the first match found.
+ */
+#define	XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */
+
+typedef struct xfs_da_intnode {
+	struct xfs_da_node_hdr {	/* constant-structure header block */
+		xfs_da_blkinfo_t info;	/* block type, links, etc. */
+		__uint16_t count;	/* count of active entries */
+		__uint16_t level;	/* level above leaves (leaf == 0) */
+	} hdr;
+	struct xfs_da_node_entry {
+		xfs_dahash_t hashval;	/* hash value for this descendant */
+		xfs_dablk_t before;	/* Btree block before this key */
+	} btree[1];			/* variable sized array of keys */
+} xfs_da_intnode_t;
+typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
+typedef struct xfs_da_node_entry xfs_da_node_entry_t;
+
+#define XFS_DA_NODE_ENTSIZE_BYNAME	/* space a name uses */ \
+	(sizeof(xfs_da_node_entry_t))
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_NODE_ENTRIES)
+int xfs_da_node_entries(struct xfs_mount *mp);
+#define XFS_DA_NODE_ENTRIES(mp)		xfs_da_node_entries(mp)
+#else
+#define	XFS_DA_NODE_ENTRIES(mp)		((mp)->m_da_node_ents)
+#endif
+
+#define	XFS_DA_MAXHASH	((xfs_dahash_t)-1) /* largest valid hash value */
+
+/*
+ * Macros used by directory code to interface to the filesystem.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBSIZE)
+int xfs_lbsize(struct xfs_mount *mp);
+#define	XFS_LBSIZE(mp)			xfs_lbsize(mp)
+#else
+#define	XFS_LBSIZE(mp)	((mp)->m_sb.sb_blocksize)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBLOG)
+int xfs_lblog(struct xfs_mount *mp);
+#define	XFS_LBLOG(mp)			xfs_lblog(mp)
+#else
+#define	XFS_LBLOG(mp)	((mp)->m_sb.sb_blocklog)
+#endif
+
+/*
+ * Macros used by directory code to interface to the kernel
+ */
+
+/*
+ * Macros used to manipulate directory off_t's
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_BNOENTRY)
+__uint32_t xfs_da_make_bnoentry(struct xfs_mount *mp, xfs_dablk_t bno,
+				int entry);
+#define	XFS_DA_MAKE_BNOENTRY(mp,bno,entry)	\
+	xfs_da_make_bnoentry(mp,bno,entry)
+#else
+#define	XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
+	(((bno) << (mp)->m_dircook_elog) | (entry))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_COOKIE)
+xfs_off_t xfs_da_make_cookie(struct xfs_mount *mp, xfs_dablk_t bno, int entry,
+				xfs_dahash_t hash);
+#define	XFS_DA_MAKE_COOKIE(mp,bno,entry,hash)	\
+	xfs_da_make_cookie(mp,bno,entry,hash)
+#else
+#define	XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
+	(((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_HASH)
+xfs_dahash_t xfs_da_cookie_hash(struct xfs_mount *mp, xfs_off_t cookie);
+#define	XFS_DA_COOKIE_HASH(mp,cookie)		xfs_da_cookie_hash(mp,cookie)
+#else
+#define	XFS_DA_COOKIE_HASH(mp,cookie)	((xfs_dahash_t)(cookie))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_BNO)
+xfs_dablk_t xfs_da_cookie_bno(struct xfs_mount *mp, xfs_off_t cookie);
+#define	XFS_DA_COOKIE_BNO(mp,cookie)		xfs_da_cookie_bno(mp,cookie)
+#else
+#define	XFS_DA_COOKIE_BNO(mp,cookie) \
+	(((xfs_off_t)(cookie) >> 31) == -1LL ? \
+		(xfs_dablk_t)0 : \
+		(xfs_dablk_t)((xfs_off_t)(cookie) >> ((mp)->m_dircook_elog + 32)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_ENTRY)
+int xfs_da_cookie_entry(struct xfs_mount *mp, xfs_off_t cookie);
+#define	XFS_DA_COOKIE_ENTRY(mp,cookie)		xfs_da_cookie_entry(mp,cookie)
+#else
+#define	XFS_DA_COOKIE_ENTRY(mp,cookie) \
+	(((xfs_off_t)(cookie) >> 31) == -1LL ? \
+		(xfs_dablk_t)0 : \
+		(xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
+			      ((1 << (mp)->m_dircook_elog) - 1)))
+#endif
+
+
+/*========================================================================
+ * Btree searching and modification structure definitions.
+ *========================================================================*/
+
+/*
+ * Structure to ease passing around component names.
+ */
+typedef struct xfs_da_args {
+	char		*name;		/* string (maybe not NULL terminated) */
+	int		namelen;	/* length of string (maybe no NULL) */
+	char		*value;		/* set of bytes (maybe contain NULLs) */
+	int		valuelen;	/* length of value */
+	int		flags;		/* argument flags (eg: ATTR_NOCREATE) */
+	xfs_dahash_t	hashval;	/* hash value of name */
+	xfs_ino_t	inumber;	/* input/output inode number */
+	struct xfs_inode *dp;		/* directory inode to manipulate */
+	xfs_fsblock_t	*firstblock;	/* ptr to firstblock for bmap calls */
+	struct xfs_bmap_free *flist;	/* ptr to freelist for bmap_finish */
+	struct xfs_trans *trans;	/* current trans (changes over time) */
+	xfs_extlen_t	total;		/* total blocks needed, for 1st bmap */
+	int		whichfork;	/* data or attribute fork */
+	xfs_dablk_t	blkno;		/* blkno of attr leaf of interest */
+	int		index;		/* index of attr of interest in blk */
+	xfs_dablk_t	rmtblkno;	/* remote attr value starting blkno */
+	int		rmtblkcnt;	/* remote attr value block count */
+	int		rename;		/* T/F: this is an atomic rename op */
+	xfs_dablk_t	blkno2;		/* blkno of 2nd attr leaf of interest */
+	int		index2;		/* index of 2nd attr in blk */
+	xfs_dablk_t	rmtblkno2;	/* remote attr value starting blkno */
+	int		rmtblkcnt2;	/* remote attr value block count */
+	int		justcheck;	/* check for ok with no space */
+	int		addname;	/* T/F: this is an add operation */
+	int		oknoent;	/* T/F: ok to return ENOENT, else die */
+} xfs_da_args_t;
+
+/*
+ * Structure to describe buffer(s) for a block.
+ * This is needed in the directory version 2 format case, when 
+ * multiple non-contiguous fsblocks might be needed to cover one
+ * logical directory block.
+ * If the buffer count is 1 then the data pointer points to the
+ * same place as the b_addr field for the buffer, else to kmem_alloced memory.
+ */
+typedef struct xfs_dabuf {
+	int		nbuf;		/* number of buffer pointers present */
+	short		dirty;		/* data needs to be copied back */
+	short		bbcount;	/* how large is data in bbs */
+	void		*data;		/* pointer for buffers' data */
+#ifdef XFS_DABUF_DEBUG
+	inst_t		*ra;		/* return address of caller to make */
+	struct xfs_dabuf *next;		/* next in global chain */
+	struct xfs_dabuf *prev;		/* previous in global chain */
+	dev_t		dev;		/* device for buffer */
+	xfs_daddr_t		blkno;		/* daddr first in bps[0] */
+#endif
+	struct xfs_buf	*bps[1];	/* actually nbuf of these */
+} xfs_dabuf_t;
+#define	XFS_DA_BUF_SIZE(n)	\
+	(sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
+
+#ifdef XFS_DABUF_DEBUG
+extern xfs_dabuf_t	*xfs_dabuf_global_list;
+#endif
+
+/*
+ * Storage for holding state during Btree searches and split/join ops.
+ *
+ * Only need space for 5 intermediate nodes.  With a minimum of 62-way
+ * fanout to the Btree, we can support over 900 million directory blocks,
+ * which is slightly more than enough.
+ */
+typedef struct xfs_da_state_blk {
+	xfs_dabuf_t	*bp;		/* buffer containing block */
+	xfs_dablk_t	blkno;		/* filesystem blkno of buffer */
+	xfs_daddr_t		disk_blkno;	/* on-disk blkno (in BBs) of buffer */
+	int		index;		/* relevant index into block */
+	xfs_dahash_t	hashval;	/* last hash value in block */
+	int		magic;		/* blk's magic number, ie: blk type */
+} xfs_da_state_blk_t;
+
+typedef struct xfs_da_state_path {
+	int			active;		/* number of active levels */
+	xfs_da_state_blk_t	blk[XFS_DA_NODE_MAXDEPTH];
+} xfs_da_state_path_t;
+
+typedef struct xfs_da_state {
+	xfs_da_args_t		*args;		/* filename arguments */
+	struct xfs_mount	*mp;		/* filesystem mount point */
+	int			blocksize;	/* logical block size */
+	int			inleaf;		/* insert into 1->lf, 0->splf */
+	xfs_da_state_path_t	path;		/* search/split paths */
+	xfs_da_state_path_t	altpath;	/* alternate path for join */
+	int			extravalid;	/* T/F: extrablk is in use */
+	int			extraafter;	/* T/F: extrablk is after new */
+	xfs_da_state_blk_t	extrablk;	/* for double-splits on leafs */
+						/* for dirv2 extrablk is data */
+} xfs_da_state_t;
+
+/*
+ * Utility macros to aid in logging changed structure fields.
+ */
+#define XFS_DA_LOGOFF(BASE, ADDR)	((char *)(ADDR) - (char *)(BASE))
+#define XFS_DA_LOGRANGE(BASE, ADDR, SIZE)	\
+		(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
+		(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Routines used for growing the Btree.
+ */
+int	xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
+					 xfs_dabuf_t **bpp, int whichfork);
+int	xfs_da_split(xfs_da_state_t *state);
+
+/*
+ * Routines used for shrinking the Btree.
+ */
+int	xfs_da_join(xfs_da_state_t *state);
+void	xfs_da_fixhashpath(xfs_da_state_t *state,
+					  xfs_da_state_path_t *path_to_to_fix);
+
+/*
+ * Routines used for finding things in the Btree.
+ */
+int	xfs_da_node_lookup_int(xfs_da_state_t *state, int *result);
+int	xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
+					 int forward, int release, int *result);
+/*
+ * Utility routines.
+ */
+int	xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+					 xfs_da_state_blk_t *save_blk);
+int	xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
+				       xfs_da_state_blk_t *new_blk);
+
+/*
+ * Utility routines.
+ */
+int	xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
+int	xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+			      xfs_dablk_t bno, xfs_daddr_t mappedbno,
+			      xfs_dabuf_t **bp, int whichfork);
+int	xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+			       xfs_dablk_t bno, xfs_daddr_t mappedbno,
+			       xfs_dabuf_t **bpp, int whichfork);
+xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+			xfs_dablk_t bno, int whichfork);
+int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
+					  xfs_dabuf_t *dead_buf);
+
+uint xfs_da_hashname(char *name_string, int name_length);
+uint xfs_da_log2_roundup(uint i);
+xfs_da_state_t *xfs_da_state_alloc(void);
+void xfs_da_state_free(xfs_da_state_t *state);
+void xfs_da_state_kill_altpath(xfs_da_state_t *state);
+
+void xfs_da_buf_done(xfs_dabuf_t *dabuf);
+void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
+			   uint last);
+void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
+void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
+xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
+
+extern struct xfs_zone *xfs_da_state_zone;
+
+#endif	/* __XFS_DA_BTREE_H__ */
diff --git a/include/xfs_dfrag.h b/include/xfs_dfrag.h
new file mode 100644
index 000000000..a6f1b0937
--- /dev/null
+++ b/include/xfs_dfrag.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DFRAG_H__
+#define	__XFS_DFRAG_H__
+
+/*
+ * Structure passed to xfs_swapext
+ */
+
+typedef struct xfs_swapext
+{
+	__int64_t	sx_version;	/* version */	
+	__int64_t	sx_fdtarget;	/* fd of target file */
+	__int64_t	sx_fdtmp;	/* fd of tmp file */
+	xfs_off_t	sx_offset; 	/* offset into file */
+	xfs_off_t	sx_length; 	/* length from offset */
+	char		sx_pad[16];	/* pad space, unused */
+	xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
+} xfs_swapext_t;
+
+/* 
+ * Version flag
+ */
+#define XFS_SX_VERSION		0
+
+#ifdef __KERNEL__
+/*
+ * Prototypes for visible xfs_dfrag.c routines.
+ */
+
+/*
+ * Syscall interface for xfs_swapext
+ */
+int	xfs_swapext(struct xfs_swapext *sx);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_DFRAG_H__ */
diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h
new file mode 100644
index 000000000..7bda53ebe
--- /dev/null
+++ b/include/xfs_dinode.h
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DINODE_H__
+#define	__XFS_DINODE_H__
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define	XFS_DINODE_VERSION_1	1
+#define	XFS_DINODE_VERSION_2	2
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DINODE_GOOD_VERSION)
+int xfs_dinode_good_version(int v);
+#define XFS_DINODE_GOOD_VERSION(v)	xfs_dinode_good_version(v)
+#else
+#define XFS_DINODE_GOOD_VERSION(v)	(((v) == XFS_DINODE_VERSION_1) || \
+					 ((v) == XFS_DINODE_VERSION_2))
+#endif
+#define	XFS_DINODE_MAGIC	0x494e	/* 'IN' */
+
+/*
+ * Disk inode structure.
+ * This is just the header; the inode is expanded to fill a variable size
+ * with the last field expanding.  It is split into the core and "other"
+ * because we only need the core part in the in-core inode.
+ */
+typedef struct xfs_timestamp {
+	__int32_t	t_sec;		/* timestamp seconds */
+	__int32_t	t_nsec;		/* timestamp nanoseconds */
+} xfs_timestamp_t;
+
+/*
+ * Note: Coordinate changes to this structure with the XFS_DI_* #defines
+ * below and the offsets table in xfs_ialloc_log_di().
+ */
+typedef struct xfs_dinode_core
+{
+	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
+	__uint16_t	di_mode;	/* mode and type of file */
+	__int8_t	di_version;	/* inode version */
+	__int8_t	di_format;	/* format of di_c data */
+	__uint16_t	di_onlink;	/* old number of links to file */
+	__uint32_t	di_uid;		/* owner's user id */
+	__uint32_t	di_gid;		/* owner's group id */
+	__uint32_t	di_nlink;	/* number of links to file */
+	__uint16_t	di_projid;	/* owner's project id */
+	__uint8_t	di_pad[10];	/* unused, zeroed space */
+	xfs_timestamp_t	di_atime;	/* time last accessed */
+	xfs_timestamp_t	di_mtime;	/* time last modified */
+	xfs_timestamp_t	di_ctime;	/* time created/inode modified */
+	xfs_fsize_t	di_size;	/* number of bytes in file */
+	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
+	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
+	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
+	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
+	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	__int8_t	di_aformat;	/* format of attr fork's data */
+	__uint32_t	di_dmevmask;	/* DMIG event mask */
+	__uint16_t	di_dmstate;	/* DMIG state info */
+	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	__uint32_t	di_gen;		/* generation number */
+} xfs_dinode_core_t;
+
+typedef struct xfs_dinode
+{
+	xfs_dinode_core_t	di_core;
+	/*
+	 * In adding anything between the core and the union, be
+	 * sure to update the macros like XFS_LITINO below and
+	 * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h.
+	 */
+	xfs_agino_t		di_next_unlinked;/* agi unlinked list ptr */
+	union {
+		xfs_bmdr_block_t di_bmbt;	/* btree root block */
+		xfs_bmbt_rec_32_t di_bmx[1];	/* extent list */
+		xfs_dir_shortform_t di_dirsf;	/* shortform directory */
+		xfs_dir2_sf_t	di_dir2sf;	/* shortform directory v2 */
+		char		di_c[1];	/* local contents */
+		xfs_dev_t	di_dev;		/* device for IFCHR/IFBLK */
+		uuid_t		di_muuid;	/* mount point value */
+		char		di_symlink[1];	/* local symbolic link */
+	}		di_u;
+	union {
+		xfs_bmdr_block_t di_abmbt;	/* btree root block */
+		xfs_bmbt_rec_32_t di_abmx[1];	/* extent list */
+		xfs_attr_shortform_t di_attrsf;	/* shortform attribute list */
+	}		di_a;
+} xfs_dinode_t;
+
+/*
+ * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
+ * Since the pathconf interface is signed, we use 2^31 - 1 instead.
+ * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
+ */
+#define	XFS_MAXLINK		((1U << 31) - 1U)
+#define	XFS_MAXLINK_1		65535U
+
+/*
+ * Bit names for logging disk inodes only
+ */
+#define	XFS_DI_MAGIC		0x0000001
+#define	XFS_DI_MODE		0x0000002
+#define	XFS_DI_VERSION		0x0000004
+#define	XFS_DI_FORMAT		0x0000008
+#define	XFS_DI_ONLINK		0x0000010
+#define	XFS_DI_UID		0x0000020
+#define	XFS_DI_GID		0x0000040
+#define	XFS_DI_NLINK		0x0000080
+#define	XFS_DI_PROJID		0x0000100
+#define	XFS_DI_PAD		0x0000200
+#define	XFS_DI_ATIME		0x0000400
+#define	XFS_DI_MTIME		0x0000800
+#define	XFS_DI_CTIME		0x0001000
+#define	XFS_DI_SIZE		0x0002000
+#define	XFS_DI_NBLOCKS		0x0004000
+#define	XFS_DI_EXTSIZE		0x0008000
+#define	XFS_DI_NEXTENTS		0x0010000
+#define	XFS_DI_NAEXTENTS	0x0020000
+#define	XFS_DI_FORKOFF		0x0040000
+#define	XFS_DI_AFORMAT		0x0080000
+#define	XFS_DI_DMEVMASK		0x0100000
+#define	XFS_DI_DMSTATE		0x0200000
+#define	XFS_DI_FLAGS		0x0400000
+#define	XFS_DI_GEN		0x0800000
+#define	XFS_DI_NEXT_UNLINKED	0x1000000
+#define	XFS_DI_U		0x2000000
+#define	XFS_DI_A		0x4000000
+#define	XFS_DI_NUM_BITS		27
+#define	XFS_DI_ALL_BITS		((1 << XFS_DI_NUM_BITS) - 1)
+#define	XFS_DI_CORE_BITS	(XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A))
+
+/*
+ * Values for di_format
+ */
+typedef enum xfs_dinode_fmt
+{
+	XFS_DINODE_FMT_DEV,		/* CHR, BLK: di_dev */
+	XFS_DINODE_FMT_LOCAL,		/* DIR, REG: di_c */
+					/* LNK: di_symlink */
+	XFS_DINODE_FMT_EXTENTS,		/* DIR, REG, LNK: di_bmx */
+	XFS_DINODE_FMT_BTREE,		/* DIR, REG, LNK: di_bmbt */
+	XFS_DINODE_FMT_UUID 		/* MNT: di_muuid */
+} xfs_dinode_fmt_t;
+
+/*
+ * Inode minimum and maximum sizes.
+ */
+#define	XFS_DINODE_MIN_LOG	8
+#define	XFS_DINODE_MAX_LOG	11
+#define	XFS_DINODE_MIN_SIZE	(1 << XFS_DINODE_MIN_LOG)
+#define	XFS_DINODE_MAX_SIZE	(1 << XFS_DINODE_MAX_LOG)
+
+/*
+ * Inode size for given fs.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LITINO)
+int xfs_litino(struct xfs_mount *mp);
+#define	XFS_LITINO(mp)		xfs_litino(mp)
+#else
+#define	XFS_LITINO(mp)	((mp)->m_litino)
+#endif
+#define	XFS_BROOT_SIZE_ADJ	\
+	(sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t))
+
+/*
+ * Fork identifiers.  Here so utilities can use them without including
+ * xfs_inode.h.
+ */
+#define	XFS_DATA_FORK	0
+#define	XFS_ATTR_FORK	1
+
+/*
+ * Inode data & attribute fork sizes, per inode.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_Q)
+int xfs_cfork_q_arch(xfs_dinode_core_t *dcp, xfs_arch_t arch);
+int xfs_cfork_q(xfs_dinode_core_t *dcp);
+#define	XFS_CFORK_Q_ARCH(dcp,arch)          xfs_cfork_q_arch(dcp,arch)
+#define	XFS_CFORK_Q(dcp)                    xfs_cfork_q(dcp)
+#else
+#define	XFS_CFORK_Q_ARCH(dcp,arch)	    (INT_GET((dcp)->di_forkoff, arch) != 0)
+#define XFS_CFORK_Q(dcp)                    XFS_CFORK_Q_ARCH(dcp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_BOFF)
+int xfs_cfork_boff_arch(xfs_dinode_core_t *dcp, xfs_arch_t arch);
+int xfs_cfork_boff(xfs_dinode_core_t *dcp);
+#define	XFS_CFORK_BOFF_ARCH(dcp,arch)	    xfs_cfork_boff_arch(dcp,arch)
+#define	XFS_CFORK_BOFF(dcp)	            xfs_cfork_boff(dcp)
+#else
+#define	XFS_CFORK_BOFF_ARCH(dcp,arch)	    ((int)(INT_GET((dcp)->di_forkoff, arch) << 3))
+#define XFS_CFORK_BOFF(dcp)                 XFS_CFORK_BOFF_ARCH(dcp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_DSIZE)
+int xfs_cfork_dsize_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_cfork_dsize(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
+#define	XFS_CFORK_DSIZE_ARCH(dcp,mp,arch)   xfs_cfork_dsize_arch(dcp,mp,arch)
+#define	XFS_CFORK_DSIZE(dcp,mp)             xfs_cfork_dsize(dcp,mp)
+#else
+#define	XFS_CFORK_DSIZE_ARCH(dcp,mp,arch) \
+	(XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_CFORK_BOFF_ARCH(dcp, arch) : XFS_LITINO(mp))
+#define XFS_CFORK_DSIZE(dcp,mp)             XFS_CFORK_DSIZE_ARCH(dcp,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_ASIZE)
+int xfs_cfork_asize_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_cfork_asize(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
+#define	XFS_CFORK_ASIZE_ARCH(dcp,mp,arch)   xfs_cfork_asize_arch(dcp,mp,arch)
+#define	XFS_CFORK_ASIZE(dcp,mp)             xfs_cfork_asize(dcp,mp) 
+#else
+#define	XFS_CFORK_ASIZE_ARCH(dcp,mp,arch) \
+	(XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_ARCH(dcp, arch) : 0)
+#define XFS_CFORK_ASIZE(dcp,mp)             XFS_CFORK_ASIZE_ARCH(dcp,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_SIZE)
+int xfs_cfork_size_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w, xfs_arch_t arch);
+int xfs_cfork_size(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w);
+#define	XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch)  xfs_cfork_size_arch(dcp,mp,w,arch)
+#define	XFS_CFORK_SIZE(dcp,mp,w)            xfs_cfork_size(dcp,mp,w)
+#else
+#define	XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch) \
+	((w) == XFS_DATA_FORK ? \
+		XFS_CFORK_DSIZE_ARCH(dcp, mp, arch) : XFS_CFORK_ASIZE_ARCH(dcp, mp, arch))
+#define XFS_CFORK_SIZE(dcp,mp,w)            XFS_CFORK_SIZE_ARCH(dcp,mp,w,ARCH_NOCONVERT)
+
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DSIZE)
+int xfs_dfork_dsize_arch(xfs_dinode_t *dip, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_dfork_dsize(xfs_dinode_t *dip, struct xfs_mount *mp);
+#define	XFS_DFORK_DSIZE_ARCH(dip,mp,arch)   xfs_dfork_dsize_arch(dip,mp,arch)
+#define	XFS_DFORK_DSIZE(dip,mp)             xfs_dfork_dsize(dip,mp)
+#else
+#define	XFS_DFORK_DSIZE_ARCH(dip,mp,arch)   XFS_CFORK_DSIZE_ARCH(&(dip)->di_core, mp, arch)
+#define XFS_DFORK_DSIZE(dip,mp)             XFS_DFORK_DSIZE_ARCH(dip,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_ASIZE)
+int xfs_dfork_asize_arch(xfs_dinode_t *dip, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_dfork_asize(xfs_dinode_t *dip, struct xfs_mount *mp);
+#define	XFS_DFORK_ASIZE_ARCH(dip,mp,arch)   xfs_dfork_asize_arch(dip,mp,arch)
+#define	XFS_DFORK_ASIZE(dip,mp)             xfs_dfork_asize(dip,mp)
+#else
+#define	XFS_DFORK_ASIZE_ARCH(dip,mp,arch)   XFS_CFORK_ASIZE_ARCH(&(dip)->di_core, mp, arch)
+#define XFS_DFORK_ASIZE(dip,mp)             XFS_DFORK_ASIZE_ARCH(dip,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_SIZE)
+int xfs_dfork_size_arch(xfs_dinode_t *dip, struct xfs_mount *mp, int w, xfs_arch_t arch);
+int xfs_dfork_size(xfs_dinode_t *dip, struct xfs_mount *mp, int w);
+#define	XFS_DFORK_SIZE_ARCH(dip,mp,w,arch)  xfs_dfork_size_arch(dip,mp,w,arch)
+#define	XFS_DFORK_SIZE(dip,mp,w)            xfs_dfork_size(dip,mp,w) 
+#else
+#define	XFS_DFORK_SIZE_ARCH(dip,mp,w,arch)  XFS_CFORK_SIZE_ARCH(&(dip)->di_core, mp, w, arch)
+#define XFS_DFORK_SIZE(dip,mp,w)            XFS_DFORK_SIZE_ARCH(dip,mp,w,ARCH_NOCONVERT)
+
+#endif
+
+/*
+ * Macros for accessing per-fork disk inode information.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_Q)
+int xfs_dfork_q_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+int xfs_dfork_q(xfs_dinode_t *dip);
+#define	XFS_DFORK_Q_ARCH(dip,arch)	    xfs_dfork_q_arch(dip,arch)
+#define	XFS_DFORK_Q(dip)	            xfs_dfork_q(dip)
+#else
+#define	XFS_DFORK_Q_ARCH(dip,arch)	    XFS_CFORK_Q_ARCH(&(dip)->di_core, arch)
+#define XFS_DFORK_Q(dip)                    XFS_DFORK_Q_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_BOFF)
+int xfs_dfork_boff_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+int xfs_dfork_boff(xfs_dinode_t *dip);
+#define	XFS_DFORK_BOFF_ARCH(dip,arch)	    xfs_dfork_boff_arch(dip,arch)
+#define	XFS_DFORK_BOFF(dip)	            xfs_dfork_boff(dip)
+#else
+#define	XFS_DFORK_BOFF_ARCH(dip,arch)	    XFS_CFORK_BOFF_ARCH(&(dip)->di_core, arch)
+#define XFS_DFORK_BOFF(dip)                 XFS_DFORK_BOFF_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DPTR)
+char *xfs_dfork_dptr_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+char *xfs_dfork_dptr(xfs_dinode_t *dip);
+#define	XFS_DFORK_DPTR_ARCH(dip,arch)	    xfs_dfork_dptr_arch(dip,arch)
+#define	XFS_DFORK_DPTR(dip)	            xfs_dfork_dptr(dip)
+#else
+#define	XFS_DFORK_DPTR_ARCH(dip,arch)	    ((dip)->di_u.di_c)
+#define XFS_DFORK_DPTR(dip)                 XFS_DFORK_DPTR_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_APTR)
+char *xfs_dfork_aptr_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+char *xfs_dfork_aptr(xfs_dinode_t *dip);
+#define	XFS_DFORK_APTR_ARCH(dip,arch)       xfs_dfork_aptr_arch(dip,arch)
+#define	XFS_DFORK_APTR(dip)                 xfs_dfork_aptr(dip) 
+#else
+#define	XFS_DFORK_APTR_ARCH(dip,arch)	    ((dip)->di_u.di_c + XFS_DFORK_BOFF_ARCH(dip, arch))
+#define XFS_DFORK_APTR(dip)                 XFS_DFORK_APTR_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_PTR)
+char *xfs_dfork_ptr_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch);
+char *xfs_dfork_ptr(xfs_dinode_t *dip, int w);
+#define	XFS_DFORK_PTR_ARCH(dip,w,arch)      xfs_dfork_ptr_arch(dip,w,arch)
+#define	XFS_DFORK_PTR(dip,w)                xfs_dfork_ptr(dip,w)
+#else
+#define	XFS_DFORK_PTR_ARCH(dip,w,arch)	\
+	((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR_ARCH(dip, arch) : XFS_DFORK_APTR_ARCH(dip, arch))
+#define XFS_DFORK_PTR(dip,w)                XFS_DFORK_PTR_ARCH(dip,w,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FORMAT)
+int xfs_cfork_format_arch(xfs_dinode_core_t *dcp, int w, xfs_arch_t arch);
+int xfs_cfork_format(xfs_dinode_core_t *dcp, int w);
+#define	XFS_CFORK_FORMAT_ARCH(dcp,w,arch)   xfs_cfork_format_arch(dcp,w,arch)
+#define	XFS_CFORK_FORMAT(dcp,w)             xfs_cfork_format(dcp,w)
+#else
+#define	XFS_CFORK_FORMAT_ARCH(dcp,w,arch) \
+	((w) == XFS_DATA_FORK ? INT_GET((dcp)->di_format, arch) : INT_GET((dcp)->di_aformat, arch))
+#define XFS_CFORK_FORMAT(dcp,w)             XFS_CFORK_FORMAT_ARCH(dcp,w,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FMT_SET)
+void xfs_cfork_fmt_set_arch(xfs_dinode_core_t *dcp, int w, int n, xfs_arch_t arch);
+void xfs_cfork_fmt_set(xfs_dinode_core_t *dcp, int w, int n);
+#define	XFS_CFORK_FMT_SET_ARCH(dcp,w,n,arch) xfs_cfork_fmt_set_arch(dcp,w,n,arch)
+#define	XFS_CFORK_FMT_SET(dcp,w,n)           xfs_cfork_fmt_set(dcp,w,n)
+#else
+#define	XFS_CFORK_FMT_SET_ARCH(dcp,w,n,arch) \
+	((w) == XFS_DATA_FORK ? \
+		(INT_SET((dcp)->di_format, arch, (n))) : \
+		(INT_SET((dcp)->di_aformat, arch, (n))))
+#define XFS_CFORK_FMT_SET(dcp,w,n)           XFS_CFORK_FMT_SET_ARCH(dcp,w,n,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXTENTS)	/* function form */
+int xfs_cfork_nextents_arch(xfs_dinode_core_t *dcp, int w, xfs_arch_t arch);
+int xfs_cfork_nextents(xfs_dinode_core_t *dcp, int w);
+#define	XFS_CFORK_NEXTENTS_ARCH(dcp,w,arch)  xfs_cfork_nextents_arch(dcp,w,arch)
+#define	XFS_CFORK_NEXTENTS(dcp,w)            xfs_cfork_nextents(dcp,w)
+#else	/* inline form: reads di_nextents for XFS_DATA_FORK, di_anextents for any other w */
+#define	XFS_CFORK_NEXTENTS_ARCH(dcp,w,arch) \
+	((w) == XFS_DATA_FORK ? INT_GET((dcp)->di_nextents, arch) : INT_GET((dcp)->di_anextents, arch))
+#define XFS_CFORK_NEXTENTS(dcp,w)            XFS_CFORK_NEXTENTS_ARCH(dcp,w,ARCH_NOCONVERT) 
+
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXTENTS) */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXT_SET)	/* function form */
+void xfs_cfork_next_set_arch(xfs_dinode_core_t *dcp, int w, int n, xfs_arch_t arch);
+void xfs_cfork_next_set(xfs_dinode_core_t *dcp, int w, int n);
+#define	XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,arch)	xfs_cfork_next_set_arch(dcp,w,n,arch)
+#define	XFS_CFORK_NEXT_SET(dcp,w,n)	        xfs_cfork_next_set(dcp,w,n)
+#else	/* inline form: INT_SET n into di_nextents (XFS_DATA_FORK) or di_anextents (any other w) */
+#define	XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,arch) \
+	((w) == XFS_DATA_FORK ? \
+		(INT_SET((dcp)->di_nextents, arch, (n))) : \
+		(INT_SET((dcp)->di_anextents, arch, (n))))
+#define XFS_CFORK_NEXT_SET(dcp,w,n)             XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,ARCH_NOCONVERT)
+
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXT_SET) */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FORMAT)	/* function form */
+int xfs_dfork_format_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch);
+int xfs_dfork_format(xfs_dinode_t *dip, int w);
+#define	XFS_DFORK_FORMAT_ARCH(dip,w,arch)   xfs_dfork_format_arch(dip,w,arch)
+#define	XFS_DFORK_FORMAT(dip,w)             xfs_dfork_format(dip,w)
+#else	/* inline form: forwards to XFS_CFORK_FORMAT_ARCH on the embedded di_core */
+#define	XFS_DFORK_FORMAT_ARCH(dip,w,arch)   XFS_CFORK_FORMAT_ARCH(&(dip)->di_core, w, arch)
+#define XFS_DFORK_FORMAT(dip,w)             XFS_DFORK_FORMAT_ARCH(dip,w,ARCH_NOCONVERT)
+
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FORMAT) */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FMT_SET)	/* function form */
+void xfs_dfork_fmt_set_arch(xfs_dinode_t *dip, int w, int n, xfs_arch_t arch);
+void xfs_dfork_fmt_set(xfs_dinode_t *dip, int w, int n);
+#define	XFS_DFORK_FMT_SET_ARCH(dip,w,n,arch)    xfs_dfork_fmt_set_arch(dip,w,n,arch)
+#define	XFS_DFORK_FMT_SET(dip,w,n)              xfs_dfork_fmt_set(dip,w,n)
+#else	/* inline form: forwards to XFS_CFORK_FMT_SET_ARCH on the embedded di_core */
+#define	XFS_DFORK_FMT_SET_ARCH(dip,w,n,arch)	XFS_CFORK_FMT_SET_ARCH(&(dip)->di_core, w, n, arch)
+#define XFS_DFORK_FMT_SET(dip,w,n)              XFS_DFORK_FMT_SET_ARCH(dip,w,n,ARCH_NOCONVERT)
+
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FMT_SET) */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXTENTS)	/* function form */
+int xfs_dfork_nextents_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch);
+int xfs_dfork_nextents(xfs_dinode_t *dip, int w);
+#define	XFS_DFORK_NEXTENTS_ARCH(dip,w,arch) xfs_dfork_nextents_arch(dip,w,arch)
+#define	XFS_DFORK_NEXTENTS(dip,w)           xfs_dfork_nextents(dip,w)
+#else	/* inline form: forwards to XFS_CFORK_NEXTENTS_ARCH on the embedded di_core */
+#define	XFS_DFORK_NEXTENTS_ARCH(dip,w,arch) XFS_CFORK_NEXTENTS_ARCH(&(dip)->di_core, w, arch)
+#define XFS_DFORK_NEXTENTS(dip,w)           XFS_DFORK_NEXTENTS_ARCH(dip,w,ARCH_NOCONVERT)
+
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXTENTS) */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXT_SET)	/* function form */
+void xfs_dfork_next_set_arch(xfs_dinode_t *dip, int w, int n, xfs_arch_t arch);
+void xfs_dfork_next_set(xfs_dinode_t *dip, int w, int n);
+#define	XFS_DFORK_NEXT_SET_ARCH(dip,w,n,arch)   xfs_dfork_next_set_arch(dip,w,n,arch)
+#define	XFS_DFORK_NEXT_SET(dip,w,n)             xfs_dfork_next_set(dip,w,n)
+#else	/* inline form: forwards to XFS_CFORK_NEXT_SET_ARCH on the embedded di_core */
+#define	XFS_DFORK_NEXT_SET_ARCH(dip,w,n,arch)	XFS_CFORK_NEXT_SET_ARCH(&(dip)->di_core, w, n, arch)
+#define XFS_DFORK_NEXT_SET(dip,w,n)             XFS_DFORK_NEXT_SET_ARCH(dip,w,n,ARCH_NOCONVERT)
+
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXT_SET) */
+
+/*
+ * File types (mode field).  Values are octal; every type lies within the IFMT bits.
+ */
+#define	IFMT		0170000		/* mask covering the file-type bits */
+#define	IFIFO		0010000		/* named pipe (fifo) */
+#define	IFCHR		0020000		/* character special */
+#define	IFDIR		0040000		/* directory */
+#define	IFBLK		0060000		/* block special */
+#define	IFREG		0100000		/* regular */
+#define	IFLNK		0120000		/* symbolic link */
+#define	IFSOCK		0140000		/* socket */
+#define	IFMNT		0160000		/* mount point */
+
+/*
+ * File execution and access modes (octal bit values).
+ */
+#define	ISUID		04000		/* set user id on execution */
+#define	ISGID		02000		/* set group id on execution */
+#define	ISVTX		01000		/* sticky directory */
+#define	IREAD		0400		/* read permission */
+#define	IWRITE		0200		/* write permission */
+#define	IEXEC		0100		/* execute permission */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_DINODE)	/* function form */
+xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
+#define	XFS_BUF_TO_DINODE(bp)	xfs_buf_to_dinode(bp)
+#else	/* inline form: casts XFS_BUF_PTR(bp) to xfs_dinode_t * */
+#define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)(XFS_BUF_PTR(bp)))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_DINODE) */
+
+/*
+ * Values for di_flags: each flag has a bit number (_BIT) and a mask (1 << _BIT).
+ * There should be a one-to-one correspondence between these flags and the
+ * XFS_XFLAG_* flags.
+ */
+#define XFS_DIFLAG_REALTIME_BIT	0	/* file's blocks come from rt area */
+#define XFS_DIFLAG_PREALLOC_BIT	1	/* file space has been preallocated */
+#define	XFS_DIFLAG_NEWRTBM_BIT	2	/* for rtbitmap inode, new format */
+#define XFS_DIFLAG_REALTIME     (1 << XFS_DIFLAG_REALTIME_BIT)
+#define XFS_DIFLAG_PREALLOC	(1 << XFS_DIFLAG_PREALLOC_BIT)
+#define	XFS_DIFLAG_NEWRTBM	(1 << XFS_DIFLAG_NEWRTBM_BIT)
+#define XFS_DIFLAG_ALL  \
+	(XFS_DIFLAG_REALTIME|XFS_DIFLAG_PREALLOC|XFS_DIFLAG_NEWRTBM)	/* OR of the three masks above */
+
+#endif	/* __XFS_DINODE_H__ */
diff --git a/include/xfs_dir.h b/include/xfs_dir.h
new file mode 100644
index 000000000..ead2621c4
--- /dev/null
+++ b/include/xfs_dir.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR_H__
+#define	__XFS_DIR_H__
+
+/*
+ * Large directories are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Filenames are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of a filename may not be unique, we may have duplicate keys.  The
+ * internal links in the Btree are logical block offsets into the file.
+ *
+ * Small directories use a different format and are packed as tightly
+ * as possible so as to fit into the literal area of the inode.
+ */
+
+#ifdef XFS_ALL_TRACE	/* XFS_ALL_TRACE implies XFS_DIR_TRACE */
+#define	XFS_DIR_TRACE
+#endif	/* XFS_ALL_TRACE */
+
+#if !defined(DEBUG)	/* XFS_DIR_TRACE survives only when DEBUG is defined */
+#undef XFS_DIR_TRACE
+#endif	/* !DEBUG */
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+struct uio;
+struct xfs_bmap_free;
+struct xfs_da_args;
+struct xfs_dinode;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Directory operation function-pointer types, one per xfs_dirops_t member.
+ * Put in structures (xfs_dirops_t) for v1 and v2 directories.
+ */
+typedef void	(*xfs_dir_mount_t)(struct xfs_mount *mp);	/* xd_mount */
+typedef int	(*xfs_dir_isempty_t)(struct xfs_inode *dp);	/* xd_isempty */
+typedef int	(*xfs_dir_init_t)(struct xfs_trans *tp,
+				  struct xfs_inode *dp,
+				  struct xfs_inode *pdp);	/* xd_init */
+typedef int	(*xfs_dir_createname_t)(struct xfs_trans *tp,
+					struct xfs_inode *dp,
+					char *name,
+					int namelen,
+					xfs_ino_t inum,
+					xfs_fsblock_t *first,
+					struct xfs_bmap_free *flist,
+					xfs_extlen_t total);	/* xd_createname */
+typedef int	(*xfs_dir_lookup_t)(struct xfs_trans *tp,
+				    struct xfs_inode *dp,
+				    char *name,
+				    int namelen,
+				    xfs_ino_t *inum);	/* xd_lookup */
+typedef int	(*xfs_dir_removename_t)(struct xfs_trans *tp,
+					struct xfs_inode *dp,
+					char *name,
+					int namelen,
+					xfs_ino_t ino,
+					xfs_fsblock_t *first,
+					struct xfs_bmap_free *flist,
+					xfs_extlen_t total);	/* xd_removename */
+typedef int	(*xfs_dir_getdents_t)(struct xfs_trans *tp,
+				      struct xfs_inode *dp,
+				      struct uio *uio,
+				      int *eofp);	/* xd_getdents */
+typedef int	(*xfs_dir_replace_t)(struct xfs_trans *tp,
+				     struct xfs_inode *dp,
+				     char *name,
+				     int namelen,
+				     xfs_ino_t inum,
+				     xfs_fsblock_t *first,
+				     struct xfs_bmap_free *flist,
+				     xfs_extlen_t total);	/* xd_replace */
+typedef int	(*xfs_dir_canenter_t)(struct xfs_trans *tp,
+				      struct xfs_inode *dp,
+				      char *name,
+				      int namelen);	/* xd_canenter */
+typedef int	(*xfs_dir_shortform_validate_ondisk_t)(struct xfs_mount *mp,
+						       struct xfs_dinode *dip);	/* xd_shortform_validate_ondisk */
+typedef int	(*xfs_dir_shortform_to_single_t)(struct xfs_da_args *args);	/* xd_shortform_to_single */
+
+typedef struct xfs_dirops {	/* directory operations table; XFS_DIR_* macros call it via (mp)->m_dirops */
+	xfs_dir_mount_t				xd_mount;
+	xfs_dir_isempty_t			xd_isempty;
+	xfs_dir_init_t				xd_init;
+	xfs_dir_createname_t			xd_createname;
+	xfs_dir_lookup_t			xd_lookup;
+	xfs_dir_removename_t			xd_removename;
+	xfs_dir_getdents_t			xd_getdents;
+	xfs_dir_replace_t			xd_replace;
+	xfs_dir_canenter_t			xd_canenter;
+	xfs_dir_shortform_validate_ondisk_t	xd_shortform_validate_ondisk;
+	xfs_dir_shortform_to_single_t		xd_shortform_to_single;
+} xfs_dirops_t;
+
+/*
+ * Overall external interface: each XFS_DIR_* macro calls through (mp)->m_dirops.
+ */
+void	xfs_dir_startup(void);	/* called exactly once */
+
+#define	XFS_DIR_MOUNT(mp)	\
+	((mp)->m_dirops.xd_mount(mp))
+#define	XFS_DIR_ISEMPTY(mp,dp)	\
+	((mp)->m_dirops.xd_isempty(dp))
+#define	XFS_DIR_INIT(mp,tp,dp,pdp)	\
+	((mp)->m_dirops.xd_init(tp,dp,pdp))
+#define	XFS_DIR_CREATENAME(mp,tp,dp,name,namelen,inum,first,flist,total) \
+	((mp)->m_dirops.xd_createname(tp,dp,name,namelen,inum,first,flist,\
+				      total))
+#define	XFS_DIR_LOOKUP(mp,tp,dp,name,namelen,inum)	\
+	((mp)->m_dirops.xd_lookup(tp,dp,name,namelen,inum))
+#define	XFS_DIR_REMOVENAME(mp,tp,dp,name,namelen,ino,first,flist,total)	\
+	((mp)->m_dirops.xd_removename(tp,dp,name,namelen,ino,first,flist,total))
+#define	XFS_DIR_GETDENTS(mp,tp,dp,uio,eofp)	\
+	((mp)->m_dirops.xd_getdents(tp,dp,uio,eofp))
+#define	XFS_DIR_REPLACE(mp,tp,dp,name,namelen,inum,first,flist,total)	\
+	((mp)->m_dirops.xd_replace(tp,dp,name,namelen,inum,first,flist,total))
+#define	XFS_DIR_CANENTER(mp,tp,dp,name,namelen)	\
+	((mp)->m_dirops.xd_canenter(tp,dp,name,namelen))
+#define	XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip)	\
+	((mp)->m_dirops.xd_shortform_validate_ondisk(mp,dip))
+#define	XFS_DIR_SHORTFORM_TO_SINGLE(mp,args)	\
+	((mp)->m_dirops.xd_shortform_to_single(args))
+
+#define	XFS_DIR_IS_V1(mp)	((mp)->m_dirversion == 1)	/* nonzero when m_dirversion is 1 */
+extern xfs_dirops_t xfsv1_dirops;	/* presumably the v1 operations table; defined outside this header */
+
+#endif	/* __XFS_DIR_H__ */
diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h
new file mode 100644
index 000000000..f723933e1
--- /dev/null
+++ b/include/xfs_dir2.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_H__
+#define	__XFS_DIR2_H__
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_put_args;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Directory version 2.
+ * There are 4 possible formats:
+ *	shortform
+ *	single block - data with embedded leaf at the end
+ *	multiple data blocks, single leaf+freeindex block
+ *	data blocks, node&leaf blocks (btree), freeindex blocks
+ *
+ *	The shortform format is in xfs_dir2_sf.h.
+ *	The single block format is in xfs_dir2_block.h.
+ *	The data block format is in xfs_dir2_data.h.
+ *	The leaf and freeindex block formats are in xfs_dir2_leaf.h.
+ *	Node blocks are the same as the other version, in xfs_da_btree.h.
+ */
+
+/*
+ * Byte offset in data block and shortform entry (16 bits wide).
+ */
+typedef	__uint16_t	xfs_dir2_data_off_t;
+#define	NULLDATAOFF	0xffffU		/* all 16 bits set */
+typedef uint		xfs_dir2_data_aoff_t;	/* argument form: plain uint */
+
+/*
+ * Directory block number (logical dirblk in file), 32 bits wide.
+ */
+typedef	__uint32_t	xfs_dir2_db_t;
+
+/*
+ * Byte offset in a directory; same type as xfs_off_t.
+ */
+typedef	xfs_off_t		xfs_dir2_off_t;
+
+/*
+ * For getdents: argument struct handed to the put routines (xfs_dir2_put_t).
+ */
+typedef int (*xfs_dir2_put_t)(struct xfs_dir2_put_args *pa);
+typedef struct xfs_dir2_put_args {
+	xfs_off_t		cook;		/* cookie of (next) entry */
+	xfs_intino_t	ino;		/* inode number */
+	struct dirent	*dbp;		/* buffer pointer */
+	char		*name;		/* directory entry name */
+	int		namelen;	/* length of name */
+	int		done;		/* output: set if value was stored */
+	xfs_dir2_put_t	put;		/* put function ptr (i/o) */
+	struct uio	*uio;		/* uio control structure */
+	unsigned char	type;		/* file type (see include/linux/fs.h) */
+} xfs_dir2_put_args_t;
+
+#define	XFS_DIR_IS_V2(mp)	((mp)->m_dirversion == 2)	/* nonzero when m_dirversion is 2 */
+extern xfs_dirops_t	xfsv2_dirops;	/* xfs_dirops_t comes from xfs_dir.h, which is not included here */
+
+/*
+ * Other interfaces used by the rest of the dir v2 code.
+ */
+extern int
+	xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+			    xfs_dir2_db_t *dbp);
+
+extern int
+	xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *vp);
+
+extern int
+	xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *vp);
+
+extern int
+	xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+			      struct xfs_dabuf *bp);
+
+#endif	/* __XFS_DIR2_H__ */
diff --git a/include/xfs_dir2_block.h b/include/xfs_dir2_block.h
new file mode 100644
index 000000000..049f598da
--- /dev/null
+++ b/include/xfs_dir2_block.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_BLOCK_H__
+#define	__XFS_DIR2_BLOCK_H__
+
+/*
+ * xfs_dir2_block.h
+ * Directory version 2, single block format structures
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_data_hdr;
+struct xfs_dir2_leaf_entry;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The single block format is laid out, front to back, as follows:
+ * xfs_dir2_data_hdr_t structure
+ * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
+ * xfs_dir2_leaf_entry_t structures
+ * xfs_dir2_block_tail_t structure (last bytes of the block)
+ */
+
+#define	XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: for one block dirs */
+
+typedef struct xfs_dir2_block_tail {
+	__uint32_t	count;			/* count of leaf entries */
+	__uint32_t	stale;			/* count of stale lf entries */
+} xfs_dir2_block_tail_t;
+
+/*
+ * Generic single-block structure, for xfs_db.  Only hdr is at a fixed offset.
+ */
+typedef struct xfs_dir2_block {
+	xfs_dir2_data_hdr_t	hdr;		/* magic XFS_DIR2_BLOCK_MAGIC */
+	xfs_dir2_data_union_t	u[1];		/* placeholder for the data/unused entries */
+	xfs_dir2_leaf_entry_t	leaf[1];	/* located with XFS_DIR2_BLOCK_LEAF_P_ARCH */
+	xfs_dir2_block_tail_t	tail;		/* located with XFS_DIR2_BLOCK_TAIL_P */
+} xfs_dir2_block_t;
+
+/*
+ * Pointer to the block tail, stored in the last bytes of the block (1-block format)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_TAIL_P)
+xfs_dir2_block_tail_t *
+xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block);
+#define	XFS_DIR2_BLOCK_TAIL_P(mp,block)	xfs_dir2_block_tail_p(mp,block) 
+#else	/* inline form: one tail structure back from block + (mp)->m_dirblksize */
+#define	XFS_DIR2_BLOCK_TAIL_P(mp,block)	\
+	(((xfs_dir2_block_tail_t *)((char *)(block) + (mp)->m_dirblksize)) - 1)
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_TAIL_P) */
+
+/*
+ * Pointer to the first leaf entry; the count entries end where the tail begins.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_LEAF_P)
+struct xfs_dir2_leaf_entry *xfs_dir2_block_leaf_p_arch(
+        xfs_dir2_block_tail_t *btp, xfs_arch_t arch);
+#define	XFS_DIR2_BLOCK_LEAF_P_ARCH(btp,arch) \
+        xfs_dir2_block_leaf_p_arch(btp,arch)
+#else	/* inline form: step back (btp)->count leaf entries from the tail pointer */
+#define	XFS_DIR2_BLOCK_LEAF_P_ARCH(btp,arch)	\
+	(((struct xfs_dir2_leaf_entry *)(btp)) - INT_GET((btp)->count, arch))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_LEAF_P) */
+
+/*
+ * Function declarations.
+ */
+
+extern int
+	xfs_dir2_block_addname(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_block_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+				struct uio *uio, int *eofp, struct dirent *dbp,
+				xfs_dir2_put_t put);
+
+extern int
+	xfs_dir2_block_lookup(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_block_removename(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_block_replace(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_leaf_to_block(struct xfs_da_args *args, struct xfs_dabuf *lbp,
+			       struct xfs_dabuf *dbp);
+
+extern int
+	xfs_dir2_sf_to_block(struct xfs_da_args *args);
+
+#endif	/* __XFS_DIR2_BLOCK_H__ */
diff --git a/include/xfs_dir2_data.h b/include/xfs_dir2_data.h
new file mode 100644
index 000000000..96c850d66
--- /dev/null
+++ b/include/xfs_dir2_data.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_DATA_H__
+#define	__XFS_DIR2_DATA_H__
+
+/*
+ * Directory format 2, data block structures.
+ */
+
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Constants for directory data blocks.
+ */
+#define	XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: for multiblock dirs */
+#define	XFS_DIR2_DATA_ALIGN_LOG	3		/* i.e., 8 bytes */
+#define	XFS_DIR2_DATA_ALIGN	(1 << XFS_DIR2_DATA_ALIGN_LOG)
+#define	XFS_DIR2_DATA_FREE_TAG	0xffff		/* freetag value of an unused entry */
+#define	XFS_DIR2_DATA_FD_COUNT	3		/* size of the header's bestfree[] */
+
+/*
+ * Directory address space divided into sections,
+ * spaces separated by 32GB (XFS_DIR2_SPACE_SIZE is 1ULL << 35 bytes).
+ */
+#define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
+#define	XFS_DIR2_DATA_SPACE	0
+#define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
+#define	XFS_DIR2_DATA_FIRSTDB(mp)	\
+	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET)
+
+/*
+ * Offsets of . and .. in data space (always block 0); . directly follows the header.
+ */
+#define	XFS_DIR2_DATA_DOT_OFFSET	\
+	((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
+#define	XFS_DIR2_DATA_DOTDOT_OFFSET	\
+	(XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1))
+#define	XFS_DIR2_DATA_FIRST_OFFSET		\
+	(XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2))
+
+/*
+ * Structures.
+ */
+
+/*
+ * Describe a free area in the data block.
+ * The freespace itself will be formatted as an xfs_dir2_data_unused_t.
+ */
+typedef struct xfs_dir2_data_free {
+	xfs_dir2_data_off_t	offset;		/* start of freespace */
+	xfs_dir2_data_off_t	length;		/* length of freespace */
+} xfs_dir2_data_free_t;
+
+/*
+ * Header for the data blocks (also heads the single-block format).
+ * Always at the beginning of a directory-sized block.
+ * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
+ */
+typedef struct xfs_dir2_data_hdr {
+	__uint32_t		magic;		/* XFS_DIR2_DATA_MAGIC */
+						/* or XFS_DIR2_BLOCK_MAGIC */
+	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
+} xfs_dir2_data_hdr_t;
+
+/*
+ * Active entry in a data block.  Aligned to 8 bytes.
+ * Tag appears as the last 2 bytes; locate it with XFS_DIR2_DATA_ENTRY_TAG_P.
+ */
+typedef struct xfs_dir2_data_entry {
+	xfs_ino_t		inumber;	/* inode number */
+	__uint8_t		namelen;	/* name length */
+	__uint8_t		name[1];	/* name bytes, no null */
+						/* variable offset */
+	xfs_dir2_data_off_t	tag;		/* starting offset of us */
+} xfs_dir2_data_entry_t;
+
+/*
+ * Unused entry in a data block.  Aligned to 8 bytes.
+ * Tag appears as the last 2 bytes; locate it with XFS_DIR2_DATA_UNUSED_TAG_P_ARCH.
+ */
+typedef struct xfs_dir2_data_unused {
+	__uint16_t		freetag;	/* XFS_DIR2_DATA_FREE_TAG */
+	xfs_dir2_data_off_t	length;		/* total free length */
+						/* variable offset */
+	xfs_dir2_data_off_t	tag;		/* starting offset of us */
+} xfs_dir2_data_unused_t;
+
+typedef union {		/* one slot of a data block: either form */
+	xfs_dir2_data_entry_t	entry;
+	xfs_dir2_data_unused_t	unused;
+} xfs_dir2_data_union_t;
+
+/*
+ * Generic data block structure, for xfs_db: the header followed by entries.
+ */
+typedef struct xfs_dir2_data {
+	xfs_dir2_data_hdr_t	hdr;		/* magic XFS_DIR2_DATA_MAGIC */
+	xfs_dir2_data_union_t	u[1];		/* placeholder for the entries */
+} xfs_dir2_data_t;
+
+/*
+ * Macros.
+ */
+
+/*
+ * Size of a data entry with an n-byte name, tag included, rounded up to 8 bytes.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTSIZE)
+int xfs_dir2_data_entsize(int n);
+#define XFS_DIR2_DATA_ENTSIZE(n)	xfs_dir2_data_entsize(n)
+#else	/* inline form: offset of name[] + n + tag size, rounded up to XFS_DIR2_DATA_ALIGN */
+#define	XFS_DIR2_DATA_ENTSIZE(n)	\
+	((int)(roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
+		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN)))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTSIZE) */
+
+/*
+ * Pointer to an entry's tag word: the final xfs_dir2_data_off_t of the entry.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTRY_TAG_P)
+xfs_dir2_data_off_t *xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep);
+#define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	xfs_dir2_data_entry_tag_p(dep)
+#else	/* inline form: dep + XFS_DIR2_DATA_ENTSIZE(namelen) - size of the tag */
+#define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	\
+	((xfs_dir2_data_off_t *)\
+	 ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \
+	  (uint)sizeof(xfs_dir2_data_off_t)))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTRY_TAG_P) */
+
+/*
+ * Pointer to a freespace's tag word: the final xfs_dir2_data_off_t of the free span.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_UNUSED_TAG_P)
+xfs_dir2_data_off_t *xfs_dir2_data_unused_tag_p_arch(
+        xfs_dir2_data_unused_t *dup, xfs_arch_t arch);
+#define	XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup,arch) \
+        xfs_dir2_data_unused_tag_p_arch(dup,arch)
+#else	/* inline form: dup + (dup)->length - size of the tag */
+#define	XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup,arch)	\
+	((xfs_dir2_data_off_t *)\
+	 ((char *)(dup) + INT_GET((dup)->length, arch) \
+                        - (uint)sizeof(xfs_dir2_data_off_t)))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_UNUSED_TAG_P) */
+
+/*
+ * Function declarations.
+ */
+
+#ifdef DEBUG	/* real checker exists only in DEBUG builds */
+extern void
+	xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+#else	/* !DEBUG: no-op that is still a valid void expression; arguments are not evaluated */
+#define	xfs_dir2_data_check(dp,bp)	((void)0)
+#endif	/* DEBUG */
+
+extern xfs_dir2_data_free_t *
+	xfs_dir2_data_freefind(xfs_dir2_data_t *d,
+			       xfs_dir2_data_unused_t *dup);
+
+extern xfs_dir2_data_free_t *
+	xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
+				 xfs_dir2_data_unused_t *dup, int *loghead);
+
+extern void
+	xfs_dir2_data_freeremove(xfs_dir2_data_t *d,
+				 xfs_dir2_data_free_t *dfp, int *loghead);
+
+extern void
+	xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
+			       int *loghead, char *aendp);
+
+extern int
+	xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+			   struct xfs_dabuf **bpp);
+
+extern void
+	xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				xfs_dir2_data_entry_t *dep);
+
+extern void
+	xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
+extern void
+	xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				 xfs_dir2_data_unused_t *dup);
+
+extern void
+	xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				xfs_dir2_data_aoff_t offset,
+				xfs_dir2_data_aoff_t len, int *needlogp,
+				int *needscanp);
+
+extern void
+	xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+			       xfs_dir2_data_unused_t *dup,
+			       xfs_dir2_data_aoff_t offset,
+			       xfs_dir2_data_aoff_t len, int *needlogp,
+			       int *needscanp);
+
+#endif	/* __XFS_DIR2_DATA_H__ */
diff --git a/include/xfs_dir2_leaf.h b/include/xfs_dir2_leaf.h
new file mode 100644
index 000000000..f7ef39678
--- /dev/null
+++ b/include/xfs_dir2_leaf.h
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_LEAF_H__
+#define	__XFS_DIR2_LEAF_H__
+
+/*
+ * Directory version 2, leaf block structures.
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Constants.
+ */
+
+/*
+ * Offset of the leaf/node space (space number 1).  First block in this space
+ * is the btree root.
+ */
+#define	XFS_DIR2_LEAF_SPACE	1
+#define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
+#define	XFS_DIR2_LEAF_FIRSTDB(mp)	\
+	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET)
+
+/*
+ * Types.
+ */
+
+/*
+ * Offset in data space of a data entry, in units of XFS_DIR2_DATA_ALIGN bytes.
+ */
+typedef	__uint32_t	xfs_dir2_dataptr_t;
+#define	XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0x7fffffff)
+#define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
+
+/*
+ * Structures.
+ */
+
+/*
+ * Leaf block header: the common da block info followed by entry counts.
+ */
+typedef struct xfs_dir2_leaf_hdr {
+	xfs_da_blkinfo_t	info;		/* header for da routines */
+	__uint16_t		count;		/* count of entries */
+	__uint16_t		stale;		/* count of stale entries */
+} xfs_dir2_leaf_hdr_t;
+
+/*
+ * Leaf block entry: pairs a name hash with the dataptr of its data entry.
+ */
+typedef struct xfs_dir2_leaf_entry {
+	xfs_dahash_t		hashval;	/* hash value of name */
+	xfs_dir2_dataptr_t	address;	/* address of data entry */
+} xfs_dir2_leaf_entry_t;
+
+/*
+ * Leaf block tail; bestcount is how far XFS_DIR2_LEAF_BESTS_P_ARCH steps back.
+ */
+typedef struct xfs_dir2_leaf_tail {
+	__uint32_t		bestcount;
+} xfs_dir2_leaf_tail_t;
+
+/*
+ * Leaf block.
+ * bests and tail are at the end of the block for single-leaf only
+ * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
+ */
+typedef struct xfs_dir2_leaf {
+	xfs_dir2_leaf_hdr_t	hdr;		/* leaf header */
+	xfs_dir2_leaf_entry_t	ents[1];	/* entries */
+						/* ... members below are found from the block end */
+	xfs_dir2_data_off_t	bests[1];	/* best free counts, see XFS_DIR2_LEAF_BESTS_P_ARCH */
+	xfs_dir2_leaf_tail_t	tail;		/* leaf tail, see XFS_DIR2_LEAF_TAIL_P */
+} xfs_dir2_leaf_t;
+
+/*
+ * Macros.
+ * The DB blocks are logical directory block numbers, not filesystem blocks.
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_MAX_LEAF_ENTS)	/* function form */
+int
+xfs_dir2_max_leaf_ents(struct xfs_mount *mp);
+#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	\
+	xfs_dir2_max_leaf_ents(mp)
+#else	/* inline form: (m_dirblksize - leaf header size) / leaf entry size */
+#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	\
+	((int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / \
+	       (uint)sizeof(xfs_dir2_leaf_entry_t)))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_MAX_LEAF_ENTS) */
+
+/*
+ * Get address of the leaf tail (which holds bestcount) in the single-leaf block.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_TAIL_P)
+xfs_dir2_leaf_tail_t *
+xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp);
+#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	\
+	xfs_dir2_leaf_tail_p(mp, lp)
+#else	/* inline form: lp + m_dirblksize - size of the tail, i.e. the block's last bytes */
+#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	\
+	((xfs_dir2_leaf_tail_t *)\
+	 ((char *)(lp) + (mp)->m_dirblksize - \
+	  (uint)sizeof(xfs_dir2_leaf_tail_t)))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_TAIL_P) */
+
+/*
+ * Get address of the bests array: bestcount entries ending where the tail begins.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_BESTS_P)
+xfs_dir2_data_off_t *
+xfs_dir2_leaf_bests_p_arch(xfs_dir2_leaf_tail_t *ltp, xfs_arch_t arch);
+#define	XFS_DIR2_LEAF_BESTS_P_ARCH(ltp,arch)	xfs_dir2_leaf_bests_p_arch(ltp,arch)
+#else	/* inline form: step back (ltp)->bestcount xfs_dir2_data_off_t slots from ltp */
+#define	XFS_DIR2_LEAF_BESTS_P_ARCH(ltp,arch)	\
+	((xfs_dir2_data_off_t *)(ltp) - INT_GET((ltp)->bestcount, arch))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_BESTS_P) */
+
+/*
+ * Convert dataptr to byte in file space (dp << XFS_DIR2_DATA_ALIGN_LOG).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_BYTE)
+xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
+#define	XFS_DIR2_DATAPTR_TO_BYTE(mp,dp)	xfs_dir2_dataptr_to_byte(mp, dp)
+#else	/* inline form: widens dp to xfs_dir2_off_t before shifting; mp is unused */
+#define	XFS_DIR2_DATAPTR_TO_BYTE(mp,dp)	\
+	((xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG)
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_BYTE) */
+
+/*
+ * Convert byte in file space to dataptr.  It had better be aligned: low bits are dropped.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DATAPTR)
+xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define	XFS_DIR2_BYTE_TO_DATAPTR(mp,by)	xfs_dir2_byte_to_dataptr(mp,by)
+#else	/* inline form: by >> XFS_DIR2_DATA_ALIGN_LOG, cast to dataptr; mp is unused */
+#define	XFS_DIR2_BYTE_TO_DATAPTR(mp,by)	\
+	((xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DATAPTR) */
+
+/*
+ * Convert dataptr to a (logical directory) block number
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_DB)
+xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
+#define	XFS_DIR2_DATAPTR_TO_DB(mp,dp)	xfs_dir2_dataptr_to_db(mp, dp)
+#else	/* inline form: dataptr -> byte -> directory block */
+#define	XFS_DIR2_DATAPTR_TO_DB(mp,dp)	\
+	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp))
+#endif	/* XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_DB) */
+
+/*
+ * Convert dataptr to a byte offset in a block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_OFF)
+xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
+#define	XFS_DIR2_DATAPTR_TO_OFF(mp,dp)	xfs_dir2_dataptr_to_off(mp, dp)
+#else
+#define	XFS_DIR2_DATAPTR_TO_OFF(mp,dp)	\
+	XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp))
+#endif
+
+/*
+ * Convert block and offset to byte in space
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_BYTE)
+xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
+			xfs_dir2_data_aoff_t o);
+#define	XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)	\
+	xfs_dir2_db_off_to_byte(mp, db, o)
+#else
+#define	XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)	\
+	(((xfs_dir2_off_t)(db) << \
+	 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o))
+#endif
+
+/*
+ * Convert byte in space to (DB) block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DB)
+xfs_dir2_db_t xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define	XFS_DIR2_BYTE_TO_DB(mp,by)	xfs_dir2_byte_to_db(mp, by)
+#else
+#define	XFS_DIR2_BYTE_TO_DB(mp,by)	\
+	((xfs_dir2_db_t)((by) >> \
+			 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)))
+#endif
+
+/*
+ * Convert byte in space to (DA) block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DA)
+xfs_dablk_t xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define	XFS_DIR2_BYTE_TO_DA(mp,by)	xfs_dir2_byte_to_da(mp, by)
+#else
+#define	XFS_DIR2_BYTE_TO_DA(mp,by)	\
+	XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by))
+#endif
+
+/*
+ * Convert byte in space to offset in a block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_OFF)
+xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	xfs_dir2_byte_to_off(mp, by)
+#else
+#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	\
+	((xfs_dir2_data_aoff_t)((by) & \
+				((1 << ((mp)->m_sb.sb_blocklog + \
+					(mp)->m_sb.sb_dirblklog)) - 1)))
+#endif
+
+/*
+ * Convert block and offset to dataptr
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_DATAPTR)
+xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
+			   xfs_dir2_data_aoff_t o);
+#define	XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)	\
+	xfs_dir2_db_off_to_dataptr(mp, db, o)
+#else
+#define	XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)	\
+	XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o))
+#endif
+
+/*
+ * Convert block (DB) to block (dablk)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_DA)
+xfs_dablk_t xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db);
+#define	XFS_DIR2_DB_TO_DA(mp,db)	xfs_dir2_db_to_da(mp, db)
+#else
+#define	XFS_DIR2_DB_TO_DA(mp,db)	\
+	((xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog))
+#endif
+
+/*
+ * Convert block (dablk) to block (DB)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_DB)
+xfs_dir2_db_t xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da);
+#define	XFS_DIR2_DA_TO_DB(mp,da)	xfs_dir2_da_to_db(mp, da)
+#else
+#define	XFS_DIR2_DA_TO_DB(mp,da)	\
+	((xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog))
+#endif
+
+/*
+ * Convert block (dablk) to byte offset in space
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_BYTE)
+xfs_dir2_off_t xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da);
+#define XFS_DIR2_DA_TO_BYTE(mp,da)	xfs_dir2_da_to_byte(mp, da)
+#else
+#define	XFS_DIR2_DA_TO_BYTE(mp,da)	\
+	XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0)
+#endif
+
+/*
+ * Function declarations.
+ */
+
+extern int
+	xfs_dir2_block_to_leaf(struct xfs_da_args *args, struct xfs_dabuf *dbp);
+
+extern int
+	xfs_dir2_leaf_addname(struct xfs_da_args *args);
+
+extern void
+	xfs_dir2_leaf_compact(struct xfs_da_args *args, struct xfs_dabuf *bp);
+
+extern void
+	xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+				 int *lowstalep, int *highstalep, int *lowlogp,
+				 int *highlogp);
+
+extern int
+	xfs_dir2_leaf_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+			       struct uio *uio, int *eofp, struct dirent *dbp,
+			       xfs_dir2_put_t put);
+
+extern int
+	xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
+			   struct xfs_dabuf **bpp, int magic);
+
+extern void
+	xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+			       int first, int last);
+
+extern void
+	xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				int first, int last);
+
+extern void
+	xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
+extern void
+	xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
+extern int
+	xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_leaf_removename(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_leaf_replace(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+				  struct xfs_dabuf *lbp);
+extern int
+	xfs_dir2_leaf_trim_data(struct xfs_da_args *args, struct xfs_dabuf *lbp,				xfs_dir2_db_t db);
+
+extern int
+	xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+
+#endif	/* __XFS_DIR2_LEAF_H__ */
diff --git a/include/xfs_dir2_node.h b/include/xfs_dir2_node.h
new file mode 100644
index 000000000..4ec4d1e11
--- /dev/null
+++ b/include/xfs_dir2_node.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_NODE_H__
+#define	__XFS_DIR2_NODE_H__
+
+/*
+ * Directory version 2, btree node format structures
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Constants.
+ */
+
+/*
+ * Offset of the freespace index.
+ */
+#define	XFS_DIR2_FREE_SPACE	2
+#define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
+#define	XFS_DIR2_FREE_FIRSTDB(mp)	\
+	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET)
+
+#define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F */
+
+/*
+ * Structures.
+ */
+typedef	struct xfs_dir2_free_hdr {
+	__uint32_t		magic;		/* XFS_DIR2_FREE_MAGIC */
+	__int32_t		firstdb;	/* db of first entry */
+	__int32_t		nvalid;		/* count of valid entries */
+	__int32_t		nused;		/* count of used entries */
+} xfs_dir2_free_hdr_t;
+
+typedef struct xfs_dir2_free {
+	xfs_dir2_free_hdr_t	hdr;		/* block header */
+	xfs_dir2_data_off_t	bests[1];	/* best free counts */
+						/* unused entries are -1 */
+} xfs_dir2_free_t;
+#define	XFS_DIR2_MAX_FREE_BESTS(mp)	/* bests[] entries fitting after hdr */ \
+	(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
+	 (uint)sizeof(xfs_dir2_data_off_t))
+
+/*
+ * Macros.
+ */
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDB)
+xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db);
+#define	XFS_DIR2_DB_TO_FDB(mp,db)	xfs_dir2_db_to_fdb(mp, db)
+#else
+#define	XFS_DIR2_DB_TO_FDB(mp,db)	\
+	(XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp))
+#endif
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDINDEX)
+int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db);
+#define	XFS_DIR2_DB_TO_FDINDEX(mp,db)	xfs_dir2_db_to_fdindex(mp, db)
+#else
+#define	XFS_DIR2_DB_TO_FDINDEX(mp,db)	((db) % XFS_DIR2_MAX_FREE_BESTS(mp))
+#endif
+
+/* 
+ * Functions.
+ */
+
+extern void
+	xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				int first, int last);
+
+extern int
+	xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_dabuf *lbp);
+
+extern xfs_dahash_t
+	xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
+
+extern int
+	xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+				  struct xfs_da_args *args, int *indexp,
+				  struct xfs_da_state *state);
+
+extern int
+	xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
+			     struct xfs_dabuf *leaf2_bp);
+
+extern int
+	xfs_dir2_leafn_split(struct xfs_da_state *state,
+			     struct xfs_da_state_blk *oldblk,
+			     struct xfs_da_state_blk *newblk);
+
+extern int
+	xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+
+extern void
+	xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+				 struct xfs_da_state_blk *drop_blk,
+				 struct xfs_da_state_blk *save_blk);
+
+extern int
+	xfs_dir2_node_addname(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_node_lookup(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_node_removename(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_node_replace(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+				int *rvalp);
+
+#endif	/* __XFS_DIR2_NODE_H__ */
diff --git a/include/xfs_dir2_sf.h b/include/xfs_dir2_sf.h
new file mode 100644
index 000000000..b74dd752e
--- /dev/null
+++ b/include/xfs_dir2_sf.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_SF_H__
+#define	__XFS_DIR2_SF_H__
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to
+ * fit into the literal area of the inode.
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_block;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Maximum size of a shortform directory.
+ */
+#define	XFS_DIR2_SF_MAX_SIZE	\
+	(XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \
+	 (uint)sizeof(xfs_agino_t))
+
+/*
+ * Inode number stored as 8 8-bit values.
+ */
+typedef	struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
+
+#define	XFS_DIR2_SF_GET_INO8_ARCH(di,arch)	/* di must be an lvalue */ \
+	((xfs_ino_t)(DIRINO_GET_ARCH(&(di),arch)))
+#define	XFS_DIR2_SF_GET_INO8(di)	/* as above, ARCH_NOCONVERT */ \
+	XFS_DIR2_SF_GET_INO8_ARCH(di,ARCH_NOCONVERT)
+
+/*
+ * Inode number stored as 4 8-bit values.
+ * Works a lot of the time, when all the inode numbers in a directory
+ * fit in 32 bits.
+ */
+typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
+#define	XFS_DIR2_SF_GET_INO4_ARCH(di,arch)	/* di must be an lvalue */ \
+	((xfs_ino_t)(DIRINO4_GET_ARCH(&(di),arch)))
+#define	XFS_DIR2_SF_GET_INO4(di)	/* as above, ARCH_NOCONVERT */ \
+	XFS_DIR2_SF_GET_INO4_ARCH(di,ARCH_NOCONVERT)
+
+typedef union {
+	xfs_dir2_ino8_t	i8;
+	xfs_dir2_ino4_t	i4;
+} xfs_dir2_inou_t;
+#define	XFS_DIR2_MAX_SHORT_INUM	((xfs_ino_t)0xffffffffULL)
+
+/*
+ * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
+ * Only need 16 bits, this is the byte offset into the single block form.
+ */
+typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
+
+/*
+ * The parent directory has a dedicated field, and the self-pointer must
+ * be calculated on the fly.
+ *
+ * Entries are packed toward the top as tightly as possible.  The header
+ * and the elements must be bcopy()'d out into a work area to get correct
+ * alignment for the inode number fields.
+ */
+typedef struct xfs_dir2_sf_hdr {
+	__uint8_t		count;		/* count of entries */
+	__uint8_t		i8count;	/* count of 8-byte inode #s */
+	xfs_dir2_inou_t		parent;		/* parent dir inode number */
+} xfs_dir2_sf_hdr_t;
+
+typedef struct xfs_dir2_sf_entry {
+	__uint8_t		namelen;	/* actual name length */
+	xfs_dir2_sf_off_t	offset;		/* saved offset */
+	__uint8_t		name[1];	/* name, variable size */
+	xfs_dir2_inou_t		inumber;	/* inode number, var. offset */
+} xfs_dir2_sf_entry_t;
+
+typedef struct xfs_dir2_sf {
+	xfs_dir2_sf_hdr_t	hdr;		/* shortform header */
+	xfs_dir2_sf_entry_t	list[1];	/* shortform entries */
+} xfs_dir2_sf_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_HDR_SIZE)
+int xfs_dir2_sf_hdr_size(int i8count);
+#define	XFS_DIR2_SF_HDR_SIZE(i8count)	xfs_dir2_sf_hdr_size(i8count)
+#else
+#define	XFS_DIR2_SF_HDR_SIZE(i8count)	\
+	((uint)sizeof(xfs_dir2_sf_hdr_t) - \
+	 ((i8count) == 0) * \
+	 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_INUMBERP)
+xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep);
+#define	XFS_DIR2_SF_INUMBERP(sfep)	xfs_dir2_sf_inumberp(sfep)
+#else
+#define	XFS_DIR2_SF_INUMBERP(sfep)	\
+	((xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen])
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_INUMBER)
+xfs_intino_t xfs_dir2_sf_get_inumber_arch(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from, 
+                                            xfs_arch_t arch);
+#define	XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, from, arch)	\
+	xfs_dir2_sf_get_inumber_arch(sfp, from, arch)
+
+#else
+#define	XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, from, arch)	\
+	((sfp)->hdr.i8count == 0 ? \
+		(xfs_intino_t)XFS_DIR2_SF_GET_INO4_ARCH(*(from), arch) : \
+		(xfs_intino_t)XFS_DIR2_SF_GET_INO8_ARCH(*(from), arch))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_INUMBER)
+void xfs_dir2_sf_put_inumber_arch(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
+			             xfs_dir2_inou_t *to, xfs_arch_t arch);
+#define	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp,from,to,arch)	\
+	xfs_dir2_sf_put_inumber_arch(sfp,from,to,arch)
+#else	/* statement macro; terminate with ';' like the function form */
+#define	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp,from,to,arch)	\
+	do { if ((sfp)->hdr.i8count == 0) { /* no 8-byte inode #s */ \
+		DIRINO4_COPY_ARCH(from,to,arch); \
+	} else { \
+		DIRINO_COPY_ARCH(from,to,arch); \
+	} } while (0)	/* single statement: safe in an unbraced if/else */
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_OFFSET)
+xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset_arch(xfs_dir2_sf_entry_t *sfep, 
+                                                    xfs_arch_t arch);
+xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep);
+#define	XFS_DIR2_SF_GET_OFFSET_ARCH(sfep,arch)	\
+        xfs_dir2_sf_get_offset_arch(sfep,arch)
+#else
+#define	XFS_DIR2_SF_GET_OFFSET_ARCH(sfep,arch)	\
+        INT_GET_UNALIGNED_16_ARCH(&(sfep)->offset.i,arch)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_OFFSET)
+void xfs_dir2_sf_put_offset_arch(xfs_dir2_sf_entry_t *sfep,
+			            xfs_dir2_data_aoff_t off, xfs_arch_t arch);
+#define	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,off,arch) \
+        xfs_dir2_sf_put_offset_arch(sfep,off,arch)
+#else
+#define	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,off,arch)	\
+        INT_SET_UNALIGNED_16_ARCH(&(sfep)->offset.i,off,arch)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYNAME)
+int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len);
+#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)	\
+	xfs_dir2_sf_entsize_byname(sfp,len)
+#else
+#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)	/* space a name uses */ \
+	((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
+	 ((sfp)->hdr.i8count == 0) * \
+	 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYENTRY)
+int xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep);
+#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)	\
+	xfs_dir2_sf_entsize_byentry(sfp,sfep)
+#else
+#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)	/* space an entry uses */ \
+	((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
+	 ((sfp)->hdr.i8count == 0) * \
+	 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_FIRSTENTRY)
+xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp);
+#define XFS_DIR2_SF_FIRSTENTRY(sfp)	xfs_dir2_sf_firstentry(sfp)
+#else	/* entry list begins XFS_DIR2_SF_HDR_SIZE() bytes past sfp */
+#define XFS_DIR2_SF_FIRSTENTRY(sfp)	/* first entry in struct */ \
+	((xfs_dir2_sf_entry_t *) \
+	 ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE((sfp)->hdr.i8count)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_NEXTENTRY)
+xfs_dir2_sf_entry_t *xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp,
+					   xfs_dir2_sf_entry_t *sfep);
+#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)		xfs_dir2_sf_nextentry(sfp,sfep)
+#else
+#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)		/* next entry in struct */ \
+	((xfs_dir2_sf_entry_t *) \
+		((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)))
+#endif
+
+/*
+ * Functions.
+ */
+
+extern int
+	xfs_dir2_block_sfsize(struct xfs_inode *dp,
+			      struct xfs_dir2_block *block,
+			      xfs_dir2_sf_hdr_t *sfhp);
+
+extern int
+	xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+			     int size, xfs_dir2_sf_hdr_t *sfhp);
+
+extern int
+	xfs_dir2_sf_addname(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+
+extern int
+	xfs_dir2_sf_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
+			     struct dirent *dbp, xfs_dir2_put_t put);
+
+extern int
+	xfs_dir2_sf_lookup(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_sf_removename(struct xfs_da_args *args);
+
+extern int
+	xfs_dir2_sf_replace(struct xfs_da_args *args);
+
+#endif	/* __XFS_DIR2_SF_H__ */
diff --git a/include/xfs_dir_leaf.h b/include/xfs_dir_leaf.h
new file mode 100644
index 000000000..d330ff8ec
--- /dev/null
+++ b/include/xfs_dir_leaf.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR_LEAF_H__
+#define	__XFS_DIR_LEAF_H__
+
+/*
+ * Directory layout, internal structure, access macros, etc.
+ *
+ * Large directories are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Filenames are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of a filename may not be unique, we may have duplicate keys.  The
+ * internal links in the Btree are logical block offsets into the file.
+ */
+
+struct dirent;
+struct uio;
+struct xfs_bmap_free;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_dir_put_args;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*========================================================================
+ * Directory Structure when equal to XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This is the structure of the leaf nodes in the Btree.
+ *
+ * Struct leaf_entry's are packed from the top.  Names grow from the bottom
+ * but are not packed.  The freemap contains run-length-encoded entries
+ * for the free bytes after the leaf_entry's, but only the N largest such;
+ * smaller runs are dropped.  When the freemap doesn't show enough space
+ * for an allocation, we compact the namelist area and try again.  If we
+ * still don't have enough space, then we have to split the block.
+ *
+ * Since we have duplicate hash keys, for each key that matches, compare
+ * the actual string.  The root and intermediate node search always takes
+ * the first-in-the-block key match found, so we should only have to work
+ * "forw"ard.  If none matches, continue with the "forw"ard leaf nodes
+ * until the hash key changes or the filename is found.
+ *
+ * The parent directory and the self-pointer are explicitly represented
+ * (ie: there are entries for "." and "..").
+ *
+ * Note that the count being a __uint16_t limits us to something like a 
+ * blocksize of 1.3MB in the face of worst case (short) filenames.
+ */
+#define XFS_DIR_LEAF_MAPSIZE	3	/* how many freespace slots */
+
+typedef struct xfs_dir_leafblock {
+	struct xfs_dir_leaf_hdr {	/* constant-structure header block */
+		xfs_da_blkinfo_t info;	/* block type, links, etc. */
+		__uint16_t count;	/* count of active leaf_entry's */
+		__uint16_t namebytes;	/* num bytes of name strings stored */
+		__uint16_t firstused;	/* first used byte in name area */
+		__uint8_t  holes;	/* != 0 if blk needs compaction */
+		__uint8_t  pad1;
+		struct xfs_dir_leaf_map {/* RLE map of free bytes */
+			__uint16_t base; /* base of free region */
+			__uint16_t size; /* run length of free region */
+		} freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
+	} hdr;
+	struct xfs_dir_leaf_entry {	/* sorted on key, not name */
+		xfs_dahash_t hashval;	/* hash value of name */
+		__uint16_t nameidx;	/* index into buffer of name */
+		__uint8_t namelen;	/* length of name string */
+		__uint8_t pad2;
+	} entries[1];			/* var sized array */
+	struct xfs_dir_leaf_name {
+		xfs_dir_ino_t inumber;	/* inode number for this key */
+		__uint8_t name[1];	/* name string itself */
+	} namelist[1];			/* grows from bottom of buf */
+} xfs_dir_leafblock_t;
+typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
+typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
+typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
+typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
+
+/*
+ * Length of name for which a 512-byte block filesystem
+ * can get a double split.
+ */
+#define	XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN	\
+	(512 - (uint)sizeof(xfs_dir_leaf_hdr_t) - \
+	 (uint)sizeof(xfs_dir_leaf_entry_t) * 2 - \
+	 (uint)sizeof(xfs_dir_leaf_name_t) * 2 - (MAXNAMELEN - 2) + 1 + 1)
+
+typedef int (*xfs_dir_put_t)(struct xfs_dir_put_args *pa);
+
+typedef union {
+	xfs_off_t		o;		/* offset (cookie) */
+	/*
+	 * Watch the order here (endian-ness dependent).
+	 */
+	struct {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		xfs_dahash_t	h;	/* hash value */
+		__uint32_t	be;	/* block and entry */
+#else	/* __BYTE_ORDER == __BIG_ENDIAN */
+		__uint32_t	be;	/* block and entry */
+		xfs_dahash_t	h;	/* hash value */
+#endif	/* __BYTE_ORDER == __BIG_ENDIAN */
+	} s;
+} xfs_dircook_t;
+
+#define	XFS_PUT_COOKIE(c,mp,bno,entry,hash)	\
+	((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
+
+#define	XFS_GET_DIR_INO_ARCH(mp,di,arch) \
+    DIRINO_GET_ARCH(&(di),arch)
+#define	XFS_GET_DIR_INO(mp,di) \
+    XFS_GET_DIR_INO_ARCH(mp,di,ARCH_NOCONVERT)
+
+typedef struct xfs_dir_put_args
+{
+	xfs_dircook_t	cook;		/* cookie of (next) entry */
+	xfs_intino_t	ino;		/* inode number */
+	struct dirent	*dbp;		/* buffer pointer */
+	char		*name;		/* directory entry name */
+	int		namelen;	/* length of name */
+	int		done;		/* output: set if value was stored */
+	xfs_dir_put_t	put;		/* put function ptr (i/o) */
+	struct uio	*uio;		/* uio control structure */
+	unsigned char	type;		/* file type (see include/linux/fs.h) */
+} xfs_dir_put_args_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYNAME)
+int xfs_dir_leaf_entsize_byname(int len);
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	xfs_dir_leaf_entsize_byname(len)
+#else	/* the -1 drops name[1], which sizeof(xfs_dir_leaf_name_t) counts */
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	/* space a name will use */ \
+	((uint)sizeof(xfs_dir_leaf_name_t)-1 + (len))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYENTRY)
+int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry);
+#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)	\
+	xfs_dir_leaf_entsize_byentry(entry)
+#else
+#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)	/* space an entry will use */ \
+	((uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_NAMESTRUCT)
+xfs_dir_leaf_name_t *
+xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset);
+#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)	\
+	xfs_dir_leaf_namestruct(leafp,offset)
+#else
+#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)	/* point to name struct */ \
+	((xfs_dir_leaf_name_t *)&((char *)(leafp))[offset])
+#endif
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Internal routines when dirsize < XFS_LITINO(mp).
+ */
+int xfs_dir_shortform_create(struct xfs_da_args *args, xfs_ino_t parent);
+int xfs_dir_shortform_addname(struct xfs_da_args *args);
+int xfs_dir_shortform_lookup(struct xfs_da_args *args);
+int xfs_dir_shortform_to_leaf(struct xfs_da_args *args);
+int xfs_dir_shortform_removename(struct xfs_da_args *args);
+int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
+				      struct dirent *dbp, xfs_dir_put_t put);
+int xfs_dir_shortform_replace(struct xfs_da_args *args);
+
+/*
+ * Internal routines when dirsize == XFS_LBSIZE(mp).
+ */
+int xfs_dir_leaf_to_node(struct xfs_da_args *args);
+int xfs_dir_leaf_to_shortform(struct xfs_da_args *args);
+
+/*
+ * Routines used for growing the Btree.
+ */
+int	xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
+				   struct xfs_dabuf **bpp);
+int	xfs_dir_leaf_split(struct xfs_da_state *state,
+				  struct xfs_da_state_blk *oldblk,
+				  struct xfs_da_state_blk *newblk);
+int	xfs_dir_leaf_add(struct xfs_dabuf *leaf_buffer,
+				struct xfs_da_args *args, int insertion_index);
+int	xfs_dir_leaf_addname(struct xfs_da_args *args);
+int	xfs_dir_leaf_lookup_int(struct xfs_dabuf *leaf_buffer,
+				       struct xfs_da_args *args,
+				       int *index_found_at);
+int	xfs_dir_leaf_remove(struct xfs_trans *trans,
+				   struct xfs_dabuf *leaf_buffer,
+				   int index_to_remove);
+int	xfs_dir_leaf_getdents_int(struct xfs_dabuf *bp, struct xfs_inode *dp,
+					 xfs_dablk_t bno, struct uio *uio,
+					 int *eobp, struct dirent *dbp,
+					 xfs_dir_put_t put, xfs_daddr_t nextda);
+
+/*
+ * Routines used for shrinking the Btree.
+ */
+int	xfs_dir_leaf_toosmall(struct xfs_da_state *state, int *retval);
+void	xfs_dir_leaf_unbalance(struct xfs_da_state *state,
+					     struct xfs_da_state_blk *drop_blk,
+					     struct xfs_da_state_blk *save_blk);
+
+/*
+ * Utility routines.
+ */
+uint	xfs_dir_leaf_lasthash(struct xfs_dabuf *bp, int *count);
+int	xfs_dir_leaf_order(struct xfs_dabuf *leaf1_bp,
+				  struct xfs_dabuf *leaf2_bp);
+int	xfs_dir_put_dirent32_direct(xfs_dir_put_args_t *pa);
+int	xfs_dir_put_dirent32_uio(xfs_dir_put_args_t *pa);
+int	xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa);
+int	xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa);
+int	xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+
+
+/*
+ * Global data.
+ */
+extern xfs_dahash_t	xfs_dir_hash_dot, xfs_dir_hash_dotdot;
+
+#endif /* __XFS_DIR_LEAF_H__ */
diff --git a/include/xfs_dir_sf.h b/include/xfs_dir_sf.h
new file mode 100644
index 000000000..d875da4b3
--- /dev/null
+++ b/include/xfs_dir_sf.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR_SF_H__
+#define	__XFS_DIR_SF_H__
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to
+ * fit into the literal area of the inode.
+ */
+
+typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
+
+/*
+ * The parent directory has a dedicated field, and the self-pointer must
+ * be calculated on the fly.
+ *
+ * Entries are packed toward the top as tight as possible.  The header
+ * and the elements must be bcopy()'d out into a work area to get correct
+ * alignment for the inode number fields.
+ */
+typedef struct xfs_dir_shortform {
+	struct xfs_dir_sf_hdr {		/* constant-structure header block */
+		xfs_dir_ino_t parent;	/* parent dir inode number */
+		__uint8_t count;	/* count of active entries */
+	} hdr;
+	struct xfs_dir_sf_entry {
+		xfs_dir_ino_t inumber;	/* referenced inode number */
+		__uint8_t namelen;	/* actual length of name (no NULL) */
+		__uint8_t name[1];	/* name */
+	} list[1];			/* variable sized array */
+} xfs_dir_shortform_t;
+typedef struct xfs_dir_sf_hdr xfs_dir_sf_hdr_t;
+typedef struct xfs_dir_sf_entry xfs_dir_sf_entry_t;
+
+/*
+ * We generate this then sort it, so that readdirs are returned in
+ * hash-order.  Else seekdir won't work.
+ */
+typedef struct xfs_dir_sf_sort {
+	__uint8_t	entno;		/* .=0, ..=1, else entry# + 2 */
+	__uint8_t	seqno;		/* sequence # with same hash value */
+	__uint8_t	namelen;	/* length of name value (no null) */
+	xfs_dahash_t	hash;		/* this entry's hash value */
+	xfs_intino_t	ino;		/* this entry's inode number */
+	char		*name;		/* name value, pointer into buffer */
+} xfs_dir_sf_sort_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_GET_DIRINO)
+void xfs_dir_sf_get_dirino_arch(xfs_dir_ino_t *from, xfs_ino_t *to, xfs_arch_t arch);
+void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to);
+#define	XFS_DIR_SF_GET_DIRINO_ARCH(from,to,arch)    xfs_dir_sf_get_dirino_arch(from, to, arch)
+#define	XFS_DIR_SF_GET_DIRINO(from,to)		    xfs_dir_sf_get_dirino(from, to)
+#else
+#define	XFS_DIR_SF_GET_DIRINO_ARCH(from,to,arch)    DIRINO_COPY_ARCH(from,to,arch)	
+#define	XFS_DIR_SF_GET_DIRINO(from,to)	            DIRINO_COPY_ARCH(from,to,ARCH_NOCONVERT)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_PUT_DIRINO)
+void xfs_dir_sf_put_dirino_arch(xfs_ino_t *from, xfs_dir_ino_t *to, xfs_arch_t arch);
+void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to);
+#define	XFS_DIR_SF_PUT_DIRINO_ARCH(from,to,arch)    xfs_dir_sf_put_dirino_arch(from, to, arch)
+#define	XFS_DIR_SF_PUT_DIRINO(from,to)		    xfs_dir_sf_put_dirino(from, to)
+#else
+#define	XFS_DIR_SF_PUT_DIRINO_ARCH(from,to,arch)    DIRINO_COPY_ARCH(from,to,arch)
+#define	XFS_DIR_SF_PUT_DIRINO(from,to)	            DIRINO_COPY_ARCH(from,to,ARCH_NOCONVERT)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYNAME)
+int xfs_dir_sf_entsize_byname(int len);
+#define XFS_DIR_SF_ENTSIZE_BYNAME(len)		xfs_dir_sf_entsize_byname(len)
+#else
+#define XFS_DIR_SF_ENTSIZE_BYNAME(len)		/* space a name uses */ \
+	((uint)sizeof(xfs_dir_sf_entry_t)-1 + (len))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYENTRY)
+int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep);
+#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)	xfs_dir_sf_entsize_byentry(sfep)
+#else
+#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)	/* space an entry uses */ \
+	((uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_NEXTENTRY)
+xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep);
+#define XFS_DIR_SF_NEXTENTRY(sfep)		xfs_dir_sf_nextentry(sfep)
+#else
+#define XFS_DIR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \
+	((xfs_dir_sf_entry_t *) \
+		((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ALLFIT)
+int xfs_dir_sf_allfit(int count, int totallen);
+#define XFS_DIR_SF_ALLFIT(count,totallen)	\
+	xfs_dir_sf_allfit(count,totallen)
+#else
+#define XFS_DIR_SF_ALLFIT(count,totallen)	/* will all entries fit? */ \
+	((uint)sizeof(xfs_dir_sf_hdr_t) + \
+	       ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen))
+#endif
+
+#ifdef XFS_ALL_TRACE
+#define	XFS_DIR_TRACE	/* XFS_ALL_TRACE turns on directory tracing */
+#endif /* XFS_ALL_TRACE */
+
+#if !defined(DEBUG)
+#undef XFS_DIR_TRACE	/* directory tracing is forced off unless DEBUG is defined */
+#endif /* !DEBUG */
+
+/*
+ * Kernel tracing support for directories.
+ */
+struct uio;
+struct xfs_inode;
+struct xfs_da_intnode;
+struct xfs_dinode;
+struct xfs_dir_leafblock;
+struct xfs_dir_leaf_entry;
+
+#define	XFS_DIR_TRACE_SIZE	4096	/* size of global trace buffer */     
+
+/*
+ * Trace record types.
+ */
+#define	XFS_DIR_KTRACE_G_DU	1	/* dp, uio */
+#define	XFS_DIR_KTRACE_G_DUB	2	/* dp, uio, bno */
+#define	XFS_DIR_KTRACE_G_DUN	3	/* dp, uio, node */
+#define	XFS_DIR_KTRACE_G_DUL	4	/* dp, uio, leaf */
+#define	XFS_DIR_KTRACE_G_DUE	5	/* dp, uio, leaf entry */
+#define	XFS_DIR_KTRACE_G_DUC	6	/* dp, uio, cookie */
+
+#if defined(XFS_DIR_TRACE)
+
+void xfs_dir_trace_g_du(char *where, struct xfs_inode *dp, struct uio *uio);
+void xfs_dir_trace_g_dub(char *where, struct xfs_inode *dp, struct uio *uio,
+			      xfs_dablk_t bno);
+void xfs_dir_trace_g_dun(char *where, struct xfs_inode *dp, struct uio *uio,
+			      struct xfs_da_intnode *node);
+void xfs_dir_trace_g_dul(char *where, struct xfs_inode *dp, struct uio *uio,
+			      struct xfs_dir_leafblock *leaf);
+void xfs_dir_trace_g_due(char *where, struct xfs_inode *dp, struct uio *uio,
+			      struct xfs_dir_leaf_entry *entry);
+void xfs_dir_trace_g_duc(char *where, struct xfs_inode *dp, struct uio *uio,
+			      xfs_off_t cookie);
+void xfs_dir_trace_enter(int type, char *where,
+			     __psunsigned_t a0, __psunsigned_t a1,
+			     __psunsigned_t a2, __psunsigned_t a3,
+			     __psunsigned_t a4, __psunsigned_t a5,
+			     __psunsigned_t a6, __psunsigned_t a7,
+			     __psunsigned_t a8, __psunsigned_t a9,
+			     __psunsigned_t a10, __psunsigned_t a11);
+#else /* !XFS_DIR_TRACE: the trace calls below expand to nothing */
+#define	xfs_dir_trace_g_du(w,d,u)
+#define	xfs_dir_trace_g_dub(w,d,u,b)
+#define	xfs_dir_trace_g_dun(w,d,u,n)
+#define	xfs_dir_trace_g_dul(w,d,u,l)
+#define	xfs_dir_trace_g_due(w,d,u,e)
+#define	xfs_dir_trace_g_duc(w,d,u,c)
+#endif /* XFS_DIR_TRACE */
+
+#endif	/* __XFS_DIR_SF_H__ */
diff --git a/include/xfs_dqblk.h b/include/xfs_dqblk.h
new file mode 100644
index 000000000..ec1704d86
--- /dev/null
+++ b/include/xfs_dqblk.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQBLK_H__
+#define __XFS_DQBLK_H__
+
+/*
+ * The ondisk form of a dquot structure.
+ */
+#define XFS_DQUOT_MAGIC	 	0x4451	 	/* 'DQ' */
+#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
+
+/* 
+ * This is the main portion of the on-disk representation of quota 
+ * information for a user. This is the q_core of the xfs_dquot_t that
+ * is kept in kernel memory. We pad this with some more expansion room
+ * to construct the on disk structure.
+ */
+typedef struct	xfs_disk_dquot {
+/*16*/	u_int16_t	d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
+/*8 */	u_int8_t	d_version;	/* dquot version */
+/*8 */	u_int8_t	d_flags;	/* XFS_DQ_USER/XFS_DQ_PROJ */
+/*32*/	xfs_dqid_t	d_id;		/* user id or proj id */
+/*64*/	xfs_qcnt_t	d_blk_hardlimit;/* absolute limit on disk blks */
+/*64*/	xfs_qcnt_t	d_blk_softlimit;/* preferred limit on disk blks */
+/*64*/	xfs_qcnt_t	d_ino_hardlimit;/* maximum # allocated inodes */
+/*64*/	xfs_qcnt_t	d_ino_softlimit;/* preferred inode limit */
+/*64*/	xfs_qcnt_t	d_bcount;	/* disk blocks owned by the user */
+/*64*/	xfs_qcnt_t	d_icount;	/* inodes owned by the user */
+/*32*/	__int32_t	d_itimer;	/* zero if within inode limits; if not,
+					   this is when we refuse service */
+/*32*/	__int32_t	d_btimer;	/* similar to above; for disk blocks */
+/*16*/	xfs_qwarncnt_t  d_iwarns;       /* warnings issued wrt num inodes */
+/*16*/	xfs_qwarncnt_t  d_bwarns;       /* warnings issued wrt disk blocks */
+/*32*/	__int32_t	d_pad0;		/* 64 bit align */
+/*64*/	xfs_qcnt_t	d_rtb_hardlimit;/* absolute limit on realtime blks */
+/*64*/	xfs_qcnt_t	d_rtb_softlimit;/* preferred limit on RT disk blks */
+/*64*/	xfs_qcnt_t	d_rtbcount;	/* realtime blocks owned */
+/*32*/	__int32_t	d_rtbtimer;	/* similar to above; for RT disk blocks */
+/*16*/	xfs_qwarncnt_t  d_rtbwarns;     /* warnings issued wrt RT disk blocks */
+/*16*/	__uint16_t	d_pad;
+} xfs_disk_dquot_t;
+
+/*
+ * This is what goes on disk. This is separated from the xfs_disk_dquot because
+ * carrying the unnecessary padding would be a waste of memory.
+ */
+typedef struct xfs_dqblk {
+	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
+	char              dd_fill[32];	/* filling for posterity */
+} xfs_dqblk_t;
+
+/*
+ * flags for q_flags field in the dquot.
+ */
+#define XFS_DQ_USER	 	0x0001		/* a user quota */
+#define XFS_DQ_PROJ	 	0x0002		/* a project quota */
+
+#define XFS_DQ_FLOCKED		0x0008		/* flush lock taken */
+#define XFS_DQ_DIRTY		0x0010		/* dquot is dirty */
+#define XFS_DQ_WANT		0x0020		/* for lookup/reclaim race */
+#define XFS_DQ_INACTIVE		0x0040		/* dq off mplist & hashlist */
+#define XFS_DQ_MARKER		0x0080		/* sentinel */
+
+/*
+ * In the worst case, when both user and proj quotas are on,
+ * we can have a max of three dquots changing in a single transaction.
+ */
+#define XFS_DQUOT_LOGRES(mp)	(sizeof(xfs_disk_dquot_t) * 3)
+
+#endif	/* __XFS_DQBLK_H__ */
diff --git a/include/xfs_dquot_item.h b/include/xfs_dquot_item.h
new file mode 100644
index 000000000..f8d9049a0
--- /dev/null
+++ b/include/xfs_dquot_item.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+/*
+ * These are the structures used to lay out dquots and quotaoff
+ * records on the log. Quite similar to those of inodes.
+ */
+
+/*
+ * log format struct for dquots.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ */
+typedef struct xfs_dq_logformat {
+	__uint16_t		qlf_type;      /* dquot log item type */
+	__uint16_t		qlf_size;      /* size of this item */
+	xfs_dqid_t		qlf_id;	       /* usr/proj id number : 32 bits */
+	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
+	__int32_t		qlf_len;       /* len of dquot buffer */
+	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
+} xfs_dq_logformat_t;
+
+/*
+ * log format struct for QUOTAOFF records.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
+ * to the first and ensures that the first logitem is taken out of the AIL
+ * only when the last one is securely committed.
+ */	
+typedef struct xfs_qoff_logformat {
+	unsigned short		qf_type;	/* quotaoff log item type */
+	unsigned short		qf_size;	/* size of this item */
+	unsigned int		qf_flags;	/* USR and/or PRJ */
+	char			qf_pad[12];	/* padding for future */
+} xfs_qoff_logformat_t;
+
+
+#ifdef __KERNEL__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+typedef struct xfs_dq_logitem {
+	xfs_log_item_t		 qli_item;	   /* common portion */
+	struct xfs_dquot	*qli_dquot;	   /* dquot ptr */
+	xfs_lsn_t		 qli_flush_lsn;	   /* lsn at last flush */
+	unsigned short           qli_pushbuf_flag; /* one bit used in push_ail */
+#ifdef DEBUG
+	uint64_t                 qli_push_owner;
+#endif
+	xfs_dq_logformat_t	 qli_format;	   /* logged structure */
+} xfs_dq_logitem_t;
+
+
+typedef struct xfs_qoff_logitem {
+	xfs_log_item_t		 qql_item;	/* common portion */
+	struct xfs_qoff_logitem	*qql_start_lip;	/* qoff-start logitem, if any */
+	xfs_qoff_logformat_t	 qql_format;	/* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void		   xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, 
+						    xfs_qoff_logitem_t *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, 
+						   xfs_qoff_logitem_t *, uint);
+extern void		   xfs_trans_log_quotaoff_item(struct xfs_trans *,
+						       xfs_qoff_logitem_t *);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_DQUOT_ITEM_H__ */
diff --git a/include/xfs_extfree_item.h b/include/xfs_extfree_item.h
new file mode 100644
index 000000000..640f8e220
--- /dev/null
+++ b/include/xfs_extfree_item.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_EXTFREE_ITEM_H__
+#define	__XFS_EXTFREE_ITEM_H__
+
+struct xfs_mount;
+struct xfs_zone;
+
+typedef struct xfs_extent {
+	xfs_dfsbno_t	ext_start;
+	xfs_extlen_t	ext_len;
+} xfs_extent_t;
+
+/*
+ * This is the structure used to lay out an efi log item in the
+ * log.  The efi_extents field is a variable size array whose
+ * size is given by efi_nextents.
+ */
+typedef struct xfs_efi_log_format {
+	unsigned short		efi_type;	/* efi log item type */
+	unsigned short		efi_size;	/* size of this item */
+	uint			efi_nextents;	/* # extents to free */
+	__uint64_t		efi_id;		/* efi identifier */
+	xfs_extent_t		efi_extents[1];	/* array of extents to free */
+} xfs_efi_log_format_t;
+
+/*
+ * This is the structure used to lay out an efd log item in the
+ * log.  The efd_extents array is a variable size array whose
+ * size is given by efd_nextents.
+ */
+typedef struct xfs_efd_log_format {
+	unsigned short		efd_type;	/* efd log item type */
+	unsigned short		efd_size;	/* size of this item */
+	uint			efd_nextents;	/* # of extents freed */
+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	xfs_extent_t		efd_extents[1];	/* array of extents freed */
+} xfs_efd_log_format_t;
+
+
+#ifdef __KERNEL__
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define	XFS_EFI_MAX_FAST_EXTENTS	16
+
+/*
+ * Define EFI flags.
+ */
+#define	XFS_EFI_RECOVERED	0x1
+#define	XFS_EFI_COMMITTED	0x2
+#define	XFS_EFI_CANCELED	0x4
+
+/*
+ * This is the "extent free intention" log item.  It is used
+ * to log the fact that some extents need to be freed.  It is
+ * used in conjunction with the "extent free done" log item
+ * described below.
+ */
+typedef struct xfs_efi_log_item {
+	xfs_log_item_t		efi_item;
+	uint			efi_flags;	/* misc flags */
+	uint			efi_next_extent;
+	xfs_efi_log_format_t	efi_format;
+} xfs_efi_log_item_t;
+
+/*
+ * This is the "extent free done" log item.  It is used to log
+ * the fact that some extents earlier mentioned in an efi item
+ * have been freed.
+ */
+typedef struct xfs_efd_log_item {
+	xfs_log_item_t		efd_item;
+	xfs_efi_log_item_t	*efd_efip;
+	uint			efd_next_extent;
+	xfs_efd_log_format_t	efd_format;
+} xfs_efd_log_item_t;
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define	XFS_EFD_MAX_FAST_EXTENTS	16
+
+extern struct xfs_zone	*xfs_efi_zone;
+extern struct xfs_zone	*xfs_efd_zone;
+
+xfs_efi_log_item_t	*xfs_efi_init(struct xfs_mount *, uint);
+xfs_efd_log_item_t	*xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
+				      uint);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_EXTFREE_ITEM_H__ */
diff --git a/include/xfs_fs.h b/include/xfs_fs.h
new file mode 100644
index 000000000..81a8c3c98
--- /dev/null
+++ b/include/xfs_fs.h
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _LINUX_XFS_FS_H
+#define _LINUX_XFS_FS_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+
+/*
+ * SGI's XFS filesystem's major stuff (constants, structures)
+ */
+
+#define XFS_SUPER_MAGIC 0x58465342
+#define XFS_NAME	"xfs"
+
+struct biosize {
+	__u32	 	biosz_flags;
+	__s32		biosz_read;
+	__s32		biosz_write;
+	__s32		dfl_biosz_read;
+	__s32		dfl_biosz_write;
+};
+	
+/* 
+ * direct I/O attribute record used with F_DIOINFO
+ * d_miniosz is the min xfer size, xfer size multiple and file seek offset
+ * alignment.
+ */
+struct dioattr {
+	__u32		d_mem;		/* data buffer memory alignment	*/
+	__u32		d_miniosz;	/* min xfer size		*/
+	__u32		d_maxiosz;	/* max xfer size		*/
+};
+
+/*
+ * Structure for F_FSGETXATTR[A] and F_FSSETXATTR.
+ */
+struct fsxattr {
+	__u32	 	fsx_xflags;	/* xflags field value (get/set)	*/
+	__u32	 	fsx_extsize;	/* extsize field value (get/set)*/
+	__u32	 	fsx_nextents;	/* nextents field value (get)	*/
+	unsigned char	fsx_pad[16];
+};
+
+/*
+ * Flags for the bs_xflags/fsx_xflags field
+ * There should be a one-to-one correspondence between these flags and the
+ * XFS_DIFLAG_s.
+ */
+#define	XFS_XFLAG_REALTIME	0x00000001
+#define	XFS_XFLAG_PREALLOC	0x00000002
+#define	XFS_XFLAG_HASATTR	0x80000000      /* no DIFLAG for this	*/
+#define	XFS_XFLAG_ALL           \
+	( XFS_XFLAG_REALTIME|XFS_XFLAG_PREALLOC|XFS_XFLAG_HASATTR )
+
+
+/*
+ * Structure for F_GETBMAP.
+ * On input, fill in bmv_offset and bmv_length of the first structure
+ * to indicate the area of interest in the file, and bmv_entry with the
+ * number of array elements given.  The first structure is updated on
+ * return to give the offset and length for the next call.
+ */
+struct getbmap {
+	__s64		bmv_offset;	/* file offset of segment in blocks */
+	__s64		bmv_block;	/* starting block (64-bit daddr_t)  */
+	__s64		bmv_length;	/* length of segment, blocks	    */
+	__s32		bmv_count;	/* # of entries in array incl. 1st  */
+	__s32		bmv_entries;	/* # of entries filled in (output)  */
+};
+
+/*
+ *	Structure for F_GETBMAPX.  The fields bmv_offset through bmv_entries
+ *	are used exactly as in the getbmap structure.  The getbmapx structure
+ *	has additional bmv_iflags and bmv_oflags fields. The bmv_iflags field
+ *	is only used for the first structure.  It contains input flags 
+ *	specifying F_GETBMAPX actions.  The bmv_oflags field is filled in
+ *	by the F_GETBMAPX command for each returned structure after the first.
+ */
+struct getbmapx {
+	__s64		bmv_offset;	/* file offset of segment in blocks */
+	__s64		bmv_block;	/* starting block (64-bit daddr_t)  */
+	__s64		bmv_length;	/* length of segment, blocks	    */
+	__s32		bmv_count;	/* # of entries in array incl. 1st  */
+	__s32		bmv_entries;	/* # of entries filled in (output). */
+	__s32		bmv_iflags;	/* input flags (1st structure)	    */
+	__s32		bmv_oflags;	/* output flags (after 1st structure)*/
+	__s32		bmv_unused1;	/* future use			    */
+	__s32		bmv_unused2;	/* future use			    */
+};
+
+/*	bmv_iflags values - set by F_GETBMAPX caller.	*/
+
+#define	BMV_IF_ATTRFORK		0x1	/* return attr fork rather than data */
+#define BMV_IF_NO_DMAPI_READ	0x2	/* Do not generate DMAPI read event  */
+#define BMV_IF_PREALLOC		0x4	/* rtn status BMV_OF_PREALLOC if req */
+
+#define BMV_IF_VALID	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC)
+
+/*	bmv_oflags values - returned from F_GETBMAPX for each non-header segment */
+
+#define BMV_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
+
+/*	Convert getbmap <-> getbmapx - copy the five common fields from p1 to p2. */
+/*	p1 and p2 are structures (not pointers); each is evaluated five times.  */
+#define	GETBMAP_CONVERT(p1,p2) {	\
+	(p2).bmv_offset = (p1).bmv_offset;	\
+	(p2).bmv_block = (p1).bmv_block;	\
+	(p2).bmv_length = (p1).bmv_length;	\
+	(p2).bmv_count = (p1).bmv_count;	\
+	(p2).bmv_entries = (p1).bmv_entries;  }
+
+#ifdef __KERNEL__
+
+/*	Kernel only bmv_iflags value.	*/
+#define	BMV_IF_EXTENDED	0x40000000	/* getbmapx if set */
+
+#endif	/* __KERNEL__ */
+
+/*
+ * Structure for F_FSSETDM.
+ * For use by backup and restore programs to set the XFS on-disk inode
+ * fields di_dmevmask and di_dmstate.  These must be set to exactly and
+ * only values previously obtained via xfs_bulkstat!  (Specifically the
+ * xfs_bstat_t fields bs_dmevmask and bs_dmstate.)
+ */
+struct fsdmidata {
+	__s32		fsd_dmevmask;	/* corresponds to di_dmevmask */
+	__u16		fsd_padding;
+	__u16		fsd_dmstate;	/* corresponds to di_dmstate  */
+};
+
+/*
+ * File segment locking set data type for 64 bit access.
+ * Also used for all the RESV/FREE interfaces.
+ */
+typedef struct xfs_flock64 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start;
+	__s64		l_len;		/* len == 0 means until end of file */
+        __s32		l_sysid;
+        pid_t		l_pid;
+	__s32		l_pad[4];	/* reserve area			    */
+} xfs_flock64_t;
+
+/*
+ * Output for XFS_IOC_FSGEOMETRY
+ */
+typedef struct xfs_fsop_geom {
+	__u32		blocksize;	/* filesystem (data) block size	*/
+	__u32		rtextsize;	/* realtime extent size		*/
+	__u32		agblocks;	/* fsblocks in an AG		*/
+	__u32		agcount;	/* number of allocation groups  */
+	__u32		logblocks;	/* fsblocks in the log		*/
+	__u32		sectsize;	/* (data) sector size, bytes	*/
+	__u32		inodesize;	/* inode size in bytes		*/
+	__u32		imaxpct;	/* max allowed inode space(%)	*/
+	__u64		datablocks;	/* fsblocks in data subvolume	*/
+	__u64		rtblocks;	/* fsblocks in realtime subvol	*/
+	__u64		rtextents;	/* rt extents in realtime subvol*/
+	__u64		logstart;	/* starting fsblock of the log	*/
+	unsigned char	uuid[16];	/* unique id of the filesystem	*/
+	__u32		sunit;		/* stripe unit, fsblocks	*/
+	__u32		swidth;		/* stripe width, fsblocks	*/
+	__s32		version;	/* structure version		*/
+	__u32		flags;		/* superblock version flags	*/
+	__u32		logsectsize;	/* log sector size, bytes	*/
+	__u32		rtsectsize;	/* realtime sector size, bytes	*/
+	__u32		dirblocksize;	/* directory block size, bytes	*/
+} xfs_fsop_geom_t;
+
+/* Output for XFS_FS_COUNTS */
+typedef struct xfs_fsop_counts {
+	__u64	freedata;	/* free data section blocks */
+	__u64	freertx;	/* free rt extents */
+	__u64	freeino;	/* free inodes */
+	__u64	allocino;	/* total allocated inodes */
+} xfs_fsop_counts_t;
+
+/* Input/Output for XFS_GET_RESBLKS and XFS_SET_RESBLKS */
+typedef struct xfs_fsop_resblks {
+	__u64  resblks;
+	__u64  resblks_avail;
+} xfs_fsop_resblks_t;
+
+#define	XFS_FSOP_GEOM_VERSION	0
+
+#define	XFS_FSOP_GEOM_FLAGS_ATTR	0x01	/* attributes in use	*/
+#define	XFS_FSOP_GEOM_FLAGS_NLINK	0x02	/* 32-bit nlink values	*/
+#define	XFS_FSOP_GEOM_FLAGS_QUOTA	0x04	/* quotas enabled	*/
+#define	XFS_FSOP_GEOM_FLAGS_IALIGN	0x08	/* inode alignment	*/
+#define	XFS_FSOP_GEOM_FLAGS_DALIGN	0x10	/* large data alignment	*/
+#define	XFS_FSOP_GEOM_FLAGS_SHARED	0x20	/* read-only shared	*/
+#define	XFS_FSOP_GEOM_FLAGS_EXTFLG	0x40	/* special extent flag	*/
+#define	XFS_FSOP_GEOM_FLAGS_DIRV2	0x80	/* directory version 2	*/
+
+
+/*
+ * Minimum and maximum sizes needed for growth checks
+ */
+#define	XFS_MIN_AG_BLOCKS	64
+#define	XFS_MIN_LOG_BLOCKS	512
+#define	XFS_MAX_LOG_BLOCKS	(64 * 1024)
+#define	XFS_MIN_LOG_BYTES	(256 * 1024)
+#define	XFS_MAX_LOG_BYTES	(128 * 1024 * 1024)
+
+/*
+ * XFS_IOC_FSGROWFSDATA
+ */
+typedef struct xfs_growfs_data {
+	__u64		newblocks;	/* new data subvol size, fsblocks */
+	__u32		imaxpct;	/* new inode space percentage limit */
+} xfs_growfs_data_t;
+
+/*
+ * XFS_IOC_FSGROWFSLOG
+ */
+typedef struct xfs_growfs_log {
+	__u32		newblocks;	/* new log size, fsblocks */
+	__u32		isint;		/* 1 if new log is internal */
+} xfs_growfs_log_t;
+
+/*
+ * XFS_IOC_FSGROWFSRT
+ */
+typedef struct xfs_growfs_rt {
+	__u64		newblocks;	/* new realtime size, fsblocks */
+	__u32		extsize;	/* new realtime extent size, fsblocks */
+} xfs_growfs_rt_t;
+
+
+/*
+ * Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE
+ */
+typedef struct xfs_bstime {
+	time_t		tv_sec;		/* seconds		*/
+	__s32		tv_nsec;	/* and nanoseconds	*/
+} xfs_bstime_t;
+
+typedef struct xfs_bstat {
+	__u64		bs_ino;		/* inode number			*/
+	__u16		bs_mode;	/* type and mode		*/
+	__u16		bs_nlink;	/* number of links		*/
+	__u32		bs_uid;		/* user id			*/
+	__u32		bs_gid;		/* group id			*/
+	__u32		bs_rdev;	/* device value			*/
+	__s32		bs_blksize;	/* block size			*/
+	__s64		bs_size;	/* file size			*/
+	xfs_bstime_t	bs_atime;	/* access time			*/
+	xfs_bstime_t	bs_mtime;	/* modify time			*/
+	xfs_bstime_t	bs_ctime;	/* inode change time		*/
+	int64_t		bs_blocks;	/* number of blocks		*/
+	__u32		bs_xflags;	/* extended flags		*/
+	__s32		bs_extsize;	/* extent size			*/
+	__s32		bs_extents;	/* number of extents		*/
+	__u32		bs_gen;		/* generation count		*/
+	__u16		bs_projid;	/* project id			*/
+	unsigned char	bs_pad[14];	/* pad space, unused		*/
+	__u32		bs_dmevmask;	/* DMIG event mask		*/
+	__u16		bs_dmstate;	/* DMIG state info		*/
+	__u16		bs_aextents;	/* attribute number of extents	*/
+} xfs_bstat_t;
+
+/*
+ * The user-level BulkStat Request interface structure.
+ */
+typedef struct xfs_fsop_bulkreq {
+	__u64		*lastip;	/* last inode # pointer		*/
+	__s32		icount;		/* count of entries in buffer	*/
+	void		*ubuffer;	/* user buffer for inode desc.	*/
+	__s32		*ocount;	/* output count pointer		*/
+} xfs_fsop_bulkreq_t;
+
+
+/*
+ * Structures returned from xfs_inumbers syssgi routine.
+ */
+typedef struct xfs_inogrp {
+	__u64		xi_startino;	/* starting inode number	*/
+	__s32		xi_alloccount;	/* # bits set in allocmask	*/
+	__u64		xi_allocmask;	/* mask of allocated inodes	*/
+} xfs_inogrp_t;
+
+
+/*
+ * The user-level Handle Request interface structure.
+ */
+typedef struct xfs_fsop_handlereq {
+	__u32		fd;		/* fd for FD_TO_HANDLE		*/
+	void		*path;		/* user pathname		*/
+	__u32		oflags;		/* open flags			*/
+	void		*ihandle;	/* user supplied handle		*/
+	__u32		ihandlen;	/* user supplied length		*/
+	void		*ohandle;	/* user buffer for handle	*/
+	__u32		*ohandlen;	/* user buffer length		*/
+} xfs_fsop_handlereq_t;
+
+#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
+/*
+ * Argument of the XFS_IOC_ERROR_INJECTION / XFS_IOC_ERROR_CLEARALL ioctls.
+ */
+typedef struct xfs_error_injection {
+	__s32           fd;
+	__s32           errtag;
+} xfs_error_injection_t;
+#endif /* DEBUG || INDUCE_IO_ERROR */
+
+/*
+ * File system identifier. Should be unique (at least per machine).
+ */
+typedef struct {
+	__u32 val[2];			/* two 32-bit words of fs id */
+} xfs_fsid_t;
+
+/*
+ * File identifier.  Should be unique per filesystem on a single machine.
+ * This is typically used by a stateless file server in order to generate
+ * "file handles".
+ */
+#define MAXFIDSZ        46	/* capacity of fid_data, in bytes */
+
+typedef struct fid {
+	__u16		fid_len;		/* length of data in bytes */
+	unsigned char	fid_data[MAXFIDSZ];	/* data (variable length)  */
+} fid_t;
+
+typedef struct xfs_fid {
+	__u16	xfs_fid_len;		/* length of remainder	*/
+	__u16	xfs_fid_pad;		/* padding		*/
+	__u32	xfs_fid_gen;		/* generation number	*/
+	__u64	xfs_fid_ino;		/* 64-bit inode number	*/
+} xfs_fid_t;
+
+typedef struct xfs_fid2 {
+	__u16	fid_len;	/* length of remainder */
+	__u16	fid_pad;	/* padding, must be zero */
+	__u32	fid_gen;	/* generation number */
+	__u64	fid_ino;	/* inode number; field layout matches xfs_fid */
+} xfs_fid2_t;
+
+typedef struct xfs_handle {
+	union {
+		__s64	    align;	/* force alignment of ha_fid	 */
+		xfs_fsid_t  _ha_fsid;	/* unique file system identifier */
+	} ha_u;
+	xfs_fid_t	ha_fid;		/* file system specific file ID  */
+} xfs_handle_t;
+
+#define ha_fsid ha_u._ha_fsid	/* shorthand for the fsid union member */
+/* Handle size: byte offset of ha_fid.xfs_fid_pad in the handle + xfs_fid_len. */
+#define	XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.xfs_fid_pad  \
+				 - (char *) &(handle))			  \
+				 + (handle).ha_fid.xfs_fid_len)
+/* bcmp() over two whole handles (h1, h2 are pointers); zero means identical. */
+#define XFS_HANDLE_CMP(h1, h2)	bcmp(h1, h2, sizeof (xfs_handle_t))
+/* sizeof an fsid_t; NOTE(review): not the xfs_fsid_t used in xfs_handle - confirm. */
+#define FSHSIZE		sizeof (fsid_t)
+
+
+/*
+ * ioctl commands that replace IRIX fcntl()'s
+ * For 'documentation' purposes more than anything else,
+ * the "cmd #" field reflects the IRIX fcntl number.
+ */
+#define	XFS_IOC_ALLOCSP		_IOW ('X', 10, struct xfs_flock64)
+#define	XFS_IOC_FREESP		_IOW ('X', 11, struct xfs_flock64)
+#define	XFS_IOC_DIOINFO		_IOR ('X', 30, struct dioattr)
+#define	XFS_IOC_FSGETXATTR	_IOR ('X', 31, struct fsxattr)
+#define	XFS_IOC_FSSETXATTR	_IOW ('X', 32, struct fsxattr)
+#define	XFS_IOC_ALLOCSP64	_IOW ('X', 36, struct xfs_flock64)
+#define	XFS_IOC_FREESP64	_IOW ('X', 37, struct xfs_flock64)
+#define	XFS_IOC_GETBMAP		_IOWR('X', 38, struct getbmap)
+#define	XFS_IOC_FSSETDM		_IOW ('X', 39, struct fsdmidata)
+#define	XFS_IOC_RESVSP		_IOW ('X', 40, struct xfs_flock64)
+#define	XFS_IOC_UNRESVSP	_IOW ('X', 41, struct xfs_flock64)
+#define	XFS_IOC_RESVSP64	_IOW ('X', 42, struct xfs_flock64)
+#define	XFS_IOC_UNRESVSP64	_IOW ('X', 43, struct xfs_flock64)
+#define	XFS_IOC_GETBMAPA	_IOWR('X', 44, struct getbmap)
+#define	XFS_IOC_FSGETXATTRA	_IOR ('X', 45, struct fsxattr)
+#define	XFS_IOC_SETBIOSIZE	_IOW ('X', 46, struct biosize)
+#define	XFS_IOC_GETBIOSIZE	_IOR ('X', 47, struct biosize)
+#define	XFS_IOC_GETBMAPX	_IOWR('X', 56, struct getbmap)
+
+/*
+ * ioctl commands that replace IRIX syssgi()'s
+ */
+#define	XFS_IOC_FSGEOMETRY	     _IOR ('X', 100, struct xfs_fsop_geom)
+#define	XFS_IOC_FSBULKSTAT	     _IOWR('X', 101, struct xfs_fsop_bulkreq)
+#define	XFS_IOC_FSBULKSTAT_SINGLE    _IOWR('X', 102, struct xfs_fsop_bulkreq)
+#define	XFS_IOC_FSINUMBERS	     _IOWR('X', 103, struct xfs_fsop_bulkreq)
+#define	XFS_IOC_PATH_TO_FSHANDLE     _IOWR('X', 104, struct xfs_fsop_handlereq)
+#define	XFS_IOC_PATH_TO_HANDLE	     _IOWR('X', 105, struct xfs_fsop_handlereq)
+#define	XFS_IOC_FD_TO_HANDLE	     _IOWR('X', 106, struct xfs_fsop_handlereq)
+#define	XFS_IOC_OPEN_BY_HANDLE	     _IOWR('X', 107, struct xfs_fsop_handlereq)
+#define	XFS_IOC_READLINK_BY_HANDLE   _IOWR('X', 108, struct xfs_fsop_handlereq)
+#define XFS_IOC_SWAPEXT		     _IOWR('X', 109, struct xfs_swapext)
+#define	XFS_IOC_FSGROWFSDATA	     _IOW('X', 110, struct xfs_growfs_data)
+#define	XFS_IOC_FSGROWFSLOG	     _IOW('X', 111, struct xfs_growfs_log)
+#define	XFS_IOC_FSGROWFSRT	     _IOW('X', 112, struct xfs_growfs_rt)
+#define	XFS_IOC_FSCOUNTS	     _IOR ('X', 113, struct xfs_fsop_counts)
+#define	XFS_IOC_SET_RESBLKS	     _IOR ('X', 114, struct xfs_fsop_resblks) /* NOTE(review): a "set" op encoded as _IOR; the direction is part of the command number, so confirm against the kernel before changing */
+#define	XFS_IOC_GET_RESBLKS	     _IOR ('X', 115, struct xfs_fsop_resblks)
+#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
+#define XFS_IOC_ERROR_INJECTION  _IOW('X', 116, struct xfs_error_injection)
+#define XFS_IOC_ERROR_CLEARALL   _IOW('X', 117, struct xfs_error_injection)
+#endif /* DEBUG || INDUCE_IO_ERROR */
+
+/*
+ * ioctl command to export information not in standard interfaces
+ *	140: IRIX statvfs.f_fstr field - UUID from the superblock (16 bytes)
+ */
+#define XFS_IOC_GETFSUUID	_IOR ('X', 140, unsigned char[16])
+
+
+/*
+ * Block I/O parameterization.  A basic block (BB) is the smallest unit of
+ * filesystem allocation, and must == NBPSCTR.  Length units given to bio
+ * routines are in BB's.  With BBSHIFT == 9 a BB is 512 bytes.
+ */
+#define	BBSHIFT		9
+#define	BBSIZE		(1<<BBSHIFT)
+#define	BBMASK		(BBSIZE-1)
+#define	BTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)	/* bytes to BBs, rounding up */
+#define	BTOBBT(bytes)	((__u64)(bytes) >> BBSHIFT)	/* bytes to BBs, truncating */
+#define	BBTOB(bbs)	((bbs) << BBSHIFT)	/* BBs to bytes, in the type of bbs */
+#define OFFTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)	/* rounding up */
+#define	OFFTOBBT(bytes)	((__u64)(bytes) >> BBSHIFT)	/* truncating */
+#define	BBTOOFF(bbs)	((__u64)(bbs) << BBSHIFT)	/* BBs to bytes, as __u64 */
+
+#define SEEKLIMIT32	0x7fffffff
+#define BBSEEKLIMIT32	BTOBBT(SEEKLIMIT32)
+#define SEEKLIMIT	0x7fffffffffffffffLL
+#define BBSEEKLIMIT	OFFTOBBT(SEEKLIMIT)
+
+
+#ifdef __KERNEL__
+
+/*
+ * Function prototypes (declared for kernel builds only)
+ */
+extern int init_xfs_fs(void);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _LINUX_XFS_FS_H */
diff --git a/include/xfs_ialloc.h b/include/xfs_ialloc.h
new file mode 100644
index 000000000..2693501ac
--- /dev/null
+++ b/include/xfs_ialloc.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IALLOC_H__
+#define	__XFS_IALLOC_H__
+
+struct xfs_buf;
+struct xfs_dinode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Inode allocation parameters: a function call, or a direct read of mp.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_INODES)
+int xfs_ialloc_inodes(struct xfs_mount *mp);
+#define	XFS_IALLOC_INODES(mp)	xfs_ialloc_inodes(mp)
+#else
+#define	XFS_IALLOC_INODES(mp)	((mp)->m_ialloc_inos)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_BLOCKS)
+xfs_extlen_t xfs_ialloc_blocks(struct xfs_mount *mp);
+#define	XFS_IALLOC_BLOCKS(mp)	xfs_ialloc_blocks(mp)
+#else
+#define	XFS_IALLOC_BLOCKS(mp)	((mp)->m_ialloc_blks)
+#endif
+
+/*
+ * For small block file systems, move inodes in clusters of this size.
+ * When we don't have a lot of memory, however, we go a bit smaller
+ * to reduce the number of AGI and ialloc btree blocks we need to keep
+ * around for xfs_dilocate().  We choose which one to use in
+ * xfs_mount_int(); XFS_INODE_CLUSTER_SIZE(mp) reads the chosen value.
+ */
+#define	XFS_INODE_BIG_CLUSTER_SIZE	8192
+#define	XFS_INODE_SMALL_CLUSTER_SIZE	4096
+#define	XFS_INODE_CLUSTER_SIZE(mp)	((mp)->m_inode_cluster_size)
+
+/*
+ * Make an inode pointer from buffer b and index o (o << sb_inodelog).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MAKE_IPTR)
+struct xfs_dinode *xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o);
+#define	XFS_MAKE_IPTR(mp,b,o) 		xfs_make_iptr(mp,b,o)
+#else
+#define	XFS_MAKE_IPTR(mp,b,o) \
+	((xfs_dinode_t *)(xfs_buf_offset(b, (o) << (mp)->m_sb.sb_inodelog)))
+#endif
+
+/*
+ * Find a free (set) bit in the inode bitmask *fp (macro form: xfs_lowbit64).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_FIND_FREE)
+int xfs_ialloc_find_free(xfs_inofree_t *fp);
+#define	XFS_IALLOC_FIND_FREE(fp)	xfs_ialloc_find_free(fp)
+#else
+#define	XFS_IALLOC_FIND_FREE(fp)	xfs_lowbit64(*(fp))
+#endif
+
+
+#ifdef __KERNEL__
+
+/*
+ * Prototypes for visible xfs_ialloc.c routines.
+ */
+
+/*
+ * Allocate an inode on disk.
+ * Mode is used to tell whether the new inode will need space, and whether
+ * it is a directory.
+ *
+ * To work within the constraint of one allocation per transaction,
+ * xfs_dialloc() is designed to be called twice if it has to do an
+ * allocation to make more free inodes.  If an inode is
+ * available without an allocation, *agbp would be set to the current
+ * agbp and *alloc_done set to false.
+ * If an allocation needed to be done, *agbp would be set to the
+ * inode header of the allocation group and *alloc_done set to true.
+ * The caller should then commit the current transaction and allocate a new
+ * transaction.  xfs_dialloc() should then be called again with
+ * the agbp value returned from the previous call.
+ *
+ * Once we successfully pick an inode its number is returned in *inop and
+ * the on-disk data structures are updated.  The inode itself is not read
+ * in, since doing so would break ordering constraints with xfs_reclaim.
+ *
+ * *agbp should be set to NULL on the first call, *alloc_done set to FALSE.
+ */
+int					/* error */
+xfs_dialloc(
+	struct xfs_trans *tp,		/* transaction pointer */
+	xfs_ino_t	parent,		/* parent inode (directory) */
+	mode_t		mode,		/* mode bits for new inode */
+	int		okalloc,	/* ok to allocate more space */
+	struct xfs_buf	**agbp,		/* buf for a.g. inode header */
+	boolean_t	*alloc_done,	/* an allocation was done to replenish
+					   the free inodes */
+	xfs_ino_t	*inop);		/* inode number allocated */
+
+/*
+ * Free disk inode.  Carefully avoids touching the incore inode; all
+ * incore manipulations are the caller's responsibility.
+ * The on-disk inode is not changed by this operation, only the
+ * btree (free inode mask) is changed.
+ */
+int					/* error */
+xfs_difree(
+	struct xfs_trans *tp,		/* transaction pointer */
+	xfs_ino_t	inode);		/* inode to be freed */
+
+/*
+ * Return the location of the inode in *bno/*len/*off,
+ * for mapping it into a buffer.
+ */
+int
+xfs_dilocate(
+	struct xfs_mount *mp,		/* file system mount structure */
+	struct xfs_trans *tp,		/* transaction pointer */
+	xfs_ino_t	ino,		/* inode to locate */
+	xfs_fsblock_t	*bno,		/* output: block containing inode */
+	int		*len,		/* output: num blocks in cluster */
+	int		*off,		/* output: index in block of inode */
+	uint		flags);		/* flags for inode btree lookup */
+
+/*
+ * Compute and fill in the value of m_in_maxlevels in the mount structure.
+ */
+void
+xfs_ialloc_compute_maxlevels(
+	struct xfs_mount *mp);		/* file system mount structure */
+
+/*
+ * Log the specified fields of the AG header (inode section).
+ */
+void
+xfs_ialloc_log_agi(
+	struct xfs_trans *tp,		/* transaction pointer */
+	struct xfs_buf	*bp,		/* allocation group header buffer */
+	int		fields);	/* bitmask of fields to log */
+
+/*
+ * Read in the allocation group header (inode allocation section).
+ */
+int					/* error */
+xfs_ialloc_read_agi(
+	struct xfs_mount *mp,		/* file system mount structure */
+	struct xfs_trans *tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	struct xfs_buf	**bpp);		/* allocation group hdr buf */
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_IALLOC_H__ */
diff --git a/include/xfs_ialloc_btree.h b/include/xfs_ialloc_btree.h
new file mode 100644
index 000000000..e49b2597b
--- /dev/null
+++ b/include/xfs_ialloc_btree.h
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IALLOC_BTREE_H__
+#define	__XFS_IALLOC_BTREE_H__
+
+/*
+ * Inode map on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_btree_sblock;
+struct xfs_mount;
+
+/*
+ * There is a btree for the inode map per allocation group.
+ */
+#define	XFS_IBT_MAGIC	0x49414254	/* 'IABT' */
+
+typedef	__uint64_t	xfs_inofree_t;	/* free inode mask, one bit per inode */
+#define	XFS_INODES_PER_CHUNK	(NBBY * sizeof(xfs_inofree_t))	/* bits in the mask */
+#define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3)	/* +3: log2 of the 8-byte mask size */
+#define	XFS_INOBT_ALL_FREE	((xfs_inofree_t)-1)	/* every mask bit set */
+/* n set bits starting at bit i (all-ones << i when n >= XFS_INODES_PER_CHUNK). */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASKN)
+xfs_inofree_t xfs_inobt_maskn(int i, int n);
+#define	XFS_INOBT_MASKN(i,n)		xfs_inobt_maskn(i,n)
+#else
+#define	XFS_INOBT_MASKN(i,n)	\
+	((((n) >= XFS_INODES_PER_CHUNK ? \
+		(xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i))
+#endif
+
+/*
+ * Data record structure: start, free count and free mask of an inode chunk
+ */
+typedef struct xfs_inobt_rec
+{
+	xfs_agino_t	ir_startino;	/* starting inode number */
+	__int32_t	ir_freecount;	/* count of free inodes (set bits) */
+	xfs_inofree_t	ir_free;	/* free inode mask */
+} xfs_inobt_rec_t;
+
+/*
+ * Key structure: holds only the starting inode number
+ */
+typedef struct xfs_inobt_key
+{
+	xfs_agino_t	ir_startino;	/* starting inode number */
+} xfs_inobt_key_t;
+
+typedef xfs_agblock_t xfs_inobt_ptr_t;	/* btree pointer type */
+					/* btree block header type (next line) */
+typedef	struct xfs_btree_sblock xfs_inobt_block_t;
+/* View a buffer's data pointer (XFS_BUF_PTR) as an inode btree block header. */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_INOBT_BLOCK)
+xfs_inobt_block_t *xfs_buf_to_inobt_block(struct xfs_buf *bp);
+#define	XFS_BUF_TO_INOBT_BLOCK(bp)	xfs_buf_to_inobt_block(bp)
+#else
+#define	XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+/*
+ * Bit manipulations for ir_free: single-bit mask, test, set and clear of bit i.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASK)
+xfs_inofree_t xfs_inobt_mask(int i);
+#define	XFS_INOBT_MASK(i)		xfs_inobt_mask(i)
+#else
+#define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE)
+int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch);
+#define	XFS_INOBT_IS_FREE(rp,i,arch)	xfs_inobt_is_free(rp,i,arch)
+#else
+#define	XFS_INOBT_IS_FREE(rp,i,arch)	((INT_GET((rp)->ir_free, arch) \
+                                         & XFS_INOBT_MASK(i)) != 0)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE)
+void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch);
+#define	XFS_INOBT_SET_FREE(rp,i,arch)	xfs_inobt_set_free(rp,i,arch)
+#else
+#define	XFS_INOBT_SET_FREE(rp,i,arch)	(INT_MOD_EXPR((rp)->ir_free, arch, |= XFS_INOBT_MASK(i)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_CLR_FREE)
+void xfs_inobt_clr_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch);
+#define	XFS_INOBT_CLR_FREE(rp,i,arch)	xfs_inobt_clr_free(rp,i,arch)
+#else
+#define	XFS_INOBT_CLR_FREE(rp,i,arch)	(INT_MOD_EXPR((rp)->ir_free, arch, &= ~XFS_INOBT_MASK(i)))
+#endif
+
+/*
+ * Block size is 1 << bc_blocklog at every level (macro form ignores lev).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_SIZE)
+int xfs_inobt_block_size(int lev, struct xfs_btree_cur *cur);
+#define	XFS_INOBT_BLOCK_SIZE(lev,cur)	xfs_inobt_block_size(lev,cur)
+#else
+#define	XFS_INOBT_BLOCK_SIZE(lev,cur)	(1 << (cur)->bc_blocklog)
+#endif
+/* Record limits: array slot 0 when lev == 0, slot 1 otherwise; lev may be any expression. */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MAXRECS)
+int xfs_inobt_block_maxrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_INOBT_BLOCK_MAXRECS(lev,cur)	xfs_inobt_block_maxrecs(lev,cur)
+#else
+#define	XFS_INOBT_BLOCK_MAXRECS(lev,cur)	\
+	((cur)->bc_mp->m_inobt_mxr[(lev) != 0])
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MINRECS)
+int xfs_inobt_block_minrecs(int lev, struct xfs_btree_cur *cur);
+#define	XFS_INOBT_BLOCK_MINRECS(lev,cur)	xfs_inobt_block_minrecs(lev,cur)
+#else
+#define	XFS_INOBT_BLOCK_MINRECS(lev,cur)	\
+	((cur)->bc_mp->m_inobt_mnr[(lev) != 0])
+#endif
+/* True when bc_ptrs[0] equals bb_numrecs of the block held in bc_bufs[0]. */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_LAST_REC)
+int xfs_inobt_is_last_rec(struct xfs_btree_cur *cur);
+#define	XFS_INOBT_IS_LAST_REC(cur)	xfs_inobt_is_last_rec(cur)
+#else
+#define	XFS_INOBT_IS_LAST_REC(cur)	\
+	((cur)->bc_ptrs[0] == \
+		INT_GET(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs, ARCH_CONVERT))
+#endif
+
+/*
+ * Maximum number of inode btree levels (macro form reads mp->m_in_maxlevels).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IN_MAXLEVELS)
+int xfs_in_maxlevels(struct xfs_mount *mp);
+#define	XFS_IN_MAXLEVELS(mp)		xfs_in_maxlevels(mp)
+#else
+#define	XFS_IN_MAXLEVELS(mp)		((mp)->m_in_maxlevels)
+#endif
+
+/*
+ * Block numbers in the AG: XFS_CNT_BLOCK + 1, then one more for PREALLOC.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IBT_BLOCK)
+xfs_agblock_t xfs_ibt_block(struct xfs_mount *mp);
+#define	XFS_IBT_BLOCK(mp)		xfs_ibt_block(mp)
+#else
+#define	XFS_IBT_BLOCK(mp)	((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_PREALLOC_BLOCKS)
+xfs_agblock_t xfs_prealloc_blocks(struct xfs_mount *mp);
+#define	XFS_PREALLOC_BLOCKS(mp)		xfs_prealloc_blocks(mp)
+#else
+#define	XFS_PREALLOC_BLOCKS(mp)	((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#endif
+
+/*
+ * Address macros: records use level-0 sizes, keys and pointers level-1 sizes.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_REC_ADDR)
+xfs_inobt_rec_t *
+xfs_inobt_rec_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_INOBT_REC_ADDR(bb,i,cur)	xfs_inobt_rec_addr(bb,i,cur)
+#else
+#define	XFS_INOBT_REC_ADDR(bb,i,cur)	\
+	XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, i, \
+		XFS_INOBT_BLOCK_MAXRECS(0, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_KEY_ADDR)
+xfs_inobt_key_t *
+xfs_inobt_key_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_INOBT_KEY_ADDR(bb,i,cur)	xfs_inobt_key_addr(bb,i,cur)
+#else
+#define	XFS_INOBT_KEY_ADDR(bb,i,cur)	\
+	XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \
+		XFS_INOBT_BLOCK_MAXRECS(1, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_PTR_ADDR)
+xfs_inobt_ptr_t *
+xfs_inobt_ptr_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define	XFS_INOBT_PTR_ADDR(bb,i,cur)	xfs_inobt_ptr_addr(bb,i,cur)
+#else
+#define	XFS_INOBT_PTR_ADDR(bb,i,cur)	\
+	XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \
+		XFS_INOBT_BLOCK_MAXRECS(1, cur))
+#endif
+
+/*
+ * Prototypes for externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the given level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_inobt_decrement(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat);	/* success/failure */
+
+#ifdef _NOTYET_
+/*
+ * Delete the record pointed to by cur (declared only if _NOTYET_ is defined).
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int					/* error */
+xfs_inobt_delete(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*stat);	/* success/failure */
+#endif	/* _NOTYET_ */
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int					/* error */
+xfs_inobt_get_rec(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agino_t		*ino,	/* output: starting inode of chunk */
+	__int32_t		*fcnt,	/* output: number of free inodes */
+	xfs_inofree_t		*free,	/* output: free inode mask */
+	int			*stat,	/* output: success/failure */
+	xfs_arch_t		arch);	/* input: architecture (passed by value) */
+
+/*
+ * Increment cursor by one record at the given level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_inobt_increment(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat);	/* success/failure */
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int					/* error */
+xfs_inobt_insert(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*stat);	/* success/failure */
+
+/*
+ * Look up the record equal to ino in the btree given by cur.
+ */
+int					/* error */
+xfs_inobt_lookup_eq(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agino_t		ino,	/* starting inode of chunk */
+	__int32_t		fcnt,	/* free inode count */
+	xfs_inofree_t		free,	/* free inode mask */
+	int			*stat);	/* success/failure */
+
+/*
+ * Look up the first record greater than or equal to ino
+ * in the btree given by cur.
+ */
+int					/* error */
+xfs_inobt_lookup_ge(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agino_t		ino,	/* starting inode of chunk */
+	__int32_t		fcnt,	/* free inode count */
+	xfs_inofree_t		free,	/* free inode mask */
+	int			*stat);	/* success/failure */
+
+/*
+ * Look up the first record less than or equal to ino
+ * in the btree given by cur.
+ */
+int					/* error */
+xfs_inobt_lookup_le(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agino_t		ino,	/* starting inode of chunk */
+	__int32_t		fcnt,	/* free inode count */
+	xfs_inofree_t		free,	/* free inode mask */
+	int			*stat);	/* success/failure */
+ 
+/*
+ * Update the record referred to by cur to the value given
+ * by [ino, fcnt, free].
+ * This either works (returns 0) or gets an EFSCORRUPTED error.
+ */
+int					/* error */
+xfs_inobt_update(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agino_t		ino,	/* starting inode of chunk */
+	__int32_t		fcnt,	/* free inode count */
+	xfs_inofree_t		free);	/* free inode mask */
+
+#endif	/* __XFS_IALLOC_BTREE_H__ */
diff --git a/include/xfs_imap.h b/include/xfs_imap.h
new file mode 100644
index 000000000..54b58d6af
--- /dev/null
+++ b/include/xfs_imap.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IMAP_H__
+#define	__XFS_IMAP_H__
+
+/*
+ * This is the structure passed to xfs_imap() to map
+ * an inode number to its on-disk location.
+ */
+typedef struct xfs_imap {
+ 	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
+	uint		im_len;		/* length in BBs of inode chunk */
+	xfs_agblock_t	im_agblkno;	/* logical block of inode chunk in ag */
+	ushort		im_ioffset;	/* inode offset in block, in inodes */
+	ushort		im_boffset;	/* inode offset in block, in bytes */
+} xfs_imap_t;
+/* Kernel builds only: xfs_imap() takes the xfs_imap_t described above. */
+#ifdef __KERNEL__
+struct xfs_mount;
+struct xfs_trans;
+int	xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+		 xfs_imap_t *, uint);
+#endif
+
+#endif	/* __XFS_IMAP_H__ */
diff --git a/include/xfs_inode.h b/include/xfs_inode.h
new file mode 100644
index 000000000..742ca12d7
--- /dev/null
+++ b/include/xfs_inode.h
@@ -0,0 +1,615 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_INODE_H__
+#define	__XFS_INODE_H__
+
+/*
+ * File incore extent information, present for each of data & attr forks.
+ */
+#define	XFS_INLINE_EXTS	2
+#define	XFS_INLINE_DATA	32
+typedef struct xfs_ifork {
+	int			if_bytes; 	/* bytes in if_u1 */
+	int			if_real_bytes;	/* bytes allocated in if_u1 */
+	xfs_bmbt_block_t	*if_broot;	/* file's incore btree root */
+	short			if_broot_bytes;	/* bytes allocated for root */
+	unsigned char		if_flags;	/* per-fork flags */
+	unsigned char		if_ext_max;	/* max # of extent records */
+	xfs_extnum_t		if_lastex;	/* last if_extents used */
+	union {
+		xfs_bmbt_rec_t	*if_extents;	/* linear map file exts */
+		char		*if_data;	/* inline file data */
+	} if_u1;
+	union {
+		xfs_bmbt_rec_t	if_inline_ext[XFS_INLINE_EXTS];
+						/* very small file extents */
+		char		if_inline_data[XFS_INLINE_DATA];
+						/* very small file data */
+		xfs_dev_t	if_rdev;	/* dev number if special */
+		uuid_t		if_uuid;	/* mount point value */
+	} if_u2;
+} xfs_ifork_t;
+
+/*
+ * Flags for xfs_ichgtime().
+ */
+#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
+#define	XFS_ICHGTIME_ACC	0x2	/* data fork access timestamp */
+#define	XFS_ICHGTIME_CHG	0x4	/* inode field change timestamp */
+
+/*
+ * Per-fork incore inode flags.
+ */
+#define	XFS_IFINLINE	0x0001	/* Inline data is read in */
+#define	XFS_IFEXTENTS	0x0002	/* All extent pointers are read in */
+#define	XFS_IFBROOT	0x0004	/* if_broot points to the bmap b-tree root */
+
+/*
+ * Flags for xfs_imap() and xfs_dilocate().
+ */
+#define	XFS_IMAP_LOOKUP		0x1
+
+/*
+ * Maximum number of extent pointers in if_u1.if_extents.
+ */
+#define	XFS_MAX_INCORE_EXTENTS	32768
+
+
+#ifdef __KERNEL__
+struct bhv_desc;
+struct cred;
+struct ktrace;
+struct vnode;
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_bmbt_irec;
+struct xfs_bmbt_block;
+struct xfs_ext_attr;
+struct xfs_inode;
+struct xfs_inode_log_item;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot;
+struct pm;
+
+
+/*
+ * This structure is used to communicate which extents of a file
+ * were holes when a write started from xfs_write_file() to
+ * xfs_strat_read().  This is necessary so that we can know which
+ * blocks need to be zeroed when they are read in in xfs_strat_read()
+ * if they weren't allocated when the buffer given to xfs_strat_read()
+ * was mapped.
+ *
+ * We keep a list of these attached to the inode.  The list is
+ * protected by the inode lock and the fact that the io lock is
+ * held exclusively by writers.
+ */
+typedef struct xfs_gap {
+	struct xfs_gap	*xg_next;
+	xfs_fileoff_t	xg_offset_fsb;
+	xfs_extlen_t	xg_count_fsb;
+} xfs_gap_t;
+
+/*
+ * This structure is used to hold common pieces of the buffer
+ * and file for xfs_dio_write and xfs_dio_read.
+ */
+typedef	struct xfs_dio {
+	struct xfs_buf	*xd_bp;
+	bhv_desc_t	*xd_bdp;
+	struct xfs_inode *xd_ip;
+	struct xfs_iocore *xd_io;
+	struct cred	*xd_cr;
+	struct pm	*xd_pmp;
+	int		xd_blkalgn;
+	int		xd_ioflag;
+	xfs_off_t		xd_start;
+	size_t		xd_length;
+} xfs_dio_t;
+
+
+typedef struct xfs_iocore {
+	void			*io_obj;	/* pointer to container
+						 * inode or dcxvn structure */
+	struct xfs_mount	*io_mount;	/* fs mount struct ptr */
+	mrlock_t		*io_lock;	/* inode lock */
+	mrlock_t		*io_iolock;	/* inode IO lock */
+	sema_t			*io_flock;	/* inode flush lock */
+	mutex_t			io_rlock;	/* inode readahead mutex */
+
+	/* I/O state */
+	xfs_off_t		io_offset;	/* last buf offset */
+	xfs_off_t		io_next_offset;	/* seq read detector */
+	unsigned int		io_last_req_sz;	/* last read size */
+	unsigned int		io_size;	/* file io buffer len */
+	xfs_fsize_t		io_new_size;	/* sz when write completes */
+	xfs_off_t		io_write_offset;
+						/* start off of curr write */
+	xfs_fileoff_t		io_reada_blkno;	/* next blk to start ra */
+	xfs_gap_t		*io_gap_list;	/* hole list in write range */
+	unsigned int		io_readio_blocks;	/* read buffer size */
+	unsigned int		io_writeio_blocks;	/* write buffer size */
+	uchar_t			io_readio_log;	/* log2 of read buffer size */
+	uchar_t			io_writeio_log;	/* log2 of write buffer size */
+	uchar_t			io_max_io_log;	/* max r/w io value */
+	int			io_queued_bufs;	/* count of xfsd queued bufs*/
+
+	/* Miscellaneous state. */
+	unsigned int		io_flags;	/* IO related flags */
+
+	/* DMAPI state */
+	__uint32_t	io_dmevmask;	/* DMIG event mask */
+	__uint16_t	io_dmstate;	/* DMIG state info */
+} xfs_iocore_t;
+
+#define XFS_IO_INODE(io)	((xfs_inode_t *) ((io)->io_obj))
+#define XFS_IO_DCXVN(io)	((dcxvn_t *) ((io)->io_obj))
+
+/*
+ * Flags in the flags field
+ */
+
+#define XFS_IOCORE_ISXFS	0x01
+#define XFS_IOCORE_ISCXFS	0x02
+#define XFS_IOCORE_RT		0x04
+#define XFS_IOCORE_UIOSZ	0x08
+
+#define IO_IS_XFS(io)	((io)->io_flags & XFS_IOCORE_ISXFS)
+
+/*
+ * Clear out the read-ahead state in the in-core inode.
+ * We actually only need to clear io_next_offset and
+ * io_last_req_sz (under io_rlock) to get the effect of
+ * making all the read ahead state unusable.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INODE_CLEAR_READ_AHEAD)
+void xfs_inode_clear_read_ahead(xfs_iocore_t *io);
+#define XFS_INODE_CLEAR_READ_AHEAD(io)          xfs_inode_clear_read_ahead(io)
+#else
+#define XFS_INODE_CLEAR_READ_AHEAD(io)  do {    \
+		mutex_lock(&((io)->io_rlock), PINOD);    \
+		(io)->io_next_offset = 0;          \
+		(io)->io_last_req_sz = 0;          \
+		mutex_unlock(&((io)->io_rlock)); } while (0)
+#endif
+
+
+/*
+ * xfs_iocore prototypes
+ */
+
+extern void xfs_iocore_inode_init(struct xfs_inode *);
+extern void xfs_iocore_inode_reinit(struct xfs_inode *);
+extern void xfs_iocore_reset(xfs_iocore_t *);
+extern void xfs_iocore_destroy(xfs_iocore_t *);
+
+
+/*
+ * This is the type used in the xfs inode hash table.
+ * An array of these is allocated for each mounted
+ * file system to hash the inodes for that file system.
+ */
+typedef struct xfs_ihash {
+	struct xfs_inode	*ih_next;	
+	mrlock_t		ih_lock;
+	uint			ih_version;
+} xfs_ihash_t;
+#if defined(MP)
+#pragma set type attribute xfs_ihash align=128
+#endif
+
+/*
+ * Inode hashing and hash bucket locking; bucket = (uint)ino % m_ihsize.
+ */
+#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1)
+#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
+
+/*
+ * This is the xfs inode cluster hash.  This hash is used by xfs_iflush to
+ * find inodes that share a cluster and can be flushed to disk at the same
+ * time.
+ */
+
+typedef struct xfs_chashlist {
+	struct xfs_chashlist	*chl_next;
+	struct xfs_inode	*chl_ip;
+	xfs_daddr_t		chl_blkno;	/* starting block number of 
+						 * the cluster */
+#ifdef DEBUG
+	struct xfs_buf		*chl_buf;	/* debug: the inode buffer */
+#endif
+} xfs_chashlist_t;
+
+typedef struct xfs_chash {
+	xfs_chashlist_t		*ch_list;
+	lock_t			ch_lock;
+} xfs_chash_t;
+
+
+/*
+ * This is the xfs in-core inode structure.
+ * Most of the on-disk inode is embedded in the i_d field.
+ *
+ * The extent pointers/inline file space, however, are managed
+ * separately.  The memory for this information is pointed to by
+ * the if_u1 unions depending on the type of the data.
+ * This is used to linearize the array of extents for fast in-core
+ * access.  This is used until the file's number of extents
+ * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers
+ * are accessed through the buffer cache.
+ *
+ * Other state kept in the in-core inode is used for identification,
+ * locking, transactional updating, etc of the inode.
+ *
+ * Generally, we do not want to hold the i_rlock while holding the 
+ * i_ilock. Hierarchy is i_iolock followed by i_rlock. 
+ *
+ * xfs_iptr_t contains all the inode fields up to and including the
+ * i_mount field; it is used as a marker in the inode chain off the
+ * mount structure by xfs_sync calls.
+ */
+
+typedef struct {
+	struct xfs_ihash	*ip_hash;	/* pointer to hash header */
+	struct xfs_inode	*ip_next;	/* inode hash link forw */
+	struct xfs_inode	*ip_mnext;	/* next inode in mount list */
+	struct xfs_inode	*ip_mprev;	/* ptr to prev inode */
+	struct xfs_inode	**ip_prevp;	/* ptr to prev i_next */
+	struct xfs_mount	*ip_mount;	/* fs mount struct ptr */
+} xfs_iptr_t;
+
+typedef struct xfs_inode {
+	/* Inode linking and identification information. */
+	struct xfs_ihash	*i_hash;	/* pointer to hash header */
+	struct xfs_inode	*i_next;	/* inode hash link forw */
+	struct xfs_inode	*i_mnext;	/* next inode in mount list */
+	struct xfs_inode	*i_mprev;	/* ptr to prev inode */
+	struct xfs_inode	**i_prevp;	/* ptr to prev i_next */
+	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
+	struct bhv_desc		i_bhv_desc;	/* inode behavior descriptor*/
+	struct xfs_dquot	*i_udquot;	/* user dquot */
+	struct xfs_dquot	*i_pdquot;	/* project dquot */
+
+	/* Inode location stuff */
+	xfs_ino_t		i_ino;		/* inode number (agno/agino)*/
+	xfs_daddr_t		i_blkno;	/* blkno of inode buffer */
+	dev_t			i_dev;		/* dev for this inode */
+	ushort			i_len;		/* len of inode buffer */
+	ushort			i_boffset;	/* off of inode in buffer */
+
+	/* Extent information. */
+	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
+	xfs_ifork_t		i_df;		/* data fork */
+
+	/* Transaction and locking information. */
+	struct xfs_trans	*i_transp;	/* ptr to owning transaction*/
+	struct xfs_inode_log_item *i_itemp;	/* logging information */
+	mrlock_t		i_lock;		/* inode lock */
+	mrlock_t		i_iolock;	/* inode IO lock */
+	sema_t			i_flock;	/* inode flush lock */
+	unsigned int		i_pincount;	/* inode pin count */
+	sv_t			i_pinsema;	/* inode pin sema */
+	lock_t			i_ipinlock;	/* inode pinning mutex */
+	struct xfs_inode	*i_release;	/* inode to unref */
+
+	/* I/O state */
+	xfs_iocore_t		i_iocore;	/* I/O core */
+
+	/* Miscellaneous state. */
+	unsigned short		i_flags;	/* see defined flags below */
+	unsigned short		i_update_core;	/* timestamps/size is dirty */
+	unsigned short		i_update_size;	/* di_size field is dirty */
+	unsigned int		i_gen;		/* generation count */
+	unsigned int		i_delayed_blks;	/* count of delay alloc blks */
+	struct xfs_ext_attr	*i_ext_attr;	/* Critical ext attributes */
+	void			*i_ilock_ra;	/* current ilock ret addr */
+
+	xfs_dinode_core_t	i_d;		/* most of ondisk inode */
+	xfs_chashlist_t		*i_chash;	/* cluster hash list header */
+	struct xfs_inode	*i_cnext;	/* cluster hash link forward */
+	struct xfs_inode	*i_cprev;	/* cluster hash link backward */
+
+#ifdef DEBUG
+	/* Trace buffers per inode. */
+	struct ktrace		*i_xtrace;	/* inode extent list trace */
+	struct ktrace		*i_btrace;	/* inode bmap btree trace */
+	struct ktrace		*i_rwtrace;	/* inode read/write trace */
+	struct ktrace		*i_strat_trace;	/* inode strat_write trace */
+	struct ktrace		*i_lock_trace;	/* inode lock/unlock trace */
+	struct ktrace		*i_dir_trace;	/* inode directory trace */
+#endif /* DEBUG */
+} xfs_inode_t;
+
+#endif	/* __KERNEL__ */
+
+
+/*
+ * Fork handling.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_PTR)
+xfs_ifork_t *xfs_ifork_ptr(xfs_inode_t *ip, int w);
+#define	XFS_IFORK_PTR(ip,w)   		xfs_ifork_ptr(ip,w)
+#else
+#define	XFS_IFORK_PTR(ip,w)   ((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_Q)
+int xfs_ifork_q(xfs_inode_t *ip);
+#define	XFS_IFORK_Q(ip)			xfs_ifork_q(ip)
+#else
+#define	XFS_IFORK_Q(ip)			XFS_CFORK_Q(&(ip)->i_d)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_DSIZE)
+int xfs_ifork_dsize(xfs_inode_t *ip);
+#define	XFS_IFORK_DSIZE(ip)		xfs_ifork_dsize(ip)
+#else	/* macro forms below parenthesize (ip) so any pointer expression works */
+#define	XFS_IFORK_DSIZE(ip)		XFS_CFORK_DSIZE(&(ip)->i_d, (ip)->i_mount)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_ASIZE)
+int xfs_ifork_asize(xfs_inode_t *ip);
+#define	XFS_IFORK_ASIZE(ip)		xfs_ifork_asize(ip)
+#else
+#define	XFS_IFORK_ASIZE(ip)		XFS_CFORK_ASIZE(&(ip)->i_d, (ip)->i_mount)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_SIZE)
+int xfs_ifork_size(xfs_inode_t *ip, int w);
+#define	XFS_IFORK_SIZE(ip,w)		xfs_ifork_size(ip,w)
+#else
+#define	XFS_IFORK_SIZE(ip,w)		XFS_CFORK_SIZE(&(ip)->i_d, (ip)->i_mount, w)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FORMAT)
+int xfs_ifork_format(xfs_inode_t *ip, int w);
+#define	XFS_IFORK_FORMAT(ip,w)		xfs_ifork_format(ip,w)
+#else
+#define	XFS_IFORK_FORMAT(ip,w)		XFS_CFORK_FORMAT(&(ip)->i_d, w)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FMT_SET)
+void xfs_ifork_fmt_set(xfs_inode_t *ip, int w, int n);
+#define	XFS_IFORK_FMT_SET(ip,w,n)	xfs_ifork_fmt_set(ip,w,n)
+#else
+#define	XFS_IFORK_FMT_SET(ip,w,n)	XFS_CFORK_FMT_SET(&(ip)->i_d, w, n)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXTENTS)
+int xfs_ifork_nextents(xfs_inode_t *ip, int w);
+#define	XFS_IFORK_NEXTENTS(ip,w)	xfs_ifork_nextents(ip,w)
+#else
+#define	XFS_IFORK_NEXTENTS(ip,w)	XFS_CFORK_NEXTENTS(&(ip)->i_d, w)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXT_SET)
+void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
+#define	XFS_IFORK_NEXT_SET(ip,w,n)	xfs_ifork_next_set(ip,w,n)
+#else
+#define	XFS_IFORK_NEXT_SET(ip,w,n)	XFS_CFORK_NEXT_SET(&(ip)->i_d, w, n)
+#endif
+
+
+#ifdef __KERNEL__
+
+/*
+ * In-core inode flags.
+ */
+#define XFS_IGRIO	0x0001  /* inode used for guaranteed rate i/o */
+#define XFS_IUIOSZ	0x0002  /* inode i/o sizes have been explicitly set */
+#define XFS_IQUIESCE    0x0004  /* we have started quiescing for this inode */
+#define XFS_IRECLAIM    0x0008  /* we have started reclaiming this inode    */
+
+/*
+ * Flags for inode locking.
+ */
+#define	XFS_IOLOCK_EXCL		0x001
+#define	XFS_IOLOCK_SHARED	0x002
+#define	XFS_ILOCK_EXCL		0x004
+#define	XFS_ILOCK_SHARED	0x008
+#define	XFS_IUNLOCK_NONOTIFY	0x010
+#define XFS_IOLOCK_NESTED	0x020
+#define XFS_EXTENT_TOKEN_RD	0x040
+#define XFS_SIZE_TOKEN_RD	0x080
+#define XFS_EXTSIZE_RD		(XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
+#define XFS_WILLLEND		0x100	/* Always acquire tokens for lending */
+#define XFS_EXTENT_TOKEN_WR	(XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
+#define XFS_SIZE_TOKEN_WR       (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
+#define XFS_EXTSIZE_WR		(XFS_EXTSIZE_RD | XFS_WILLLEND)
+
+
+#define XFS_LOCK_MASK	\
+	(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \
+	 XFS_IOLOCK_NESTED | \
+	 XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \
+	 XFS_WILLLEND)
+
+/*
+ * Flags for xfs_iflush()
+ */
+#define	XFS_IFLUSH_DELWRI_ELSE_SYNC	1
+#define	XFS_IFLUSH_DELWRI_ELSE_ASYNC	2
+#define	XFS_IFLUSH_SYNC			3
+#define	XFS_IFLUSH_ASYNC		4
+#define	XFS_IFLUSH_DELWRI		5
+
+/*
+ * Flags for xfs_iflush_all.
+ */
+#define	XFS_FLUSH_ALL		0x1
+
+/*
+ * Flags for xfs_itruncate_start().
+ */
+#define	XFS_ITRUNC_DEFINITE	0x1
+#define	XFS_ITRUNC_MAYBE	0x2
+
+/*
+ * Maximum file size.
+ * if XFS_BIG_FILES 2^63 - 1 (largest positive value of xfs_fsize_t)
+ * else 2^40 - 1 (40=31+9) (might be an int holding a block #)
+ * Note, we allow seeks to this offset, although you can't read or write.
+ * For the not XFS_BIG_FILES case, the value could be 1 higher but we don't
+ * do that, for symmetry.
+ */
+#if XFS_BIG_FILES
+#define XFS_MAX_FILE_OFFSET	((long long)((1ULL<<63)-1ULL))
+#else
+#define	XFS_MAX_FILE_OFFSET	((1LL<<40)-1LL)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOV)
+struct vnode *xfs_itov(xfs_inode_t *ip);
+#define	XFS_ITOV(ip)		xfs_itov(ip)
+#else
+#define	XFS_ITOV(ip)		BHV_TO_VNODE(XFS_ITOBHV(ip))
+#endif
+#define	XFS_ITOV_NULL(ip)	BHV_TO_VNODE_NULL(XFS_ITOBHV(ip))
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOBHV)
+struct bhv_desc *xfs_itobhv(xfs_inode_t *ip);
+#define	XFS_ITOBHV(ip)		xfs_itobhv(ip)
+#else
+#define	XFS_ITOBHV(ip)		((struct bhv_desc *)(&((ip)->i_bhv_desc)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOI)
+xfs_inode_t *xfs_bhvtoi(struct bhv_desc *bhvp);
+#define	XFS_BHVTOI(bhvp)	xfs_bhvtoi(bhvp)
+#else
+#define	XFS_BHVTOI(bhvp)	\
+	((xfs_inode_t *)((char *)(bhvp) - \
+			 (char *)&(((xfs_inode_t *)0)->i_bhv_desc)))
+#endif
+
+#define BHV_IS_XFS(bdp)		(BHV_OPS(bdp) == &xfs_vnodeops)
+
+/*
+ * Pick the inode cluster hash bucket: (uint)blk % m_chsize
+ * (m_chash is the same size as m_ihash)
+ */
+#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)(blk)) % (mp)->m_chsize))
+
+
+/*
+ * xfs_iget.c prototypes.
+ */
+void		xfs_ihash_init(struct xfs_mount *);
+void		xfs_ihash_free(struct xfs_mount *);
+void		xfs_chash_init(struct xfs_mount *);
+void		xfs_chash_free(struct xfs_mount *);
+xfs_inode_t	*xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
+				  struct xfs_trans *);
+void            xfs_inode_lock_init(xfs_inode_t *, struct vnode *);
+int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+			 uint, xfs_inode_t **, xfs_daddr_t);
+int		xfs_vn_iget(struct vnode    *, struct xfs_mount *,
+			 struct xfs_trans *, xfs_ino_t,
+			 uint, xfs_inode_t **, xfs_daddr_t);
+void		xfs_iput(xfs_inode_t *, uint);
+void		xfs_ilock(xfs_inode_t *, uint);
+int		xfs_ilock_nowait(xfs_inode_t *, uint);
+void		xfs_iunlock(xfs_inode_t *, uint);
+void		xfs_ilock_demote(xfs_inode_t *, uint);
+void		xfs_iflock(xfs_inode_t *);
+int		xfs_iflock_nowait(xfs_inode_t *);
+uint		xfs_ilock_map_shared(xfs_inode_t *);
+void		xfs_iunlock_map_shared(xfs_inode_t *, uint);
+void		xfs_ifunlock(xfs_inode_t *);
+void		xfs_ireclaim(xfs_inode_t *);
+int		xfs_finish_reclaim(xfs_inode_t *, int);
+
+/*
+ * xfs_inode.c prototypes.
+ */
+int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+			    xfs_dinode_t **, struct xfs_buf **, int *);
+int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
+			  xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
+			  xfs_daddr_t);
+int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+			  xfs_inode_t **, xfs_daddr_t);
+int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
+int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
+		           dev_t, struct cred *, xfs_prid_t, int,
+			   struct xfs_buf **, boolean_t *, xfs_inode_t **);
+void		xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int,
+			   xfs_arch_t);
+int		xfs_ifree(struct xfs_trans *, xfs_inode_t *);
+int		xfs_atruncate_start(xfs_inode_t *);
+void		xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
+int		xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
+				     xfs_fsize_t, int, int);
+int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
+int		xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
+void		xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
+				 xfs_fsize_t, int);
+
+void		xfs_idestroy_fork(xfs_inode_t *, int);
+void		xfs_idestroy(xfs_inode_t *);
+void		xfs_idata_realloc(xfs_inode_t *, int, int);
+void		xfs_iextract(xfs_inode_t *);
+void		xfs_iext_realloc(xfs_inode_t *, int, int);
+void		xfs_iroot_realloc(xfs_inode_t *, int, int);
+void		xfs_ipin(xfs_inode_t *);
+void		xfs_iunpin(xfs_inode_t *);
+unsigned int	xfs_ipincount(xfs_inode_t *);
+int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_32_t *, int);
+int		xfs_iflush(xfs_inode_t *, uint);
+int		xfs_iflush_all(struct xfs_mount *, int);
+int             xfs_ibusy_check(xfs_inode_t *, int);
+int		xfs_iaccess(xfs_inode_t *, mode_t);
+uint		xfs_iroundup(uint);
+void		xfs_ichgtime(xfs_inode_t *, int);
+xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
+xfs_inode_t	*xfs_get_inode(dev_t, xfs_ino_t);
+void		xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+
+
+#ifdef DEBUG
+void		xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
+#else	/* DEBUG */
+#define xfs_isize_check(mp, ip, isize)
+#endif	/* DEBUG */
+
+#if defined(DEBUG)
+void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
+#else
+#define	xfs_inobp_check(mp, bp)
+#endif /* DEBUG */
+
+extern struct xfs_zone	*xfs_chashlist_zone;
+extern struct xfs_zone	*xfs_ifork_zone;
+extern struct xfs_zone	*xfs_inode_zone;
+extern struct xfs_zone	*xfs_ili_zone;
+extern struct vnodeops	xfs_vnodeops;
+
+#ifdef XFS_ILOCK_TRACE
+#define XFS_ILOCK_KTRACE_SIZE	32
+void	xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, 
+			inst_t *ra);
+#endif
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_INODE_H__ */
diff --git a/include/xfs_inode_item.h b/include/xfs_inode_item.h
new file mode 100644
index 000000000..d3433aaee
--- /dev/null
+++ b/include/xfs_inode_item.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_INODE_ITEM_H__
+#define	__XFS_INODE_ITEM_H__
+
+/*
+ * This is the structure used to lay out an inode log item in the
+ * log.  The size of the inline data/extents/b-tree root to be logged
+ * (if any) is indicated in the ilf_dsize field.  Changes to this structure
+ * must be added on to the end.
+ *
+ * Convention for naming inode log item versions :  The current version
+ * is always named XFS_LI_INODE.  When an inode log item gets superseded,
+ * add the latest version of IRIX that will generate logs with that item
+ * to the version name.
+ *
+ * -Version 1 of this structure (XFS_LI_5_3_INODE) included up to the first
+ *	union (ilf_u) field.  This was released with IRIX 5.3-XFS.
+ * -Version 2 of this structure (XFS_LI_6_1_INODE) is currently the entire
+ *	structure.  This was released with IRIX 6.0.1-XFS and IRIX 6.1.
+ * -Version 3 of this structure (XFS_LI_INODE) is the same as version 2
+ *	so a new structure definition wasn't necessary.  However, we had
+ *	to add a new type because the inode cluster size changed from 4K
+ *	to 8K and the version number had to be rev'ved to keep older kernels
+ *	from trying to recover logs with the 8K buffers in them.  The logging
+ *	code can handle recovery on different-sized clusters now so hopefully
+ *	this'll be the last time we need to change the inode log item just
+ *	for a change in the inode cluster size.  This new version was
+ *	released with IRIX 6.2.
+ */
+typedef struct xfs_inode_log_format {
+	unsigned short		ilf_type;	/* inode log item type */
+	unsigned short		ilf_size;	/* size of this item */
+	uint			ilf_fields;	/* flags for fields logged */
+	ushort			ilf_asize;	/* size of attr d/ext/root */
+	ushort			ilf_dsize;	/* size of data/ext/root */
+	xfs_ino_t		ilf_ino;	/* inode number */
+	union {
+		xfs_dev_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* mount point value */
+	} ilf_u;
+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
+	int			ilf_len;	/* len of inode buffer */
+	int			ilf_boffset;	/* off of inode in buffer */
+} xfs_inode_log_format_t;
+
+/* Initial version shipped with IRIX 5.3-XFS */
+typedef struct xfs_inode_log_format_v1 {
+	unsigned short		ilf_type;	/* inode log item type */
+	unsigned short		ilf_size;	/* size of this item */
+	uint			ilf_fields;	/* flags for fields logged */
+	uint			ilf_dsize;	/* size of data/ext/root */
+	xfs_ino_t		ilf_ino;	/* inode number */
+	union {
+		xfs_dev_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* mount point value */
+	} ilf_u;
+} xfs_inode_log_format_t_v1;
+
+/*
+ * Flags for xfs_trans_log_inode flags field.
+ */
+#define	XFS_ILOG_CORE	0x001	/* log standard inode fields */
+#define	XFS_ILOG_DDATA	0x002	/* log i_df.if_u1.if_data */
+#define	XFS_ILOG_DEXT	0x004	/* log i_df.if_u1.if_extents */
+#define	XFS_ILOG_DBROOT	0x008	/* log i_df.if_broot */
+#define	XFS_ILOG_DEV	0x010	/* log the dev field */
+#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
+#define	XFS_ILOG_ADATA	0x040	/* log i_afp->if_u1.if_data */
+#define	XFS_ILOG_AEXT	0x080	/* log i_afp->if_u1.if_extents */
+#define	XFS_ILOG_ABROOT	0x100	/* log i_afp->if_broot */
+
+#define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
+				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
+				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
+				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
+
+#define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
+				 XFS_ILOG_DBROOT)
+
+#define	XFS_ILOG_AFORK		(XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+				 XFS_ILOG_ABROOT)
+
+#define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
+				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
+				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
+				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+				 XFS_ILOG_ABROOT)
+
+#define	XFS_ILI_HOLD		0x1
+#define	XFS_ILI_IOLOCKED_EXCL	0x2
+#define	XFS_ILI_IOLOCKED_SHARED	0x4
+
+#define	XFS_ILI_IOLOCKED_ANY   (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
+
+
+#ifdef __KERNEL__
+
+struct xfs_buf;
+struct xfs_bmbt_rec_32;
+struct xfs_inode;
+struct xfs_mount;
+
+
+typedef struct xfs_inode_log_item {
+	xfs_log_item_t		ili_item;	   /* common portion */
+	struct xfs_inode	*ili_inode;	   /* inode ptr */
+	xfs_lsn_t		ili_flush_lsn;	   /* lsn at last flush */
+	xfs_lsn_t		ili_last_lsn;	   /* lsn at last transaction */
+	unsigned short		ili_ilock_recur;   /* lock recursion count */
+	unsigned short		ili_iolock_recur;  /* lock recursion count */
+	unsigned short		ili_flags;	   /* misc flags */
+	unsigned short		ili_logged;	   /* flushed logged data */
+	unsigned int		ili_last_fields;   /* fields when flushed */
+	struct xfs_bmbt_rec_32	*ili_extents_buf;  /* array of logged exts */
+	unsigned int            ili_pushbuf_flag;  /* one bit used in push_ail */
+
+#ifdef DEBUG
+	uint64_t                ili_push_owner;    /* one who sets pushbuf_flag
+						      above gets to push the buf */
+#endif
+#ifdef XFS_TRANS_DEBUG
+	int			ili_root_size;
+	char			*ili_orig_root;
+#endif
+	xfs_inode_log_format_t	ili_format;	   /* logged structure */
+} xfs_inode_log_item_t;
+
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FDATA)
+int xfs_ilog_fdata(int w);
+#define	XFS_ILOG_FDATA(w)	xfs_ilog_fdata(w)
+#else
+#define	XFS_ILOG_FDATA(w)	\
+	((w) == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA)
+#endif
+
+#endif	/* __KERNEL__ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FBROOT)
+int xfs_ilog_fbroot(int w);
+#define	XFS_ILOG_FBROOT(w)	xfs_ilog_fbroot(w)
+#else
+#define	XFS_ILOG_FBROOT(w)	\
+	((w) == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FEXT)
+int xfs_ilog_fext(int w);
+#define	XFS_ILOG_FEXT(w)	xfs_ilog_fext(w)
+#else
+#define	XFS_ILOG_FEXT(w)	\
+	((w) == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT)
+#endif
+
+#ifdef __KERNEL__
+
+void	xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
+void	xfs_inode_item_destroy(struct xfs_inode *);
+void	xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
+void	xfs_iflush_abort(struct xfs_inode *);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_INODE_ITEM_H__ */
diff --git a/include/xfs_inum.h b/include/xfs_inum.h
new file mode 100644
index 000000000..fb3ec3c3b
--- /dev/null
+++ b/include/xfs_inum.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_INUM_H__
+#define	__XFS_INUM_H__
+
+/*
+ * Inode number format:
+ * low inopblog bits - offset in block
+ * next agblklog bits - block number in ag
+ * next agno_log bits - ag number
+ * high agno_log-agblklog-inopblog bits - 0
+ */
+
+typedef	__uint32_t	xfs_agino_t;	/* within allocation grp inode number */
+
+/*
+ * Useful inode bits for this kernel.
+ * Used in some places where having 64-bits in the 32-bit kernels
+ * costs too much.
+ */
+#if XFS_BIG_FILESYSTEMS
+typedef	xfs_ino_t	xfs_intino_t;
+#else
+typedef	__uint32_t	xfs_intino_t;
+#endif
+
+#define	NULLFSINO	((xfs_ino_t)-1)
+#define	NULLAGINO	((xfs_agino_t)-1)
+
+struct xfs_mount;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_MASK)
+__uint32_t xfs_ino_mask(int k);
+#define	XFS_INO_MASK(k)			xfs_ino_mask(k)
+#else
+#define	XFS_INO_MASK(k)	((__uint32_t)((1ULL << (k)) - 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_OFFSET_BITS)
+int xfs_ino_offset_bits(struct xfs_mount *mp);
+#define	XFS_INO_OFFSET_BITS(mp)		xfs_ino_offset_bits(mp)
+#else
+#define	XFS_INO_OFFSET_BITS(mp)	((mp)->m_sb.sb_inopblog)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGBNO_BITS)
+int xfs_ino_agbno_bits(struct xfs_mount *mp);
+#define	XFS_INO_AGBNO_BITS(mp)		xfs_ino_agbno_bits(mp)
+#else
+#define	XFS_INO_AGBNO_BITS(mp)	((mp)->m_sb.sb_agblklog)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGINO_BITS)
+int xfs_ino_agino_bits(struct xfs_mount *mp);
+#define	XFS_INO_AGINO_BITS(mp)		xfs_ino_agino_bits(mp)
+#else
+#define	XFS_INO_AGINO_BITS(mp)		((mp)->m_agino_log)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGNO_BITS)
+int xfs_ino_agno_bits(struct xfs_mount *mp);
+#define	XFS_INO_AGNO_BITS(mp)		xfs_ino_agno_bits(mp)
+#else
+#define	XFS_INO_AGNO_BITS(mp)	((mp)->m_agno_log)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_BITS)
+int xfs_ino_bits(struct xfs_mount *mp);
+#define	XFS_INO_BITS(mp)		xfs_ino_bits(mp)
+#else
+#define	XFS_INO_BITS(mp)	(XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGNO)
+xfs_agnumber_t xfs_ino_to_agno(struct xfs_mount *mp, xfs_ino_t i);
+#define	XFS_INO_TO_AGNO(mp,i)		xfs_ino_to_agno(mp,i)
+#else
+#define	XFS_INO_TO_AGNO(mp,i)	\
+	((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGINO)
+xfs_agino_t xfs_ino_to_agino(struct xfs_mount *mp, xfs_ino_t i);
+#define	XFS_INO_TO_AGINO(mp,i)		xfs_ino_to_agino(mp,i)
+#else
+#define	XFS_INO_TO_AGINO(mp,i)	\
+	((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGBNO)
+xfs_agblock_t xfs_ino_to_agbno(struct xfs_mount *mp, xfs_ino_t i);
+#define	XFS_INO_TO_AGBNO(mp,i)		xfs_ino_to_agbno(mp,i)
+#else
+#define	XFS_INO_TO_AGBNO(mp,i)	\
+	(((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \
+	 XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_OFFSET)
+int xfs_ino_to_offset(struct xfs_mount *mp, xfs_ino_t i);
+#define	XFS_INO_TO_OFFSET(mp,i)		xfs_ino_to_offset(mp,i)
+#else
+#define	XFS_INO_TO_OFFSET(mp,i)	\
+	((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_FSB)
+xfs_fsblock_t xfs_ino_to_fsb(struct xfs_mount *mp, xfs_ino_t i);
+#define	XFS_INO_TO_FSB(mp,i)		xfs_ino_to_fsb(mp,i)
+#else
+#define	XFS_INO_TO_FSB(mp,i)	\
+	XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_INO)
+xfs_ino_t
+xfs_agino_to_ino(struct xfs_mount *mp, xfs_agnumber_t a, xfs_agino_t i);
+#define	XFS_AGINO_TO_INO(mp,a,i)	xfs_agino_to_ino(mp,a,i)
+#else
+#define	XFS_AGINO_TO_INO(mp,a,i)	\
+	(((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_AGBNO)
+xfs_agblock_t xfs_agino_to_agbno(struct xfs_mount *mp, xfs_agino_t i);
+#define	XFS_AGINO_TO_AGBNO(mp,i)	xfs_agino_to_agbno(mp,i)
+#else
+#define	XFS_AGINO_TO_AGBNO(mp,i)	((i) >> XFS_INO_OFFSET_BITS(mp))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_OFFSET)
+int xfs_agino_to_offset(struct xfs_mount *mp, xfs_agino_t i);
+#define	XFS_AGINO_TO_OFFSET(mp,i)	xfs_agino_to_offset(mp,i)
+#else
+#define	XFS_AGINO_TO_OFFSET(mp,i)	\
+	((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_OFFBNO_TO_AGINO)
+xfs_agino_t xfs_offbno_to_agino(struct xfs_mount *mp, xfs_agblock_t b, int o);
+#define	XFS_OFFBNO_TO_AGINO(mp,b,o)	xfs_offbno_to_agino(mp,b,o)
+#else
+#define	XFS_OFFBNO_TO_AGINO(mp,b,o)	\
+	((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
+#endif
+
+#if XFS_BIG_FILESYSTEMS
+#define	XFS_MAXINUMBER		((xfs_ino_t)((1ULL << 56) - 1ULL))
+#define	XFS_INO64_OFFSET	((xfs_ino_t)(1ULL << 32))
+#else
+#define	XFS_MAXINUMBER		((xfs_ino_t)((1ULL << 32) - 1ULL))
+#endif
+#define	XFS_MAXINUMBER_32	((xfs_ino_t)((1ULL << 32) - 1ULL))
+
+#endif	/* __XFS_INUM_H__ */
diff --git a/include/xfs_log.h b/include/xfs_log.h
new file mode 100644
index 000000000..c333cefc4
--- /dev/null
+++ b/include/xfs_log.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_LOG_H__
+#define __XFS_LOG_H__
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define LSN_FIELD_CYCLE(arch) (((arch)==ARCH_NOCONVERT)?1:0)
+#define LSN_FIELD_BLOCK(arch) (((arch)==ARCH_NOCONVERT)?0:1)
+#else
+#define LSN_FIELD_CYCLE(arch) (0)
+#define LSN_FIELD_BLOCK(arch) (1)
+#endif
+
+/* get lsn fields */
+    
+#define CYCLE_LSN(lsn,arch) (INT_GET(((uint *)&(lsn))[LSN_FIELD_CYCLE(arch)], arch))
+#define BLOCK_LSN(lsn,arch) (INT_GET(((uint *)&(lsn))[LSN_FIELD_BLOCK(arch)], arch))
+
+#ifdef __KERNEL__
+/*
+ * By comparing each component, we don't have to worry about extra
+ * endian issues in treating two 32 bit numbers as one 64 bit number
+ */
+static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2, xfs_arch_t arch)
+{
+	/* cycle decides the order; block only breaks a tie; 0 means equal */
+	if (CYCLE_LSN(lsn1, arch) != CYCLE_LSN(lsn2, arch))
+		return (CYCLE_LSN(lsn1, arch) > CYCLE_LSN(lsn2, arch)) ? 999 : -999;
+
+	if (BLOCK_LSN(lsn1, arch) == BLOCK_LSN(lsn2, arch))
+		return 0;
+	return (BLOCK_LSN(lsn1, arch) > BLOCK_LSN(lsn2, arch)) ? 999 : -999;
+}
+
+#define	XFS_LSN_CMP_ARCH(x,y,arch)	_lsn_cmp(x, y, arch)
+#define	XFS_LSN_CMP(x,y) XFS_LSN_CMP_ARCH(x,y,ARCH_NOCONVERT)
+#define	XFS_LSN_DIFF_ARCH(x,y,arch)	_lsn_cmp(x, y, arch)
+#define	XFS_LSN_DIFF(x,y) XFS_LSN_DIFF_ARCH(x,y,ARCH_NOCONVERT)
+
+/*
+ * Macros, structures, prototypes for interface to the log manager.
+ */
+
+/*
+ * Flags to xfs_log_mount
+ */
+#define XFS_LOG_RECOVER		0x1
+
+/*
+ * Flags to xfs_log_done()
+ */
+#define XFS_LOG_REL_PERM_RESERV	0x1
+
+
+/*
+ * Flags to xfs_log_reserve()
+ *
+ *	XFS_LOG_SLEEP:	 If space is not available, sleep (default)
+ *	XFS_LOG_NOSLEEP: If space is not available, return error
+ *	XFS_LOG_PERM_RESERV: Permanent reservation.  When writes are
+ *		performed against this type of reservation, the reservation
+ *		is not decreased.  Long running transactions should use this.
+ */
+#define XFS_LOG_SLEEP		0x0
+#define XFS_LOG_NOSLEEP		0x1
+#define XFS_LOG_PERM_RESERV	0x2
+#define XFS_LOG_RESV_ALL	(XFS_LOG_NOSLEEP|XFS_LOG_PERM_RESERV)
+
+
+/*
+ * Flags to xfs_log_force()
+ *
+ *	XFS_LOG_SYNC:	Synchronous force in-core log to disk
+ *	XFS_LOG_FORCE:	Start in-core log write now.
+ *	XFS_LOG_URGE:	Start write within some window of time.
+ *
+ * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set.
+ */
+#define XFS_LOG_SYNC		0x1
+#define XFS_LOG_FORCE		0x2
+#define XFS_LOG_URGE		0x4
+
+#endif	/* __KERNEL__ */
+
+
+/* Log Clients */
+#define XFS_TRANSACTION		0x69
+#define XFS_VOLUME		0x2
+#define XFS_LOG			0xaa
+
+typedef struct xfs_log_iovec {
+	xfs_caddr_t		i_addr;		/* beginning address of region */
+	int		i_len;		/* length in bytes of region */
+} xfs_log_iovec_t;
+
+typedef void* xfs_log_ticket_t;
+
+/*
+ * Structure used to pass callback function and the function's argument
+ * to the log manager.
+ */
+typedef struct xfs_log_callback {
+	struct xfs_log_callback	*cb_next;
+	void			(*cb_func)(void *, int);
+	void 			*cb_arg;
+} xfs_log_callback_t;
+
+
+#ifdef __KERNEL__
+/* Log manager interfaces */
+struct xfs_mount;
+xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
+		       xfs_log_ticket_t ticket,
+		       uint		flags);
+int	  xfs_log_force(struct xfs_mount *mp,
+			xfs_lsn_t	 lsn,
+			uint		 flags);
+int	  xfs_log_init(void);
+int	  xfs_log_mount(struct xfs_mount *mp,
+			dev_t		 log_dev,
+			xfs_daddr_t		 start_block,
+			int		 num_bblocks);
+int	  xfs_log_mount_finish(struct xfs_mount *mp, int);
+void	  xfs_log_move_tail(struct xfs_mount	*mp,
+			    xfs_lsn_t		tail_lsn);
+void	  xfs_log_notify(struct xfs_mount	*mp,
+			 xfs_lsn_t		lsn,
+			 xfs_log_callback_t	*callback_entry);
+int	  xfs_log_reserve(struct xfs_mount *mp,
+			  int		   length,
+			  int		   count,
+			  xfs_log_ticket_t *ticket,
+			  char		   clientid,
+			  uint		   flags);
+int	  xfs_log_write(struct xfs_mount *mp,
+			xfs_log_iovec_t  region[],
+			int		 nentries,
+			xfs_log_ticket_t ticket,
+			xfs_lsn_t	 *start_lsn);
+int	  xfs_log_unmount(struct xfs_mount *mp);
+int	  xfs_log_unmount_write(struct xfs_mount *mp);
+void      xfs_log_unmount_dealloc(struct xfs_mount *mp);
+int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
+int	  xfs_log_need_covered(struct xfs_mount *mp);
+
+void	  xlog_iodone(struct xfs_buf *);
+
+#endif
+
+
+extern int xlog_debug;		/* set to 1 to enable real log */
+
+
+#endif	/* __XFS_LOG_H__ */
diff --git a/include/xfs_log_priv.h b/include/xfs_log_priv.h
new file mode 100644
index 000000000..c4f8b1123
--- /dev/null
+++ b/include/xfs_log_priv.h
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_LOG_PRIV_H__
+#define __XFS_LOG_PRIV_H__
+
+#if defined(XFS_ALL_TRACE)
+#define	XFS_LOG_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_LOG_TRACE
+#endif
+
+struct xfs_buf;
+struct ktrace;
+struct log;
+struct xfs_buf_cancel;
+struct xfs_mount;
+
+/*
+ * Macros, structures, prototypes for internal log manager use.
+ */
+
+#define XLOG_NUM_ICLOGS		2
+#define XLOG_MAX_ICLOGS		4
+#define XLOG_CALLBACK_SIZE	10
+#define XLOG_HEADER_MAGIC_NUM	0xFEEDbabe	/* Illegal cycle number */
+#define XLOG_RECORD_BSIZE	(16*1024)	/* eventually 32k */
+#define XLOG_MAX_RECORD_BSIZE	(32*1024)
+#define XLOG_RECORD_BSHIFT	14		/* 16384 == 1 << 14 */
+#define XLOG_MAX_RECORD_BSHIFT	15		/* 32k == 1 << 15 */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_BTOLRBB)
+int xlog_btolrbb(int b);
+#define XLOG_BTOLRBB(b)		xlog_btolrbb(b)
+#else
+#define XLOG_BTOLRBB(b)		(((b)+XLOG_RECORD_BSIZE-1) >> XLOG_RECORD_BSHIFT)
+#endif
+
+#define XLOG_HEADER_SIZE	512
+
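+/*
+ * Set lsn fields.  NOTE: these macros already end in ';' or expand to a
+ * bare { } block, so they are not safe as the body of an unbraced if/else. */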
+
+#define ASSIGN_LSN_CYCLE(lsn,cycle,arch) \
+    INT_SET(((uint *)&(lsn))[LSN_FIELD_CYCLE(arch)], arch, (cycle));
+#define ASSIGN_LSN_BLOCK(lsn,block,arch) \
+    INT_SET(((uint *)&(lsn))[LSN_FIELD_BLOCK(arch)], arch, (block));
+#define ASSIGN_ANY_LSN(lsn,cycle,block,arch)  \
+    { \
+        ASSIGN_LSN_CYCLE(lsn,cycle,arch); \
+        ASSIGN_LSN_BLOCK(lsn,block,arch); \
+    }
+#define ASSIGN_LSN(lsn,log,arch) \
+    ASSIGN_ANY_LSN(lsn,(log)->l_curr_cycle,(log)->l_curr_block,arch);
+    
+#define XLOG_SET(f,b)		(((f) & (b)) == (b))
+
+#define GET_CYCLE(ptr, arch) \
+    (INT_GET(*(uint *)(ptr), arch) == XLOG_HEADER_MAGIC_NUM ? \
+         INT_GET(*((uint *)(ptr)+1), arch) : \
+         INT_GET(*(uint *)(ptr), arch) \
+    )
+    
+#define BLK_AVG(blk1, blk2)	(((blk1)+(blk2)) >> 1)	/* sum of both args, halved by >> 1 */
+
+
+#ifdef __KERNEL__
+/*
+ * get client id from packed copy.
+ *
+ * this hack is here because the xlog_pack code copies four bytes
+ * of xlog_op_header containing the fields oh_clientid, oh_flags 
+ * and oh_res2 into the packed copy.
+ *
+ * later on this four byte chunk is treated as an int and the 
+ * client id is pulled out.
+ *
+ * this has endian issues, of course.
+ */	
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define GET_CLIENT_ID(i,arch) \
+    ((i) & 0xff)
+#else
+#define GET_CLIENT_ID(i,arch) \
+    ((i) >> 24)
+#endif
+   
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_SUB_SPACE)
+void xlog_grant_sub_space(struct log *log, int bytes, int type);
+#define XLOG_GRANT_SUB_SPACE(log,bytes,type)	\
+	xlog_grant_sub_space(log,bytes,type)
+#else
+#define XLOG_GRANT_SUB_SPACE(log,bytes,type)				\
+    {	/* 'w': write grant fields; otherwise: reserve grant fields */	\
+	if ((type) == 'w') {						\
+		(log)->l_grant_write_bytes -= (bytes);			\
+		if ((log)->l_grant_write_bytes < 0) { /* went negative: */ \
+			(log)->l_grant_write_bytes += (log)->l_logsize;	\
+			(log)->l_grant_write_cycle--;			\
+		}							\
+	} else {							\
+		(log)->l_grant_reserve_bytes -= (bytes);		\
+		if ((log)->l_grant_reserve_bytes < 0) { /* went negative: */ \
+			(log)->l_grant_reserve_bytes += (log)->l_logsize;\
+			(log)->l_grant_reserve_cycle--;			\
+		}							\
+	}								\
+    }
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_ADD_SPACE)
+void xlog_grant_add_space(struct log *log, int bytes, int type);
+#define XLOG_GRANT_ADD_SPACE(log,bytes,type)	\
+	xlog_grant_add_space(log,bytes,type)
+#else
+#define XLOG_GRANT_ADD_SPACE(log,bytes,type)				\
+    {	/* 'w': write grant fields; otherwise: reserve grant fields */	\
+	if ((type) == 'w') {						\
+		(log)->l_grant_write_bytes += (bytes);			\
+		if ((log)->l_grant_write_bytes > (log)->l_logsize) { /* wrap */ \
+			(log)->l_grant_write_bytes -= (log)->l_logsize;	\
+			(log)->l_grant_write_cycle++;			\
+		}							\
+	} else {							\
+		(log)->l_grant_reserve_bytes += (bytes);		\
+		if ((log)->l_grant_reserve_bytes > (log)->l_logsize) { /* wrap */ \
+			(log)->l_grant_reserve_bytes -= (log)->l_logsize;\
+			(log)->l_grant_reserve_cycle++;			\
+		}							\
+	}								\
+    }
+#endif
+#define XLOG_INS_TICKETQ(q,tic)				\
+    {							\
+	if (q) {					\
+		(tic)->t_next	    = (q);		\
+		(tic)->t_prev	    = (q)->t_prev;	\
+		(q)->t_prev->t_next = (tic);		\
+		(q)->t_prev	    = (tic);		\
+	} else {					\
+		(tic)->t_prev = (tic)->t_next = (tic);	\
+		(q) = (tic);				\
+	}						\
+	(tic)->t_flags |= XLOG_TIC_IN_Q;		\
+    }
+#define XLOG_DEL_TICKETQ(q,tic)				\
+    {							\
+	if ((tic) == (tic)->t_next) {			\
+		(q) = NULL;				\
+	} else {					\
+		(q) = (tic)->t_next;			\
+		(tic)->t_next->t_prev = (tic)->t_prev;	\
+		(tic)->t_prev->t_next = (tic)->t_next;	\
+	}						\
+	(tic)->t_next = (tic)->t_prev = NULL;		\
+	(tic)->t_flags &= ~XLOG_TIC_IN_Q;		\
+    }
+
+
+#define GRANT_LOCK(log)		mutex_spinlock(&(log)->l_grant_lock)
+#define GRANT_UNLOCK(log, s)	mutex_spinunlock(&(log)->l_grant_lock, s)
+#define LOG_LOCK(log)		mutex_spinlock(&(log)->l_icloglock)
+#define LOG_UNLOCK(log, s)	mutex_spinunlock(&(log)->l_icloglock, s)
+
+#define xlog_panic(s)		{cmn_err(CE_PANIC, s); }
+#define xlog_exit(s)		{cmn_err(CE_PANIC, s); }
+#define xlog_warn(s)		{cmn_err(CE_WARN, s); }
+
+/*
+ * In core log state
+ */
+#define XLOG_STATE_ACTIVE    0x0001 /* Current IC log being written to */
+#define XLOG_STATE_WANT_SYNC 0x0002 /* Want to sync this iclog; no more writes */
+#define XLOG_STATE_SYNCING   0x0004 /* This IC log is syncing */
+#define XLOG_STATE_DONE_SYNC 0x0008 /* Done syncing to disk */
+#define XLOG_STATE_DO_CALLBACK \
+			     0x0010 /* Process callback functions */
+#define XLOG_STATE_CALLBACK  0x0020 /* Callback functions now */
+#define XLOG_STATE_DIRTY     0x0040 /* Dirty IC log, not ready for ACTIVE status*/
+#define XLOG_STATE_IOERROR   0x0080 /* IO error happened in sync'ing log */
+#define XLOG_STATE_ALL	     0x7FFF /* All possible valid flags */
+#define XLOG_STATE_NOTUSED   0x8000 /* This IC log not being used */
+#endif	/* __KERNEL__ */
+
+/*
+ * Flags to log operation header
+ *
+ * The first write of a new transaction will be preceded with a start
+ * record, XLOG_START_TRANS.  Once a transaction is committed, a commit
+ * record is written, XLOG_COMMIT_TRANS.  If a single region can not fit into
+ * the remainder of the current active in-core log, it is split up into
+ * multiple regions.  Each partial region will be marked with a
+ * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
+ *
+ */
+#define XLOG_START_TRANS	0x01	/* Start a new transaction */
+#define XLOG_COMMIT_TRANS	0x02	/* Commit this transaction */
+#define XLOG_CONTINUE_TRANS	0x04	/* Cont this trans into new region */
+#define XLOG_WAS_CONT_TRANS	0x08	/* Cont'd from prior region? TODO confirm */
+#define XLOG_END_TRANS		0x10	/* End a continued transaction */
+#define XLOG_UNMOUNT_TRANS	0x20	/* Unmount a filesystem transaction */
+#define XLOG_SKIP_TRANS		(XLOG_COMMIT_TRANS | XLOG_CONTINUE_TRANS | \
+				 XLOG_WAS_CONT_TRANS | XLOG_END_TRANS | \
+				 XLOG_UNMOUNT_TRANS)	/* every flag but START */
+
+#ifdef __KERNEL__
+/*
+ * Flags to log ticket
+ */
+#define XLOG_TIC_INITED		0x1	/* has been initialized */
+#define XLOG_TIC_PERM_RESERV	0x2	/* permanent reservation */
+#define XLOG_TIC_IN_Q		0x4
+#endif	/* __KERNEL__ */
+
+#define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
+
+/*
+ * Flags for log structure
+ */
+#define XLOG_CHKSUM_MISMATCH	0x1	/* used only during recovery */
+#define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
+#define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */     
+#define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
+					   shutdown */
+typedef __uint32_t xlog_tid_t;
+
+
+#ifdef __KERNEL__
+/*
+ * Below are states for covering allocation transactions.
+ * By covering, we mean changing the h_tail_lsn in the last on-disk
+ * log write such that no allocation transactions will be re-done during
+ * recovery after a system crash. Recovery starts at the last on-disk
+ * log write.
+ *
+ * These states are used to insert dummy log entries to cover
+ * space allocation transactions which can undo non-transactional changes
+ * after a crash. Writes to a file with space
+ * already allocated do not result in any transactions. Allocations
+ * might include space beyond the EOF. So if we just push the EOF a
+ * little, the last transaction for the file could contain the wrong
+ * size. If there is no file system activity, after an allocation
+ * transaction, and the system crashes, the allocation transaction
+ * will get replayed and the file will be truncated. This could
+ * be hours/days/... after the allocation occurred.
+ *
+ * The fix for this is to do two dummy transactions when the
+ * system is idle. We need two dummy transaction because the h_tail_lsn
+ * in the log record header needs to point beyond the last possible
+ * non-dummy transaction. The first dummy changes the h_tail_lsn to
+ * the first transaction before the dummy. The second dummy causes
+ * h_tail_lsn to point to the first dummy. Recovery starts at h_tail_lsn.
+ * 
+ * These dummy transactions get committed when everything
+ * is idle (after there has been some activity).
+ *
+ * There are 5 states used to control this.
+ *
+ *  IDLE -- no logging has been done on the file system or
+ *		we are done covering previous transactions.
+ *  NEED -- logging has occurred and we need a dummy transaction
+ *		when the log becomes idle.
+ *  DONE -- we were in the NEED state and have committed a dummy
+ *		transaction.
+ *  NEED2 -- we detected that a dummy transaction has gone to the
+ *		on disk log with no other transactions.
+ *  DONE2 -- we committed a dummy transaction when in the NEED2 state.
+ *
+ * There are two places where we switch states:
+ *
+ * 1.) In xfs_sync, when we detect an idle log and are in NEED or NEED2.
+ *	We commit the dummy transaction and switch to DONE or DONE2,
+ * 	respectively. In all other states, we don't do anything.
+ *
+ * 2.) When we finish writing the on-disk log (xlog_state_clean_log).
+ *
+ *	No matter what state we are in, if this isn't the dummy
+ *	transaction going out, the next state is NEED.
+ *	So, if we aren't in the DONE or DONE2 states, the next state
+ *	is NEED. We can't be finishing a write of the dummy record
+ *	unless it was committed and the state switched to DONE or DONE2.
+ *	
+ *	If we are in the DONE state and this was a write of the
+ *		dummy transaction, we move to NEED2.
+ *
+ *	If we are in the DONE2 state and this was a write of the
+ *		dummy transaction, we move to IDLE.
+ *
+ *
+ * If only one dummy transaction were written, it could get appended to
+ * a file space allocation. When this happens, the log recovery
+ * code replays the space allocation and a file could be truncated.
+ * This is why we have the NEED2 and DONE2 states before going idle.
+ */
+
+#define XLOG_STATE_COVER_IDLE	0
+#define XLOG_STATE_COVER_NEED	1
+#define XLOG_STATE_COVER_DONE	2
+#define XLOG_STATE_COVER_NEED2	3
+#define XLOG_STATE_COVER_DONE2	4
+
+#define XLOG_COVER_OPS		5
+
+typedef struct xlog_ticket {
+	sv_t		   t_sema;	 /* sleep on this semaphore	 :20 */
+	struct xlog_ticket *t_next;	 /*			         : 4 */
+	struct xlog_ticket *t_prev;	 /*				 : 4 */
+	xlog_tid_t	   t_tid;	 /* transaction identifier	 : 4 */
+	int		   t_curr_res;	 /* current reservation in bytes : 4 */
+	int		   t_unit_res;	 /* unit reservation in bytes    : 4 */
+	char		   t_ocnt;	 /* original count		 : 1 */
+	char		   t_cnt;	 /* current count		 : 1 */
+	char		   t_clientid;	 /* who does this belong to;	 : 1 */
+	char		   t_flags;	 /* properties of reservation	 : 1 */
+} xlog_ticket_t;
+#endif
+
+
+typedef struct xlog_op_header {
+	xlog_tid_t oh_tid;	/* transaction id of operation	:  4 b */
+	int	   oh_len;	/* bytes in data region		:  4 b */
+	char	   oh_clientid;	/* who sent me this		:  1 b */
+	char	   oh_flags;	/* XLOG_*_TRANS flags		:  1 b */
+	ushort	   oh_res2;	/* 32 bit align			:  2 b */
+} xlog_op_header_t;
+
+
+/* valid values for h_fmt */
+#define XLOG_FMT_UNKNOWN  0
+#define XLOG_FMT_LINUX_LE 1
+#define XLOG_FMT_LINUX_BE 2
+#define XLOG_FMT_IRIX_BE  3
+
+/* our fmt */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define XLOG_FMT XLOG_FMT_LINUX_LE
+#else
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define XLOG_FMT XLOG_FMT_LINUX_BE
+#else
+#error unknown byte order
+#endif
+#endif
+
+typedef struct xlog_rec_header {
+	uint	  h_magicno;	/* log record (LR) identifier		:  4 */
+	uint	  h_cycle;	/* write cycle of log			:  4 */
+	int	  h_version;	/* LR version				:  4 */
+	int	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
+	xfs_lsn_t h_lsn;	/* lsn of this LR			:  8 */
+	xfs_lsn_t h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
+	uint	  h_chksum;	/* may not be used; non-zero if used	:  4 */
+	int	  h_prev_block; /* block number to previous LR		:  4 */
+	int	  h_num_logops;	/* number of log operations in this LR	:  4 */
+	uint	  h_cycle_data[XLOG_MAX_RECORD_BSIZE / BBSIZE];
+        /* new fields */
+        int       h_fmt;        /* format of log record                 :  4 */
+        uuid_t    h_fs_uuid;    /* uuid of FS                           : 16 */
+} xlog_rec_header_t;
+
+#ifdef __KERNEL__
+/*
+ * - A log record header is 512 bytes.  There is plenty of room to grow the
+ *	xlog_rec_header_t into the reserved space.
+ * - ic_data follows, so a write to disk can start at the beginning of
+ *	the iclog.
+ * - ic_forcesema is used to implement synchronous forcing of the iclog to disk.
+ * - ic_next is the pointer to the next iclog in the ring.
+ * - ic_bp is a pointer to the buffer used to write this incore log to disk.
+ * - ic_log is a pointer back to the global log structure.
+ * - ic_callback is a linked list of callback function/argument pairs to be
+ *	called after an iclog finishes writing.
+ * - ic_size is the full size of the header plus data.
+ * - ic_offset is the current number of bytes written to in this iclog.
+ * - ic_refcnt is bumped when someone is writing to the log.
+ * - ic_state is the state of the iclog.
+ */
+typedef struct xlog_iclog_fields {
+	sv_t			ic_forcesema;
+	struct xlog_in_core	*ic_next;
+	struct xlog_in_core	*ic_prev;
+	struct xfs_buf  		*ic_bp;
+	struct log		*ic_log;
+	xfs_log_callback_t	*ic_callback;
+	xfs_log_callback_t	**ic_callback_tail;
+#ifdef DEBUG
+	struct ktrace		*ic_trace;
+#endif
+	int	  		ic_size;
+	int	  		ic_offset;
+	int	  		ic_refcnt;
+	int			ic_roundoff;
+	int			ic_bwritecnt;
+	ushort_t		ic_state;
+} xlog_iclog_fields_t;
+
+typedef struct xlog_in_core {
+	union {
+		xlog_iclog_fields_t	hic_fields;
+		char			hic_pad[BBSIZE];
+	} ic_h1;
+	union {
+		xlog_rec_header_t hic_header;
+		char		  hic_sector[XLOG_HEADER_SIZE];
+	} ic_h2;
+	char		       ic_data[1];
+} xlog_in_core_t;
+
+/*
+ * Defines to save our code from this glop.
+ */
+#define	ic_forcesema	ic_h1.hic_fields.ic_forcesema
+#define	ic_next		ic_h1.hic_fields.ic_next
+#define	ic_prev		ic_h1.hic_fields.ic_prev
+#define	ic_bp		ic_h1.hic_fields.ic_bp
+#define	ic_log		ic_h1.hic_fields.ic_log
+#define	ic_callback	ic_h1.hic_fields.ic_callback
+#define	ic_callback_tail ic_h1.hic_fields.ic_callback_tail
+#define	ic_trace	ic_h1.hic_fields.ic_trace
+#define	ic_size		ic_h1.hic_fields.ic_size
+#define	ic_offset	ic_h1.hic_fields.ic_offset
+#define	ic_refcnt	ic_h1.hic_fields.ic_refcnt
+#define	ic_roundoff	ic_h1.hic_fields.ic_roundoff
+#define	ic_bwritecnt	ic_h1.hic_fields.ic_bwritecnt
+#define	ic_state	ic_h1.hic_fields.ic_state
+#define ic_header	ic_h2.hic_header
+
+/*
+ * The reservation head lsn is not made up of a cycle number and block number.
+ * Instead, it uses a cycle number and byte number.  Logs don't expect to
+ * overflow 31 bits worth of byte offset, so using a byte number will mean
+ * that round off problems won't occur when releasing partial reservations.
+ */
+typedef struct log {	/* per-log state; typedef'd as xlog_t below */
+    /* The following block of fields are changed while holding icloglock */
+    sema_t		l_flushsema;    /* iclog flushing semaphore */
+    int			l_flushcnt;	/* # of procs waiting on this sema */
+    int			l_ticket_cnt;	/* free ticket count */
+    int			l_ticket_tcnt;	/* total ticket count */
+    int			l_covered_state;/* state of "covering disk log entries" */
+    xlog_ticket_t	*l_freelist;    /* free list of tickets */
+    xlog_ticket_t	*l_unmount_free;/* kmem_free these addresses */
+    xlog_ticket_t	*l_tail;        /* tail of ticket free list? TODO confirm */
+    xlog_in_core_t	*l_iclog;       /* head log queue	*/
+    lock_t		l_icloglock;    /* grab to change iclog state */
+    xfs_lsn_t		l_tail_lsn;     /* lsn of 1st LR w/ unflush buffers */
+    xfs_lsn_t		l_last_sync_lsn;/* lsn of last LR on disk */
+    struct xfs_mount	*l_mp;	        /* mount point */
+    struct xfs_buf	*l_xbuf;        /* extra buffer for log wrapping */
+    dev_t		l_dev;	        /* dev_t of log */
+    xfs_daddr_t		l_logBBstart;   /* start block of log */
+    int			l_logsize;      /* size of log in bytes */
+    int			l_logBBsize;    /* size of log in 512 byte chunks */
+    int			l_roundoff;	/* round off error of all iclogs */
+    int			l_curr_cycle;   /* Cycle number of log writes */
+    int			l_prev_cycle;   /* Cycle # b4 last block increment */
+    int			l_curr_block;   /* current logical block of log */
+    int			l_prev_block;   /* previous logical block of log */
+    int			l_iclog_size;	 /* size of an iclog in bytes? TODO confirm */
+    int			l_iclog_size_log;/* log2 of l_iclog_size? TODO confirm */
+    int			l_iclog_bufs;	 /* number of iclog buffers */
+
+    /* The following fields are used for debugging; need to hold icloglock */
+    char		*l_iclog_bak[XLOG_MAX_ICLOGS];
+
+    /* The following block of fields are changed while holding grant_lock */
+    lock_t		l_grant_lock;		/* protects below fields */
+    xlog_ticket_t	*l_reserve_headq;	/* */
+    xlog_ticket_t	*l_write_headq;		/* */
+    int			l_grant_reserve_cycle;	/* */
+    int			l_grant_reserve_bytes;	/* */
+    int			l_grant_write_cycle;	/* */
+    int			l_grant_write_bytes;	/* */
+
+    /* The following fields don't need locking */
+#ifdef DEBUG
+    struct ktrace	*l_trace;
+    struct ktrace	*l_grant_trace;
+#endif
+    uint		l_flags;
+    uint		l_quotaoffs_flag;/* XFS_DQ_*, if QUOTAOFFs found */
+    struct xfs_buf_cancel **l_buf_cancel_table;	
+} xlog_t;
+
+
+/* common routines */
+extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp,
+				      xlog_in_core_t *iclog);
+extern int	 xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk);
+extern int	 xlog_find_tail(xlog_t	*log,
+				xfs_daddr_t *head_blk,
+				xfs_daddr_t *tail_blk,
+				int readonly);
+extern int	 xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk);
+extern int	 xlog_recover(xlog_t *log, int readonly);
+extern int	 xlog_recover_finish(xlog_t *log, int mfsi_flags);
+extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog);
+extern struct xfs_buf *xlog_get_bp(int,xfs_mount_t *);
+extern void	 xlog_put_bp(struct xfs_buf *);
+extern int	 xlog_bread(xlog_t *, xfs_daddr_t blkno, int bblks, struct xfs_buf *bp);
+extern void	 xlog_recover_process_iunlinks(xlog_t *log);
+
+#define XLOG_TRACE_GRAB_FLUSH  1
+#define XLOG_TRACE_REL_FLUSH   2
+#define XLOG_TRACE_SLEEP_FLUSH 3
+#define XLOG_TRACE_WAKE_FLUSH  4
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_LOG_PRIV_H__ */
diff --git a/include/xfs_log_recover.h b/include/xfs_log_recover.h
new file mode 100644
index 000000000..233cb1635
--- /dev/null
+++ b/include/xfs_log_recover.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_LOG_RECOVER_H__
+#define __XFS_LOG_RECOVER_H__
+
+/*
+ * Macros, structures, prototypes for internal log manager use.
+ */
+
+#define XLOG_RHASH_BITS  4	/* log2 of the number of hash buckets */
+#define XLOG_RHASH_SIZE	(1 << XLOG_RHASH_BITS)	/* 16; must be a power of 2 */
+#define XLOG_RHASH_SHIFT 2	/* low tid bits discarded before masking */
+#define XLOG_RHASH(tid)	/* hash bucket index for tid */ \
+	((((__uint32_t)(tid)) >> XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE - 1))
+
+#define XLOG_MAX_REGIONS_IN_ITEM   (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1)
+
+
+/*
+ * Log recovery item: header is in ri_buf[0]; additional buffers follow.
+ */
+typedef struct xlog_recover_item {
+	struct xlog_recover_item *ri_next;	/* presumably list links -- */
+	struct xlog_recover_item *ri_prev;	/* TODO confirm against users */
+	int			 ri_type;	/* item type; values not in this header */
+	int			 ri_cnt;	/* count of regions found */
+	int			 ri_total;	/* total regions */
+	xfs_log_iovec_t		 *ri_buf;	/* ptr to regions buffer */
+} xlog_recover_item_t;
+
+struct xlog_tid;
+typedef struct xlog_recover {
+	struct xlog_recover *r_next;		/* next entry -- presumably a hash chain */
+	xlog_tid_t	    r_log_tid;		/* log's transaction id */
+	xfs_trans_header_t  r_theader;		/* trans header for partial */
+	int		    r_state;		/* not needed */
+	xfs_lsn_t	    r_lsn;		/* xact lsn */
+	xlog_recover_item_t *r_itemq;		/* q for items */
+} xlog_recover_t;
+
+#define ITEM_TYPE(i)	(*(ushort *)(i)->ri_buf[0].i_addr) /* 1st ushort of ri_buf[0] */
+
+/*
+ * This is the number of entries in the l_buf_cancel_table used during
+ * recovery.
+ */
+#define	XLOG_BC_TABLE_SIZE	64
+
+#define	XLOG_RECOVER_PASS1	1
+#define	XLOG_RECOVER_PASS2	2
+
+#endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
new file mode 100644
index 000000000..b026f2005
--- /dev/null
+++ b/include/xfs_mount.h
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_MOUNT_H__
+#define	__XFS_MOUNT_H__
+
+/* One reservation value per transaction type; see the per-field notes. */
+typedef struct xfs_trans_reservations {
+	uint	tr_write;	/* extent alloc trans */
+	uint	tr_itruncate;	/* truncate trans */
+	uint	tr_rename;	/* rename trans */
+	uint	tr_link;	/* link trans */
+	uint	tr_remove;	/* unlink trans */
+	uint	tr_symlink;	/* symlink trans */
+	uint	tr_create;	/* create trans */
+	uint	tr_mkdir;	/* mkdir trans */
+	uint	tr_ifree;	/* inode free trans */
+	uint	tr_ichange;	/* inode update trans */
+	uint	tr_growdata;	/* fs data section grow trans */
+	uint	tr_swrite;	/* sync write inode trans */
+	uint	tr_addafork;	/* cvt inode to attributed trans */
+	uint	tr_writeid;	/* write setuid/setgid file */
+	uint	tr_attrinval;	/* attr fork buffer invalidation */
+	uint	tr_attrset;	/* set/create an attribute */
+	uint	tr_attrrm;	/* remove an attribute */
+	uint	tr_clearagi;	/* clear bad agi unlinked ino bucket */
+	uint	tr_growrtalloc;	/* grow realtime allocations */
+	uint	tr_growrtzero;	/* grow realtime zeroing */
+	uint	tr_growrtfree;	/* grow realtime freeing */
+} xfs_trans_reservations_t;
+
+
+#ifndef __KERNEL__
+/*
+ * Moved here from xfs_ag.h to avoid reordering header files
+ */
+#define XFS_DADDR_TO_AGNO(mp,d) \
+	((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
+#define XFS_DADDR_TO_AGBNO(mp,d) \
+	((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
+#else
+struct cred;
+struct mounta;
+struct vfs;
+struct vnode;
+struct xfs_args;
+struct xfs_ihash;
+struct xfs_chash;
+struct xfs_inode;
+struct xfs_perag;
+struct xfs_quotainfo;
+struct xfs_iocore;
+struct xfs_dio;
+struct xfs_bmbt_irec;
+struct xfs_bmap_free;
+
+#if defined(INTERRUPT_LATENCY_TESTING)
+#define	SPLDECL(s)		/* s is unused by the mutex variants */
+#define	AIL_LOCK_T		mutex_t
+#define	AIL_LOCKINIT(x,y)	mutex_init(x,MUTEX_DEFAULT, y)
+#define	AIL_LOCK_DESTROY(x)	mutex_destroy(x)
+#define	AIL_LOCK(mp,s)		mutex_lock(&(mp)->m_ail_lock, PZERO)
+#define	AIL_UNLOCK(mp,s)	mutex_unlock(&(mp)->m_ail_lock)
+#else	/* !INTERRUPT_LATENCY_TESTING */
+#define	SPLDECL(s)		int s	/* receives mutex_spinlock()'s result */
+#define	AIL_LOCK_T		lock_t
+#define	AIL_LOCKINIT(x,y)	spinlock_init(x,y)
+#define	AIL_LOCK_DESTROY(x)	spinlock_destroy(x)
+#define	AIL_LOCK(mp,s)		((s) = mutex_spinlock(&(mp)->m_ail_lock))
+#define	AIL_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_ail_lock, (s))
+#endif /* !INTERRUPT_LATENCY_TESTING */
+
+
+/* Prototypes and functions for I/O core modularization, a vector
+ * of functions is used to indirect from xfs/cxfs independent code
+ * to the xfs/cxfs dependent code.
+ * The vector is placed in the mount structure so that we can
+ * minimize the number of memory indirections involved.
+ */
+
+typedef int		(*xfs_dio_write_t)(struct xfs_dio *);
+typedef int		(*xfs_dio_read_t)(struct xfs_dio *);
+typedef int		(*xfs_strat_write_t)(struct xfs_iocore *, struct xfs_buf *);
+typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
+				xfs_fileoff_t, xfs_filblks_t, int,
+				xfs_fsblock_t *, xfs_extlen_t,
+				struct xfs_bmbt_irec *, int *,
+				struct xfs_bmap_free *);
+typedef int		(*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
+typedef int		(*xfs_rsync_t)(void *, int, xfs_off_t, xfs_off_t);
+typedef uint		(*xfs_lck_map_shared_t)(void *);
+typedef void		(*xfs_lock_t)(void *, uint);
+typedef void		(*xfs_lock_demote_t)(void *, uint);
+typedef int		(*xfs_lock_nowait_t)(void *, uint);
+typedef void		(*xfs_unlk_t)(void *, unsigned int);
+typedef void		(*xfs_chgtime_t)(void *, int);
+typedef xfs_fsize_t	(*xfs_size_t)(void *);
+typedef xfs_fsize_t	(*xfs_setsize_t)(void *, xfs_off_t);
+typedef xfs_fsize_t	(*xfs_lastbyte_t)(void *);
+
+#ifdef CELL_CAPABLE
+typedef int             (*xfs_checklock_t)(bhv_desc_t *, struct vnode *,
+                                int, off_t, off_t, int, struct cred *,
+                                struct flid *, vrwlock_t, int);
+#endif
+
+typedef struct xfs_ioops {
+	xfs_dio_write_t		xfs_dio_write_func;
+	xfs_dio_read_t		xfs_dio_read_func;
+	xfs_strat_write_t	xfs_strat_write_func;
+	xfs_bmapi_t		xfs_bmapi_func;
+	xfs_bmap_eof_t		xfs_bmap_eof_func;
+	xfs_rsync_t		xfs_rsync_func;
+	xfs_lck_map_shared_t	xfs_lck_map_shared;
+	xfs_lock_t		xfs_ilock;
+	xfs_lock_demote_t	xfs_ilock_demote;
+	xfs_lock_nowait_t	xfs_ilock_nowait;
+	xfs_unlk_t		xfs_unlock;
+	xfs_chgtime_t		xfs_chgtime;	
+	xfs_size_t		xfs_size_func;
+	xfs_setsize_t		xfs_setsize_func;
+	xfs_lastbyte_t		xfs_lastbyte;
+#ifdef CELL_CAPABLE
+        xfs_checklock_t         xfs_checklock;
+#endif
+} xfs_ioops_t;
+
+
+#define XFS_DIO_WRITE(mp, diop) \
+	(*(mp)->m_io_ops.xfs_dio_write_func)(diop)
+
+#define XFS_DIO_READ(mp, diop) \
+	(*(mp)->m_io_ops.xfs_dio_read_func)(diop)
+
+#define XFS_STRAT_WRITE(mp, io, bp) \
+	(*(mp)->m_io_ops.xfs_strat_write_func)(io, bp)
+
+#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist)	\
+	(*(mp)->m_io_ops.xfs_bmapi_func) \
+		(trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist)
+
+#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \
+	(*(mp)->m_io_ops.xfs_bmap_eof_func) \
+		((io)->io_obj, endoff, whichfork, eof)
+
+#define XFS_RSYNC(mp, io, ioflag, start, end) \
+	(*(mp)->m_io_ops.xfs_rsync_func)((io)->io_obj, ioflag, start, end)
+
+#define XFS_LCK_MAP_SHARED(mp, io) \
+	(*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj)
+
+#define XFS_UNLK_MAP_SHARED(mp, io, mode) \
+	(*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode)
+
+#define XFS_ILOCK(mp, io, mode) \
+	(*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode)
+
+#define XFS_ILOCK_NOWAIT(mp, io, mode) \
+	(*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode)
+
+#define XFS_IUNLOCK(mp, io, mode) \
+	(*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode)
+
+#define XFS_ILOCK_DEMOTE(mp, io, mode) \
+	(*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode)
+
+#define XFS_CHGTIME(mp, io, flags) \
+	(*(mp)->m_io_ops.xfs_chgtime)((io)->io_obj, flags)
+
+#define XFS_SIZE(mp, io) \
+	(*(mp)->m_io_ops.xfs_size_func)((io)->io_obj)
+
+#define XFS_SETSIZE(mp, io, newsize) \
+	(*(mp)->m_io_ops.xfs_setsize_func)((io)->io_obj, newsize)
+
+#define XFS_LASTBYTE(mp, io) \
+	(*(mp)->m_io_ops.xfs_lastbyte)((io)->io_obj)
+
+
+typedef struct xfs_mount {
+	bhv_desc_t		m_bhv;		/* vfs xfs behavior */
+	xfs_tid_t		m_tid;		/* next unused tid for fs */
+	AIL_LOCK_T		m_ail_lock;	/* fs AIL mutex */
+	xfs_ail_entry_t		m_ail;		/* fs active log item list */
+	uint			m_ail_gen;	/* fs AIL generation count */
+	xfs_sb_t		m_sb;		/* copy of fs superblock */
+	lock_t			m_sb_lock;	/* sb counter mutex */
+	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
+	char			*m_fsname; 	/* filesystem name */
+	int			m_fsname_len;	/* strlen of fs name */
+	int			m_bsize;	/* fs logical block size */
+	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
+	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
+	int			m_ihsize;	/* size of next field */
+	struct xfs_ihash	*m_ihash;	/* fs private inode hash table*/
+	struct xfs_inode	*m_inodes;	/* active inode list */
+	mutex_t			m_ilock;	/* inode list mutex */
+	uint			m_ireclaims;	/* count of calls to reclaim*/
+	uint			m_readio_log;	/* min read size log bytes */
+	uint			m_readio_blocks; /* min read size blocks */
+	uint			m_writeio_log;	/* min write size log bytes */
+	uint			m_writeio_blocks; /* min write size blocks */
+	void			*m_log;		/* log specific stuff */
+	int			m_logbufs;	/* number of log buffers */
+	int			m_logbsize;	/* size of each log buffer */
+	uint			m_rsumlevels;	/* rt summary levels */
+	uint			m_rsumsize;	/* size of rt summary, bytes */
+	struct xfs_inode	*m_rbmip;	/* pointer to bitmap inode */
+	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */
+	struct xfs_inode	*m_rootip;	/* pointer to root directory */
+	struct xfs_quotainfo	*m_quotainfo;	/* disk quota information */
+	buftarg_t		m_ddev_targ;	/* ptr to data device */
+	buftarg_t		m_logdev_targ;	/* ptr to log device */
+	buftarg_t		m_rtdev_targ;	/* ptr to rt device */
+	buftarg_t		*m_ddev_targp;	/* saves taking the address */
+#define m_dev		m_ddev_targ.dev
+#define m_logdev	m_logdev_targ.dev
+#define m_rtdev		m_rtdev_targ.dev
+	__uint8_t		m_dircook_elog;	/* log d-cookie entry bits */
+	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
+	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
+	__uint8_t		m_agno_log;	/* log #ag's */
+	__uint8_t		m_agino_log;	/* #bits for agino in inum */
+	__uint8_t		m_nreadaheads;	/* #readahead buffers */
+	__uint16_t		m_inode_cluster_size;/* min inode buf size */
+	uint			m_blockmask;	/* sb_blocksize-1 */
+	uint			m_blockwsize;	/* sb_blocksize in words */
+	uint			m_blockwmask;	/* blockwsize-1 */
+	uint			m_alloc_mxr[2];	/* XFS_ALLOC_BLOCK_MAXRECS */
+	uint			m_alloc_mnr[2];	/* XFS_ALLOC_BLOCK_MINRECS */
+	uint			m_bmap_dmxr[2];	/* XFS_BMAP_BLOCK_DMAXRECS */
+	uint			m_bmap_dmnr[2];	/* XFS_BMAP_BLOCK_DMINRECS */
+	uint			m_inobt_mxr[2];	/* XFS_INOBT_BLOCK_MAXRECS */
+	uint			m_inobt_mnr[2];	/* XFS_INOBT_BLOCK_MINRECS */
+	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
+	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
+	uint			m_in_maxlevels;	/* XFS_IN_MAXLEVELS */
+	struct xfs_perag	*m_perag;	/* per-ag accounting info */
+	mrlock_t		m_peraglock;	/* lock for m_perag (pointer) */
+	sema_t			m_growlock;	/* growfs mutex */
+	int			m_fixedfsid[2];	/* unchanged for life of FS */
+	uint			m_dmevmask;	/* DMI events for this FS */
+	uint			m_flags;	/* global mount flags */
+	uint			m_attroffset;	/* inode attribute offset */
+ 	int			m_da_node_ents;	/* how many entries in danode */
+	int			m_ialloc_inos;	/* inodes in inode allocation */
+	int			m_ialloc_blks;	/* blocks in inode allocation */
+	int			m_litino;	/* size of inode union area */
+	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
+	uint			m_qflags;	/* quota status flags */
+	xfs_trans_reservations_t m_reservations;/* precomputed res values */
+	__uint64_t		m_maxicount;	/* maximum inode count */
+	__uint64_t		m_resblks;	/* total reserved blocks */
+	__uint64_t		m_resblks_avail;/* available reserved blocks */
+#if XFS_BIG_FILESYSTEMS
+	xfs_ino_t		m_inoadd;	/* add value for ino64_offset */
+#endif
+	int			m_dalign;	/* stripe unit */
+	int			m_swidth;	/* stripe width */
+	int			m_sinoalign;	/* stripe unit inode alignmnt */
+	int			m_attr_magicpct;/* 37% of the blocksize */
+	int			m_dir_magicpct;	/* 37% of the dir blocksize */
+	__uint8_t		m_mk_sharedro;	/* mark shared ro on unmount */
+        __uint8_t               m_inode_quiesce;/* call quiesce on new inodes.
+                                                   field governed by m_ilock */
+	__uint8_t		m_dirversion;	/* 1 or 2 */
+	xfs_dirops_t		m_dirops;	/* table of dir funcs */
+	int			m_dirblksize;	/* directory block sz--bytes */
+	int			m_dirblkfsbs;	/* directory block sz--fsbs */
+	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */
+	xfs_dablk_t		m_dirleafblk;	/* blockno of dir non-data v2 */
+	xfs_dablk_t		m_dirfreeblk;	/* blockno of dirfreeindex v2 */
+	int			m_chsize;	/* size of next field */
+	struct xfs_chash	*m_chash;	/* fs private inode per-cluster
+						 * hash table */
+	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
+        struct xfs_expinfo      *m_expinfo;     /* info to export to other 
+                                                   cells. */
+	uint64_t		m_shadow_pinmask;
+						/* which bits matter in rpc
+						   log item pin masks */
+	uint			m_cxfstype;	/* mounted shared, etc. */
+} xfs_mount_t;
+
+/*
+ * Flags for m_flags.
+ */
+#define	XFS_MOUNT_WSYNC		0x00000001	/* for nfs - all metadata ops
+						   must be synchronous except
+						   for space allocations */
+#if XFS_BIG_FILESYSTEMS
+#define	XFS_MOUNT_INO64		0x00000002
+#endif
+#define XFS_MOUNT_ROOTQCHECK	0x00000004
+			     /* 0x00000008	-- currently unused */
+#define XFS_MOUNT_FS_SHUTDOWN	0x00000010	/* atomic stop of all filesystem
+						   operations, typically for
+						   disk errors in metadata */
+#define XFS_MOUNT_NOATIME	0x00000020	/* don't modify inode access
+						   times on reads */
+#define XFS_MOUNT_RETERR	0x00000040      /* return alignment errors to
+                                                   user */
+#define XFS_MOUNT_NOALIGN	0x00000080	/* turn off stripe alignment 
+						   allocations */
+			     /* 0x00000100      -- currently unused */
+#define XFS_MOUNT_REGISTERED    0x00000200      /* registered with cxfs master
+                                                   cell logic */
+#define XFS_MOUNT_NORECOVERY   	0x00000400      /* no recovery - dirty fs */
+#define XFS_MOUNT_SHARED    	0x00000800      /* shared mount */
+#define XFS_MOUNT_DFLT_IOSIZE  	0x00001000      /* set default i/o size */
+#define XFS_MOUNT_OSYNCISDSYNC 	0x00002000      /* treat o_sync like o_dsync */
+
+/*
+ * Flags for m_cxfstype
+ */
+#define XFS_CXFS_NOT		0x00000001	/* local mount */
+#define XFS_CXFS_SERVER		0x00000002	/* we're the CXFS server */
+#define XFS_CXFS_CLIENT		0x00000004	/* We're a CXFS client */
+#define XFS_CXFS_REC_ENABLED	0x00000008	/* recovery is enabled */
+
+#define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
+
+/*
+ * Default minimum read and write sizes.
+ */
+#define	XFS_READIO_LOG_SMALL	15	/* <= 32MB memory */
+#define	XFS_WRITEIO_LOG_SMALL	15
+#define	XFS_READIO_LOG_LARGE	16	/* > 32MB memory */
+#define	XFS_WRITEIO_LOG_LARGE	16
+
+/*
+ * max and min values for UIO and mount-option defined I/O sizes
+ * min value can't be less than a page.  Lower limit for 4K machines
+ * is 8K because that's what was tested.
+ */
+#define XFS_MAX_IO_LOG		16	/* 64K */
+
+#if (_PAGESZ == 16384) || (_PAGESZ == 8192)
+#define XFS_MIN_IO_LOG		14	/* 16K */
+#elif _PAGESZ == 4096
+#define XFS_MIN_IO_LOG		13	/* 8K */
+#else
+#error	"Unknown page size"
+#endif
+
+
+/*
+ * Synchronous read and write sizes.  This should be
+ * better for NFSv2 wsync filesystems.
+ */
+#define	XFS_WSYNC_READIO_LOG	15	/* 32K */
+#define	XFS_WSYNC_WRITEIO_LOG	14	/* 16K */
+
+/* 
+ * Flags sent to xfs_force_shutdown.
+ */
+#define XFS_METADATA_IO_ERROR	0x1
+#define XFS_LOG_IO_ERROR	0x2
+#define XFS_FORCE_UMOUNT	0x4
+#define XFS_CORRUPT_INCORE	0x8	/* corrupt in-memory data structures */
+#ifdef CELL_CAPABLE	/* tested with #ifdef, as elsewhere in this header */
+#define XFS_SHUTDOWN_REMOTE_REQ	0x10	/* shutdown req came from remote cell */
+#endif
+
+/*
+ * xflags for xfs_syncsub
+ */
+#define XFS_XSYNC_RELOC		0x01
+
+/*
+ * Flags for xfs_mountfs
+ */
+#define XFS_MFSI_SECOND         0x01	/* Is a cxfs secondary mount -- skip */
+					/* stuff which should only be done */
+					/* once. */
+#define XFS_MFSI_CLIENT         0x02    /* Is a client -- skip lots of stuff */
+#define XFS_MFSI_NOUNLINK	0x08	/* Skip unlinked inode processing in */
+					/* log recovery */
+
+/*
+ * Macros for getting from mount to vfs and back.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MTOVFS)
+struct vfs *xfs_mtovfs(xfs_mount_t *mp);
+#define	XFS_MTOVFS(mp)		xfs_mtovfs(mp)
+#else
+#define	XFS_MTOVFS(mp)		(bhvtovfs(&(mp)->m_bhv))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOM)
+xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp);
+#define	XFS_BHVTOM(bdp)	xfs_bhvtom(bdp)
+#else
+#define	XFS_BHVTOM(bdp)		((xfs_mount_t *)BHV_PDATA(bdp))
+#endif
+ 
+
+/*
+ * Moved here from xfs_ag.h to avoid reordering header files
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGNO)
+xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d);
+#define XFS_DADDR_TO_AGNO(mp,d)         xfs_daddr_to_agno(mp,d)
+#else
+/* AG number of daddr d; relies on do_div() leaving the quotient in place */
+static inline xfs_agnumber_t XFS_DADDR_TO_AGNO(xfs_mount_t *mp, xfs_daddr_t d)
+{
+        xfs_daddr_t fsbno = XFS_BB_TO_FSBT(mp, d);
+        do_div(fsbno, mp->m_sb.sb_agblocks);
+        return (xfs_agnumber_t) fsbno;
+}
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGBNO)
+xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d);
+#define XFS_DADDR_TO_AGBNO(mp,d)        xfs_daddr_to_agbno(mp,d)
+#else
+/* AG block of daddr d: do_div() result, presumably fsbno % sb_agblocks */
+static inline xfs_agblock_t XFS_DADDR_TO_AGBNO(xfs_mount_t *mp, xfs_daddr_t d)
+{
+        xfs_daddr_t fsbno = XFS_BB_TO_FSBT(mp, d);
+        return (xfs_agblock_t) do_div(fsbno, mp->m_sb.sb_agblocks);
+}
+
+#endif
+
+/*
+ * This structure is for use by the xfs_mod_incore_sb_batch() routine.
+ */
+typedef struct xfs_mod_sb {
+	xfs_sb_field_t	msb_field;	/* Field to modify, see below */
+	int		msb_delta;	/* change to make to the specified field */
+} xfs_mod_sb_t;
+
+#define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock), PINOD)
+#define	XFS_MOUNT_IUNLOCK(mp)	mutex_unlock(&((mp)->m_ilock))
+#define	XFS_SB_LOCK(mp)		mutex_spinlock(&(mp)->m_sb_lock)
+#define	XFS_SB_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_sb_lock,(s))
+
+void		xfs_mod_sb(xfs_trans_t *, __int64_t);
+xfs_mount_t	*xfs_mount_init(void);
+void		xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
+int		xfs_mountfs(struct vfs *, xfs_mount_t *mp, dev_t, int);
+int		xfs_mountargs(struct mounta *, struct xfs_args *);
+
+int		xfs_unmountfs(xfs_mount_t *, int, struct cred *);
+void		xfs_unmountfs_close(xfs_mount_t *, int, struct cred *);
+int             xfs_unmountfs_writesb(xfs_mount_t *);
+int             xfs_unmount_flush(xfs_mount_t *, int);
+int		xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
+int		xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int);
+int		xfs_readsb(xfs_mount_t *mp, dev_t);
+struct xfs_buf	*xfs_getsb(xfs_mount_t *, int);
+void            xfs_freesb(xfs_mount_t *);
+void		xfs_force_shutdown(struct xfs_mount *, int);
+int		xfs_syncsub(xfs_mount_t *, int, int, int *);
+void		xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t);
+extern	struct vfsops xfs_vfsops;
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_MOUNT_H__ */
diff --git a/include/xfs_quota.h b/include/xfs_quota.h
new file mode 100644
index 000000000..794b90ee2
--- /dev/null
+++ b/include/xfs_quota.h
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QUOTA_H__
+#define __XFS_QUOTA_H__
+
+/* 
+ * We use only 16-bit prid's in the inode, not the 64-bit version in the proc.
+ * uid_t is hard-coded to 32 bits in the inode. Hence, an 'id' in a dquot is
+ * 32 bits.
+ */
+typedef __int32_t	xfs_dqid_t;
+/*
+ * Even though users may not have quota limits occupying all 64-bits, 
+ * they may need 64-bit accounting. Hence, 64-bit quota-counters,
+ * and quota-limits. This is a waste in the common case, but heh ...
+ */
+typedef __uint64_t	xfs_qcnt_t;
+typedef __uint16_t      xfs_qwarncnt_t;
+
+/* 
+ * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
+ */
+#define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
+#define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
+#define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
+#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
+#define XFS_PQUOTA_ENFD	0x0010  /* proj quota limits enforced */
+#define XFS_PQUOTA_CHKD	0x0020  /* quotacheck run on prj quotas */
+
+/* 
+ * Incore only flags for quotaoff - these bits get cleared when quota(s)
+ * are in the process of getting turned off. These flags are in m_qflags but
+ * never in sb_qflags.
+ */
+#define XFS_UQUOTA_ACTIVE	0x0040  /* cleared when uquotas are turning off */
+#define XFS_PQUOTA_ACTIVE	0x0080  /* cleared when pquotas are turning off */
+
+/*
+ * Typically, we turn quotas off if we weren't explicitly asked to 
+ * mount quotas. This is the mount option not to do that.
+ * This option is handy in the miniroot, when trying to mount /root.
+ * We can't really know what's in /etc/fstab until /root is already mounted!
+ * This stops quotas getting turned off in the root filesystem every time
+ * the system boots up a miniroot.
+ */
+#define XFS_QUOTA_MAYBE		0x0100 /* Turn quotas on if SB has quotas on */
+
+/*
+ * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
+ * quota will not be switched off as long as that inode lock is held.
+ */
+#define XFS_IS_QUOTA_ON(mp)  	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
+#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
+#define XFS_IS_PQUOTA_ON(mp) 	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
+
+/*
+ * Flags to tell various functions what to do. Not all of these are meaningful
+ * to a single function. None of these XFS_QMOPT_* flags are meant to have
+ * persistent values (ie. their values can and will change between versions)
+ */
+#define XFS_QMOPT_DQLOCK	0x0000001 /* dqlock */
+#define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
+#define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
+#define XFS_QMOPT_PQUOTA	0x0000008 /* proj dquot requested */
+#define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
+#define XFS_QMOPT_DQSUSER	0x0000020 /* don't cache super users dquot */
+#define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
+#define XFS_QMOPT_QUOTAOFF	0x0000080 /* quotas are being turned off */
+#define XFS_QMOPT_UMOUNTING	0x0000100 /* filesys is being unmounted */
+#define XFS_QMOPT_DOLOG		0x0000200 /* log buf changes (in quotacheck) */
+#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if necessary */
+#define XFS_QMOPT_ILOCKED	0x0000800 /* inode is already locked (excl) */
+#define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot, if damaged. */
+
+/* 
+ * flags to xfs_trans_mod_dquot to indicate which field needs to be
+ * modified.
+ */
+#define XFS_QMOPT_RES_REGBLKS 	0x0010000
+#define XFS_QMOPT_RES_RTBLKS	0x0020000
+#define XFS_QMOPT_BCOUNT	0x0040000
+#define XFS_QMOPT_ICOUNT	0x0080000
+#define XFS_QMOPT_RTBCOUNT	0x0100000
+#define XFS_QMOPT_DELBCOUNT	0x0200000
+#define XFS_QMOPT_DELRTBCOUNT	0x0400000
+#define XFS_QMOPT_RES_INOS	0x0800000
+
+/*
+ * flags for dqflush and dqflush_all.
+ */
+#define XFS_QMOPT_SYNC		0x1000000
+#define XFS_QMOPT_ASYNC		0x2000000
+#define XFS_QMOPT_DELWRI	0x4000000
+
+/* 
+ * flags to xfs_trans_mod_dquot.
+ */
+#define XFS_TRANS_DQ_RES_BLKS	XFS_QMOPT_RES_REGBLKS
+#define XFS_TRANS_DQ_RES_RTBLKS	XFS_QMOPT_RES_RTBLKS
+#define XFS_TRANS_DQ_RES_INOS	XFS_QMOPT_RES_INOS
+#define XFS_TRANS_DQ_BCOUNT	XFS_QMOPT_BCOUNT
+#define XFS_TRANS_DQ_DELBCOUNT	XFS_QMOPT_DELBCOUNT
+#define XFS_TRANS_DQ_ICOUNT	XFS_QMOPT_ICOUNT
+#define XFS_TRANS_DQ_RTBCOUNT	XFS_QMOPT_RTBCOUNT
+#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
+
+
+#define XFS_QMOPT_QUOTALL	(XFS_QMOPT_UQUOTA|XFS_QMOPT_PQUOTA)
+#define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
+
+/*
+ * This check is typically done without holding the inode lock;
+ * that may seem racy, but it is harmless in the context in which it is used.
+ * The inode cannot go inactive as long as a reference is kept, and
+ * therefore if dquot(s) were attached, they'll stay consistent.
+ * If, for example, the ownership of the inode changes while
+ * we didn't have the inode locked, the appropriate dquot(s) will be
+ * attached atomically.
+ */
+#define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\
+				     (ip)->i_udquot == NULL) || \
+				    (XFS_IS_PQUOTA_ON(mp) && \
+				     (ip)->i_pdquot == NULL))
+/* True if an active quota type lacks its *_CHKD bit in sb_qflags */
+#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \
+				     ((mp)->m_sb.sb_qflags & \
+				      XFS_UQUOTA_CHKD) == 0) || \
+				    (XFS_IS_PQUOTA_ON(mp) && \
+				     ((mp)->m_sb.sb_qflags & \
+				      XFS_PQUOTA_CHKD) == 0))
+
+#define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
+				 XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD)
+#define XFS_MOUNT_QUOTA_MASK	(XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
+				 XFS_PQUOTA_ACTIVE)
+
+#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
+
+
+#ifdef __KERNEL__
+/*
+ * External Interface to the XFS disk quota subsystem.
+ */
+struct	bhv_desc;
+struct  vfs;
+struct  xfs_disk_dquot;
+struct  xfs_dqhash;
+struct  xfs_dquot;
+struct  xfs_inode;
+struct  xfs_mount;
+struct  xfs_trans;
+
+/*
+ * Quota Manager Interface.
+ */
+extern struct xfs_qm   *xfs_qm_init(void);
+extern void 		xfs_qm_destroy(struct xfs_qm *);
+extern int		xfs_qm_dqflush_all(struct xfs_mount *, int);
+extern int		xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int		xfs_qm_dqpurge_all(struct xfs_mount *, uint);
+extern void		xfs_qm_mount_quotainit(struct xfs_mount *, uint);
+extern void		xfs_qm_unmount_quotadestroy(struct xfs_mount *);
+extern int		xfs_qm_mount_quotas(struct xfs_mount *);
+extern int 		xfs_qm_unmount_quotas(struct xfs_mount *);
+extern void		xfs_qm_dqdettach_inode(struct xfs_inode *);
+extern int 		xfs_qm_sync(struct xfs_mount *, short);
+
+
+/*
+ * system call interface
+ */
+extern int		xfs_quotactl(xfs_mount_t *, struct vfs *, int, int,
+				     int, xfs_caddr_t);
+
+/*
+ * dquot interface.
+ */
+extern void		xfs_dqlock(struct xfs_dquot *);
+extern void		xfs_dqunlock(struct xfs_dquot *);
+extern void		xfs_dqunlock_nonotify(struct xfs_dquot *);
+extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+extern void 		xfs_qm_dqput(struct xfs_dquot *);
+extern void 		xfs_qm_dqrele(struct xfs_dquot *);
+extern xfs_dqid_t	xfs_qm_dqid(struct xfs_dquot *);
+extern int		xfs_qm_dqget(struct xfs_mount *, 
+				     struct xfs_inode *, xfs_dqid_t,
+				      uint, uint, struct xfs_dquot **);
+extern int 		xfs_qm_dqcheck(struct xfs_disk_dquot *, 
+				       xfs_dqid_t, uint, uint, char *);
+
+/*
+ * Vnodeops specific code that should actually be _in_ xfs_vnodeops.c, but
+ * is here because it's nicer to keep vnodeops (therefore, XFS) lean 
+ * and clean.
+ */
+extern struct xfs_dquot *	xfs_qm_vop_chown(struct xfs_trans *, 
+						 struct xfs_inode *, 
+						 struct xfs_dquot **,
+						 struct xfs_dquot *);
+extern int		xfs_qm_vop_dqalloc(struct xfs_mount *,
+					   struct xfs_inode *,
+					   uid_t, xfs_prid_t, uint,
+					   struct xfs_dquot	**,
+					   struct xfs_dquot	**);
+
+extern int		xfs_qm_vop_chown_dqalloc(struct xfs_mount *,
+						 struct xfs_inode *,
+						 int, uid_t, xfs_prid_t,
+						 struct xfs_dquot **,
+						 struct xfs_dquot **);
+
+extern int		xfs_qm_vop_chown_reserve(struct xfs_trans *,
+						 struct xfs_inode *,
+						 struct xfs_dquot *,
+						 struct xfs_dquot *,
+						 uint);
+
+extern int		xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
+						struct xfs_trans *,
+						struct xfs_inode *,
+						struct xfs_dquot *,	
+						struct xfs_dquot *);
+
+
+/*
+ * Dquot Transaction interface
+ */
+extern void 		xfs_trans_alloc_dqinfo(struct xfs_trans *);
+extern void 		xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void		xfs_trans_dup_dqinfo(struct xfs_trans *, 
+					     struct xfs_trans *);
+extern void		xfs_trans_mod_dquot(struct xfs_trans *, 
+					    struct xfs_dquot *,
+					    uint, long);
+extern int		xfs_trans_mod_dquot_byino(struct xfs_trans *, 
+						  struct xfs_inode *,
+						  uint, long);
+extern void		xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void		xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+
+extern int		xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+						      struct xfs_inode *,
+						      long, long, uint);
+
+
+extern int		xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+							 struct xfs_dquot *,
+							 struct xfs_dquot *,
+							 long, long, uint);
+extern void		xfs_trans_log_dquot(struct xfs_trans *,
+					    struct xfs_dquot *);
+extern void		xfs_trans_dqjoin(struct xfs_trans *,
+					 struct xfs_dquot *);
+extern void		xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
+
+/* 
+ * Regular disk block quota reservations 
+ */
+#define 	xfs_trans_reserve_blkquota(tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_REGBLKS)
+/* The unreserve forms reserve a negated count through the same routine. */
+#define 	xfs_trans_unreserve_blkquota(tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_REGBLKS)
+/* f is parenthesized so an expression argument (e.g. c ? x : y) ORs whole. */
+#define 	xfs_trans_reserve_quota(tp, udq, pdq, nb, ni, f) \
+xfs_trans_reserve_quota_bydquots(tp, udq, pdq, nb, ni, (f)|XFS_QMOPT_RES_REGBLKS)
+
+#define 	xfs_trans_unreserve_quota(tp, ud, pd, b, i, f) \
+xfs_trans_reserve_quota_bydquots(tp, ud, pd, -(b), -(i), (f)|XFS_QMOPT_RES_REGBLKS)
+
+/*
+ * Realtime disk block quota reservations 
+ */
+#define 	xfs_trans_reserve_rtblkquota(mp, tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_RTBLKS)
+/* mp is accepted by the two reserve forms for their callers but is unused. */
+#define 	xfs_trans_unreserve_rtblkquota(tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_RTBLKS)
+/* _bydquots takes (tp, udq, pdq, nblks, ninos, flags): six arguments. */
+#define 	xfs_trans_reserve_rtquota(mp, tp, uq, pq, blks, f) \
+xfs_trans_reserve_quota_bydquots(tp, uq, pq, blks, 0, (f)|XFS_QMOPT_RES_RTBLKS)
+
+#define 	xfs_trans_unreserve_rtquota(tp, uq, pq, blks) \
+xfs_trans_reserve_quota_bydquots(tp, uq, pq, -(blks), 0, XFS_QMOPT_RES_RTBLKS)
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_QUOTA_H__ */
diff --git a/include/xfs_rtalloc.h b/include/xfs_rtalloc.h
new file mode 100644
index 000000000..be2b88a34
--- /dev/null
+++ b/include/xfs_rtalloc.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_RTALLOC_H__
+#define	__XFS_RTALLOC_H__
+
+struct xfs_mount;
+struct xfs_trans;
+
+/* Realtime extent size limits and default; all values are byte counts. */
+#define	XFS_MAX_RTEXTSIZE	(1 << 30)	/* 1GB */
+#define	XFS_DFL_RTEXTSIZE	(1 << 16)	/* 64KB */
+#define	XFS_MIN_RTEXTSIZE	(1 << 12)	/* 4KB */
+
+/*
+ * Constants for bit manipulations on xfs_rtword_t-sized words.
+ */
+#define	XFS_NBBYLOG	3		/* log2(NBBY) */
+#define	XFS_WORDLOG	2		/* log2(sizeof(xfs_rtword_t)) */
+#define	XFS_NBWORDLOG	(XFS_NBBYLOG + XFS_WORDLOG)	/* log2(bits per word): 5 */
+#define	XFS_NBWORD	(1 << XFS_NBWORDLOG)	/* bits per word: 32 */
+#define	XFS_WORDMASK	((1 << XFS_WORDLOG) - 1)	/* low XFS_WORDLOG bits: 3 */
+/* Accessors for block geometry fields read from the mount structure mp. */
+#define	XFS_BLOCKSIZE(mp)	((mp)->m_sb.sb_blocksize)
+#define	XFS_BLOCKMASK(mp)	((mp)->m_blockmask)
+#define	XFS_BLOCKWSIZE(mp)	((mp)->m_blockwsize)
+#define	XFS_BLOCKWMASK(mp)	((mp)->m_blockwmask)
+
+/*
+ * Summary-entry indexing (SUM*) and bitmap bit/word/block conversions.
+ */
+#define	XFS_SUMOFFS(mp,ls,bb)	((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
+#define	XFS_SUMOFFSTOBLOCK(mp,s)	\
+	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
+#define	XFS_SUMPTR(mp,bp,so)	\
+	((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
+/* Shift/mask conversions on bitmap bit indices. */
+#define	XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
+#define	XFS_BLOCKTOBIT(mp,bb)	((bb) << (mp)->m_blkbit_log)
+#define	XFS_BITTOWORD(mp,bi)	\
+	((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
+/* Plain min/max; note that each argument is evaluated twice. */
+#define	XFS_RTMIN(a,b)	((a) < (b) ? (a) : (b))
+#define	XFS_RTMAX(a,b)	((a) > (b) ? (a) : (b))
+
+#define	XFS_RTLOBIT(w)	xfs_lowbit32(w)
+#define	XFS_RTHIBIT(w)	xfs_highbit32(w)
+
+#if XFS_BIG_FILESYSTEMS	/* block numbers go through xfs_highbit64 */
+#define	XFS_RTBLOCKLOG(b)	xfs_highbit64(b)
+#else	/* !XFS_BIG_FILESYSTEMS: xfs_highbit32 is used instead */
+#define	XFS_RTBLOCKLOG(b)	xfs_highbit32(b)
+#endif	/* XFS_BIG_FILESYSTEMS */
+
+/*
+ * Function prototypes for exported functions.
+ */
+
+/*
+ * Allocate a realtime-subvolume extent using the usual allocation
+ * parameters.  Lengths (minlen, maxlen, *len) and the result block number
+ * (*rtblock) are in realtime extents.  Returns an error code.
+ *   tp: transaction pointer; bno: starting block number to allocate;
+ *   len: out, actual length allocated; type: XFS_ALLOCTYPE...;
+ *   wasdel: was a delayed allocation extent; prod: extent product factor.
+ */
+int xfs_rtallocate_extent(struct xfs_trans *tp,
+			  xfs_rtblock_t bno,
+			  xfs_extlen_t minlen,
+			  xfs_extlen_t maxlen,
+			  xfs_extlen_t *len,
+			  xfs_alloctype_t type,
+			  int wasdel,
+			  xfs_extlen_t prod, xfs_rtblock_t *rtblock);
+
+/*
+ * Return an extent to the realtime subvolume.  Both bno (starting block
+ * number to free) and len (length of extent freed) are in realtime
+ * extents; tp is the transaction pointer.  Returns an error code.
+ */
+int xfs_rtfree_extent(
+	struct xfs_trans *tp,
+	xfs_rtblock_t bno,
+	xfs_extlen_t len);
+
+/*
+ * Set up the realtime fields of the file system mount structure mp.
+ *
+ * Returns an error code.
+ */
+int xfs_rtmount_init(struct xfs_mount *mp);
+
+/*
+ * Mount-time hook: bring the realtime bitmap and summary inodes into
+ * the file system mount structure mp.
+ *
+ * Returns an error code.
+ */
+int xfs_rtmount_inodes(struct xfs_mount *mp);
+
+/*
+ * Choose where a new realtime file should start allocating.  The sequence
+ * number kept in the atime field of the bitmap inode is turned into a
+ * fraction of the rtextents (0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ...)
+ * and the product of rtextents and that fraction is returned in *pick.
+ *
+ * mp is the file system mount point, tp the transaction pointer and len
+ * the allocation length in rtextents.  Returns an error code.
+ */
+int xfs_rtpick_extent(struct xfs_mount *mp,
+		      struct xfs_trans *tp,
+		      xfs_extlen_t len,
+		      xfs_rtblock_t *pick);
+
+#ifdef XFSDEBUG
+/*
+ * Debug code: dump the bitmap values for a range.  mp is the file system
+ * mount structure, tp the transaction pointer, start the first block to
+ * print and len the length to print.
+ */
+void xfs_rtprint_range(struct xfs_mount *mp,
+		       struct xfs_trans *tp,
+		       xfs_rtblock_t start,
+		       xfs_extlen_t len);
+
+/*
+ * Debug code: dump the summary file.  mp is the file system mount
+ * structure and tp the transaction pointer.
+ */
+void xfs_rtprint_summary(
+	struct xfs_mount *mp,
+	struct xfs_trans *tp);
+#endif	/* XFSDEBUG */
+
+#endif	/* __XFS_RTALLOC_H__ */
diff --git a/include/xfs_sb.h b/include/xfs_sb.h
new file mode 100644
index 000000000..6526d107e
--- /dev/null
+++ b/include/xfs_sb.h
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SB_H__
+#define	__XFS_SB_H__
+
+/*
+ * Super block
+ * Fits into a 512-byte buffer at daddr_t 0 of each allocation group.
+ * Only the first of these is ever updated except during growfs.
+ */
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define	XFS_SB_MAGIC		0x58465342	/* 'XFSB' */
+#define	XFS_SB_VERSION_1	1		/* 5.3, 6.0.1, 6.1 */
+#define	XFS_SB_VERSION_2	2		/* 6.2 - attributes */
+#define	XFS_SB_VERSION_3	3		/* 6.2 - new inode version */
+#define	XFS_SB_VERSION_4	4		/* 6.2+ - bitmask version */
+#define	XFS_SB_VERSION_NUMBITS		0x000f	/* version number nibble */
+#define	XFS_SB_VERSION_ALLFBITS		0xfff0	/* everything above NUMBITS */
+#define	XFS_SB_VERSION_SASHFBITS	0xf000	/* top nibble of ALLFBITS */
+#define	XFS_SB_VERSION_REALFBITS	0x0ff0	/* remainder of ALLFBITS */
+#define	XFS_SB_VERSION_ATTRBIT		0x0010	/* see XFS_SB_VERSION_HASATTR */
+#define	XFS_SB_VERSION_NLINKBIT		0x0020	/* see XFS_SB_VERSION_HASNLINK */
+#define	XFS_SB_VERSION_QUOTABIT		0x0040	/* see XFS_SB_VERSION_HASQUOTA */
+#define	XFS_SB_VERSION_ALIGNBIT		0x0080	/* see XFS_SB_VERSION_HASALIGN */
+#define	XFS_SB_VERSION_DALIGNBIT	0x0100	/* see XFS_SB_VERSION_HASDALIGN */
+#define	XFS_SB_VERSION_SHAREDBIT	0x0200	/* see XFS_SB_VERSION_HASSHARED */
+#define	XFS_SB_VERSION_EXTFLGBIT	0x1000	/* see XFS_SB_VERSION_HASEXTFLGBIT */
+#define	XFS_SB_VERSION_DIRV2BIT		0x2000	/* see XFS_SB_VERSION_HASDIRV2 */
+/* Feature bits in the 0xf000 range that this code accepts. */
+#define	XFS_SB_VERSION_OKSASHFBITS	\
+	(XFS_SB_VERSION_EXTFLGBIT | XFS_SB_VERSION_DIRV2BIT)
+/* Feature bits in the 0x0ff0 range that this code accepts. */
+#define	XFS_SB_VERSION_OKREALFBITS	\
+	(XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT | \
+	 XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT | \
+	 XFS_SB_VERSION_DALIGNBIT | XFS_SB_VERSION_SHAREDBIT)
+/*
+ * Whole-word masks (version nibble plus tolerated feature bits) used by
+ * XFS_SB_GOOD_SASH_VERSION and XFS_SB_GOOD_VERSION respectively.
+ */
+#define	XFS_SB_VERSION_OKSASHBITS	\
+	(XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_REALFBITS | \
+	 XFS_SB_VERSION_OKSASHFBITS)
+#define	XFS_SB_VERSION_OKREALBITS	\
+	(XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_OKREALFBITS | \
+	 XFS_SB_VERSION_OKSASHFBITS)
+/* Version word for mkfs: plain version 1 unless any feature is requested. */
+#define	XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2)	\
+	(!((ia) || (dia) || (extflag) || (dirv2)) ? XFS_SB_VERSION_1 : \
+	 (XFS_SB_VERSION_4 | \
+	  ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
+	  ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
+	  ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
+	  ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0)))
+
+typedef struct xfs_sb	/* field order is mirrored by xfs_sb_field_t */
+{
+	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
+	__uint32_t	sb_blocksize;	/* logical block size, bytes */
+	xfs_drfsbno_t	sb_dblocks;	/* number of data blocks */
+	xfs_drfsbno_t	sb_rblocks;	/* number of realtime blocks */
+	xfs_drtbno_t	sb_rextents;	/* number of realtime extents */
+	uuid_t		sb_uuid;	/* file system unique id */
+	xfs_dfsbno_t	sb_logstart;	/* starting block of log if internal */
+	xfs_ino_t	sb_rootino;	/* root inode number */
+	xfs_ino_t	sb_rbmino;	/* bitmap inode for realtime extents */
+	xfs_ino_t	sb_rsumino;	/* summary inode for rt bitmap */
+	xfs_agblock_t	sb_rextsize;	/* realtime extent size, blocks */
+	xfs_agblock_t	sb_agblocks;	/* size of an allocation group */
+	xfs_agnumber_t	sb_agcount;	/* number of allocation groups */
+	xfs_extlen_t	sb_rbmblocks;	/* number of rt bitmap blocks */
+	xfs_extlen_t	sb_logblocks;	/* number of log blocks */
+	__uint16_t	sb_versionnum;	/* XFS_SB_VERSION_* number and bits */
+	__uint16_t	sb_sectsize;	/* volume sector size, bytes */
+	__uint16_t	sb_inodesize;	/* inode size, bytes */
+	__uint16_t	sb_inopblock;	/* inodes per block */
+	char		sb_fname[12];	/* file system name */
+	__uint8_t	sb_blocklog;	/* log2 of sb_blocksize */
+	__uint8_t	sb_sectlog;	/* log2 of sb_sectsize */
+	__uint8_t	sb_inodelog;	/* log2 of sb_inodesize */
+	__uint8_t	sb_inopblog;	/* log2 of sb_inopblock */
+	__uint8_t	sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
+	__uint8_t	sb_rextslog;	/* log2 of sb_rextents */
+	__uint8_t	sb_inprogress;	/* mkfs is in progress, don't mount */
+	__uint8_t	sb_imax_pct;	/* max % of fs for inode space */
+					/* statistics */
+	/*
+	 * These fields must remain contiguous.  If you really
+	 * want to change their layout, make sure you fix the
+	 * code in xfs_trans_apply_sb_deltas().
+	 */
+	__uint64_t	sb_icount;	/* allocated inodes */
+	__uint64_t	sb_ifree;	/* free inodes */
+	__uint64_t	sb_fdblocks;	/* free data blocks */
+	__uint64_t	sb_frextents;	/* free realtime extents */
+	/*
+	 * End contiguous fields.
+	 */
+	xfs_ino_t	sb_uquotino;	/* user quota inode */
+	xfs_ino_t	sb_pquotino;	/* project quota inode */
+	__uint16_t	sb_qflags;	/* quota flags */
+	__uint8_t	sb_flags;	/* misc. flags (presumably XFS_SBF_*) */
+	__uint8_t	sb_shared_vn;	/* shared version number */
+	xfs_extlen_t	sb_inoalignmt;	/* inode chunk alignment, fsblocks */
+	__uint32_t	sb_unit;	/* stripe or raid unit */
+	__uint32_t	sb_width;	/* stripe or raid width */
+	__uint8_t	sb_dirblklog;	/* log2 of dir block size (fsbs) */
+        __uint8_t       sb_dummy[7];    /* padding */
+} xfs_sb_t;
+
+/*
+ * Sequence number values for the fields, in xfs_sb_t declaration order.
+ */
+typedef enum {
+	XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
+	XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
+	XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
+	XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
+	XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
+	XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
+	XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
+	XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
+	XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
+	XFS_SBS_PQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
+	XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
+        XFS_SBS_DUMMY,
+	XFS_SBS_FIELDCOUNT	/* count of the enumerators above */
+} xfs_sb_field_t;
+
+/*
+ * Mask values, defined based on the xfs_sb_field_t values.
+ * Only define the ones we're using.
+ */
+#define	XFS_SB_MVAL(x)		(1LL << XFS_SBS_ ## x)	/* long long bit for field x */
+#define	XFS_SB_UUID		XFS_SB_MVAL(UUID)
+#define	XFS_SB_FNAME		XFS_SB_MVAL(FNAME)
+#define	XFS_SB_ROOTINO		XFS_SB_MVAL(ROOTINO)
+#define	XFS_SB_RBMINO		XFS_SB_MVAL(RBMINO)
+#define	XFS_SB_RSUMINO		XFS_SB_MVAL(RSUMINO)
+#define	XFS_SB_VERSIONNUM	XFS_SB_MVAL(VERSIONNUM)
+#define XFS_SB_UQUOTINO		XFS_SB_MVAL(UQUOTINO)
+#define XFS_SB_PQUOTINO		XFS_SB_MVAL(PQUOTINO)
+#define XFS_SB_QFLAGS		XFS_SB_MVAL(QFLAGS)
+#define XFS_SB_SHARED_VN	XFS_SB_MVAL(SHARED_VN)
+#define XFS_SB_UNIT		XFS_SB_MVAL(UNIT)
+#define XFS_SB_WIDTH		XFS_SB_MVAL(WIDTH)
+#define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)	/* one bit per field */
+#define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)	/* low NUM_BITS set */
+#define	XFS_SB_MOD_BITS		/* NOTE(review): omits XFS_SB_FNAME - confirm */ \
+	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
+	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_PQUOTINO | \
+	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH)
+
+/*
+ * Misc. flags.  Warning: xfs_repair will clear these unless a feature
+ * bit is also set whenever one of the flags is used.
+ */
+#define XFS_SBF_NOFLAGS		0x00	/* no flags set */
+#define XFS_SBF_READONLY	0x01	/* only read-only mounts allowed */
+
+/*
+ * Maximum shared version number we can interoperate with.
+ */
+#define XFS_SB_MAX_SHARED_VN	0
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_NUM)
+int xfs_sb_version_num(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_NUM(sbp)	xfs_sb_version_num(sbp)
+#else	/* inline: sb_versionnum masked down to XFS_SB_VERSION_NUMBITS */
+#define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
+#endif	/* XFS_SB_VERSION_NUM */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_GOOD_VERSION)
+int xfs_sb_good_version(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_GOOD_VERSION(sbp)	xfs_sb_good_version(sbp)
+#else	/* inline: v1-v3, or v4 with only OKREALBITS set plus a shared_vn check */
+#define	XFS_SB_GOOD_VERSION_INT(sbp)	\
+	((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
+	  ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
+	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	  !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS)
+#ifdef __KERNEL__	/* v4 branch always checks sb_shared_vn */
+#define	XFS_SB_GOOD_VERSION(sbp)	\
+	(XFS_SB_GOOD_VERSION_INT(sbp) && \
+	  (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN) ))
+#else	/* v4 branch checks sb_shared_vn only when SHAREDBIT is set */
+/*
+ * The extra 2 parens here (( are only to unconfuse paren-matching
+ * editors like vi: XFS_SB_GOOD_VERSION_INT is deliberately a partial
+ * expression that leaves two parentheses open, so each of the two
+ * XFS_SB_GOOD_VERSION definitions ends with 2 extra close parens.
+ */
+#define XFS_SB_GOOD_VERSION(sbp)	\
+	(XFS_SB_GOOD_VERSION_INT(sbp) && \
+	  (!((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \
+	   (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)) ))
+#endif /* __KERNEL__ */
+#endif	/* XFS_SB_GOOD_VERSION */
+
+#define	XFS_SB_GOOD_SASH_VERSION(sbp)	/* v1-v3, or v4 within OKSASHBITS */ \
+	((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
+	  ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
+	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	  !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS)))
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TONEW)
+unsigned xfs_sb_version_tonew(unsigned v);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_TONEW(v)	xfs_sb_version_tonew(v)
+#else	/* inline: v1 -> 4; v2 -> 4|ATTRBIT; anything else -> 4|ATTRBIT|NLINKBIT */
+#define	XFS_SB_VERSION_TONEW(v)	\
+	((((v) == XFS_SB_VERSION_1) ? \
+		0 : \
+		(((v) == XFS_SB_VERSION_2) ? \
+			XFS_SB_VERSION_ATTRBIT : \
+			(XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \
+	 XFS_SB_VERSION_4)
+#endif	/* XFS_SB_VERSION_TONEW */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TOOLD)
+unsigned xfs_sb_version_toold(unsigned v);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_TOOLD(v)	xfs_sb_version_toold(v)
+#else	/* inline: 0 if QUOTABIT or ALIGNBIT set; else 3, 2 or 1 by NLINKBIT/ATTRBIT */
+#define	XFS_SB_VERSION_TOOLD(v)	\
+	(((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
+		0 : \
+		(((v) & XFS_SB_VERSION_NLINKBIT) ? \
+			XFS_SB_VERSION_3 : \
+			(((v) & XFS_SB_VERSION_ATTRBIT) ?  \
+				XFS_SB_VERSION_2 : \
+				XFS_SB_VERSION_1)))
+#endif	/* XFS_SB_VERSION_TOOLD */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASATTR)
+int xfs_sb_version_hasattr(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_HASATTR(sbp)	xfs_sb_version_hasattr(sbp)
+#else	/* inline: true for v2, v3, or v4 with ATTRBIT */
+#define	XFS_SB_VERSION_HASATTR(sbp)	\
+	(((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
+	 ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
+	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	  ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)))
+#endif	/* XFS_SB_VERSION_HASATTR */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDATTR)
+void xfs_sb_version_addattr(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_ADDATTR(sbp)	xfs_sb_version_addattr(sbp)
+#else	/* inline: v1 -> v2; v4 gains ATTRBIT; anything else -> 4|ATTRBIT */
+#define	XFS_SB_VERSION_ADDATTR(sbp)	\
+	((sbp)->sb_versionnum = \
+	 (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
+		XFS_SB_VERSION_2 : \
+		((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \
+			((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \
+			(XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))))
+#endif	/* XFS_SB_VERSION_ADDATTR */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASNLINK)
+int xfs_sb_version_hasnlink(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_HASNLINK(sbp)	xfs_sb_version_hasnlink(sbp)
+#else	/* inline: true for v3, or v4 with NLINKBIT */
+#define	XFS_SB_VERSION_HASNLINK(sbp)	\
+	(((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
+	 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	  ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)))
+#endif	/* XFS_SB_VERSION_HASNLINK */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDNLINK)
+void xfs_sb_version_addnlink(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_ADDNLINK(sbp)	xfs_sb_version_addnlink(sbp)
+#else	/* inline: versionnum <= 2 becomes 3; otherwise NLINKBIT is OR-ed in */
+#define	XFS_SB_VERSION_ADDNLINK(sbp)	\
+	((sbp)->sb_versionnum = \
+	 ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
+		XFS_SB_VERSION_3 : \
+		((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)))
+#endif	/* XFS_SB_VERSION_ADDNLINK */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASQUOTA)
+int xfs_sb_version_hasquota(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_HASQUOTA(sbp)	xfs_sb_version_hasquota(sbp)
+#else	/* inline: true only for v4 with QUOTABIT */
+#define	XFS_SB_VERSION_HASQUOTA(sbp)	\
+	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	 ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT))
+#endif	/* XFS_SB_VERSION_HASQUOTA */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDQUOTA)
+void xfs_sb_version_addquota(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_ADDQUOTA(sbp)	xfs_sb_version_addquota(sbp)
+#else	/* inline: v4 gains QUOTABIT; older versions go through TONEW() first */
+#define	XFS_SB_VERSION_ADDQUOTA(sbp)	\
+	((sbp)->sb_versionnum = \
+	 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
+		((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
+		(XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \
+		 XFS_SB_VERSION_QUOTABIT)))
+#endif	/* XFS_SB_VERSION_ADDQUOTA */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASALIGN)
+int xfs_sb_version_hasalign(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_HASALIGN(sbp)	xfs_sb_version_hasalign(sbp)
+#else	/* inline: true only for v4 with ALIGNBIT */
+#define	XFS_SB_VERSION_HASALIGN(sbp)	\
+	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	 ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT))
+#endif	/* XFS_SB_VERSION_HASALIGN */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBALIGN)
+void xfs_sb_version_subalign(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define	XFS_SB_VERSION_SUBALIGN(sbp)	xfs_sb_version_subalign(sbp)
+#else	/* inline: clear ALIGNBIT, then store the result of TOOLD() */
+#define	XFS_SB_VERSION_SUBALIGN(sbp)	\
+	((sbp)->sb_versionnum = \
+	 XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT))
+#endif	/* XFS_SB_VERSION_SUBALIGN */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDALIGN)
+int xfs_sb_version_hasdalign(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_HASDALIGN(sbp)	xfs_sb_version_hasdalign(sbp)
+#else	/* inline: true only for v4 with DALIGNBIT */
+#define XFS_SB_VERSION_HASDALIGN(sbp)	\
+	((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	 ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT))
+#endif	/* XFS_SB_VERSION_HASDALIGN */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDDALIGN)
+int xfs_sb_version_adddalign(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_ADDDALIGN(sbp)	xfs_sb_version_adddalign(sbp)
+#else	/* inline: OR in DALIGNBIT; the version number bits are not touched */
+#define XFS_SB_VERSION_ADDDALIGN(sbp)	\
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT))
+#endif	/* XFS_SB_VERSION_ADDDALIGN */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASSHARED)
+int xfs_sb_version_hasshared(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_HASSHARED(sbp)	xfs_sb_version_hasshared(sbp)
+#else	/* inline: true only for v4 with SHAREDBIT */
+#define XFS_SB_VERSION_HASSHARED(sbp)	\
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	 ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT))
+#endif	/* XFS_SB_VERSION_HASSHARED */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDSHARED)
+int xfs_sb_version_addshared(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_ADDSHARED(sbp)	xfs_sb_version_addshared(sbp)
+#else	/* inline: OR in SHAREDBIT; the version number bits are not touched */
+#define XFS_SB_VERSION_ADDSHARED(sbp)	\
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT))
+#endif	/* XFS_SB_VERSION_ADDSHARED */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBSHARED)
+int xfs_sb_version_subshared(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_SUBSHARED(sbp)	xfs_sb_version_subshared(sbp)
+#else	/* inline: clear SHAREDBIT; every other bit is kept */
+#define XFS_SB_VERSION_SUBSHARED(sbp)	\
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT))
+#endif	/* XFS_SB_VERSION_SUBSHARED */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDIRV2)
+int xfs_sb_version_hasdirv2(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_HASDIRV2(sbp)	xfs_sb_version_hasdirv2(sbp)
+#else	/* inline: true only for v4 with DIRV2BIT */
+#define XFS_SB_VERSION_HASDIRV2(sbp)	\
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	 ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
+#endif	/* XFS_SB_VERSION_HASDIRV2 */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASEXTFLGBIT)
+int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)	xfs_sb_version_hasextflgbit(sbp)
+#else	/* inline: true only for v4 with EXTFLGBIT */
+#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)	\
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+	 ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
+#endif	/* XFS_SB_VERSION_HASEXTFLGBIT */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDEXTFLGBIT)
+int xfs_sb_version_addextflgbit(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)	xfs_sb_version_addextflgbit(sbp)
+#else	/* inline: OR in EXTFLGBIT; the version number bits are not touched */
+#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)	\
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT))
+#endif	/* XFS_SB_VERSION_ADDEXTFLGBIT */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBEXTFLGBIT)
+int xfs_sb_version_subextflgbit(xfs_sb_t *sbp);	/* function form; body not in this header */
+#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)	xfs_sb_version_subextflgbit(sbp)
+#else	/* inline: clear EXTFLGBIT; every other bit is kept */
+#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)	\
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT))
+#endif	/* XFS_SB_VERSION_SUBEXTFLGBIT */
+
+/*
+ * end of superblock version macros
+ */
+
+#define	XFS_SB_DADDR	((xfs_daddr_t)0)		/* daddr in filesystem/ag */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_BLOCK)
+xfs_agblock_t xfs_sb_block(struct xfs_mount *mp);	/* function form; body not in this header */
+#define	XFS_SB_BLOCK(mp)	xfs_sb_block(mp)
+#else	/* inline: XFS_HDR_BLOCK applied to XFS_SB_DADDR */
+#define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
+#endif	/* XFS_SB_BLOCK */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_HDR_BLOCK)
+xfs_agblock_t xfs_hdr_block(struct xfs_mount *mp, xfs_daddr_t d);	/* function form; body not here */
+#define	XFS_HDR_BLOCK(mp,d)	xfs_hdr_block(mp,d)
+#else	/* inline: truncating XFS_BB_TO_FSBT of d, cast to xfs_agblock_t */
+#define	XFS_HDR_BLOCK(mp,d)	((xfs_agblock_t)(XFS_BB_TO_FSBT(mp,d)))
+#endif	/* XFS_HDR_BLOCK */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_FSB)
+xfs_fsblock_t xfs_daddr_to_fsb(struct xfs_mount *mp, xfs_daddr_t d);	/* function form; body not here */
+#define	XFS_DADDR_TO_FSB(mp,d)		xfs_daddr_to_fsb(mp,d)
+#else	/* inline: split d into AG number and AG block, then XFS_AGB_TO_FSB */
+#define	XFS_DADDR_TO_FSB(mp,d) \
+	XFS_AGB_TO_FSB(mp, XFS_DADDR_TO_AGNO(mp,d), XFS_DADDR_TO_AGBNO(mp,d))
+#endif	/* XFS_DADDR_TO_FSB */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_DADDR)
+xfs_daddr_t xfs_fsb_to_daddr(struct xfs_mount *mp, xfs_fsblock_t fsbno);	/* function form; body not here */
+#define	XFS_FSB_TO_DADDR(mp,fsbno)	xfs_fsb_to_daddr(mp,fsbno)
+#else	/* inline: split fsbno into AG number and AG block, then XFS_AGB_TO_DADDR */
+#define	XFS_FSB_TO_DADDR(mp,fsbno) \
+	XFS_AGB_TO_DADDR(mp, XFS_FSB_TO_AGNO(mp,fsbno), \
+			 XFS_FSB_TO_AGBNO(mp,fsbno))
+#endif	/* XFS_FSB_TO_DADDR */
+
+/*
+ * FS block <-> basic block; XFS_BB_TO_FSB rounds up, _FSBT truncates.
+ */
+#define	XFS_FSB_TO_BB(mp,fsbno)	((fsbno) << (mp)->m_blkbb_log)
+#define	XFS_BB_TO_FSB(mp,bb)	\
+	(((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
+#define	XFS_BB_TO_FSBT(mp,bb)	((bb) >> (mp)->m_blkbb_log)
+#define	XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
+
+/*
+ * FS block <-> byte; XFS_B_TO_FSB rounds up, XFS_B_TO_FSBT truncates.
+ */
+#define	XFS_FSB_TO_B(mp,fsbno)	((xfs_fsize_t)(fsbno) << \
+				 (mp)->m_sb.sb_blocklog)
+#define	XFS_B_TO_FSB(mp,b)	\
+	((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
+#define	XFS_B_TO_FSBT(mp,b)	(((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
+#define	XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBP)
+xfs_sb_t *xfs_buf_to_sbp(struct xfs_buf *bp);	/* function form; body not in this header */
+#define	XFS_BUF_TO_SBP(bp)	xfs_buf_to_sbp(bp)
+#else	/* inline: reinterpret XFS_BUF_PTR(bp) as a superblock pointer */
+#define	XFS_BUF_TO_SBP(bp)	((xfs_sb_t *)XFS_BUF_PTR(bp))
+#endif	/* XFS_BUF_TO_SBP */
+
+#endif	/* __XFS_SB_H__ */
diff --git a/include/xfs_trans.h b/include/xfs_trans.h
new file mode 100644
index 000000000..49fbc0adf
--- /dev/null
+++ b/include/xfs_trans.h
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef	__XFS_TRANS_H__
+#define	__XFS_TRANS_H__
+
+/*
+ * This is the structure written in the log at the head of
+ * every transaction. It identifies the type and id of the
+ * transaction, and contains the number of items logged by
+ * the transaction so we know how many to expect during recovery.
+ *
+ * Log recovery depends on this layout: do not change the structure without
+ * reworking xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
+ */
+typedef struct xfs_trans_header {
+	uint		th_magic;		/* magic number */
+	uint		th_type;		/* transaction type */
+	__int32_t	th_tid;			/* transaction id (unused) */
+	uint		th_num_items;		/* num items logged by trans */
+} xfs_trans_header_t;
+
+#define	XFS_TRANS_HEADER_MAGIC	0x5452414e	/* ASCII "TRAN" */
+
+/*
+ * Log item types.  The values are consecutive, starting at 0x1234.
+ */
+#define	XFS_LI_5_3_BUF		0x1234	/* v1 bufs, 1-block inode buffers */
+#define	XFS_LI_5_3_INODE	0x1235	/* 1-block inode buffers */
+#define	XFS_LI_EFI		0x1236
+#define	XFS_LI_EFD		0x1237
+#define	XFS_LI_IUNLINK		0x1238
+#define	XFS_LI_6_1_INODE	0x1239	/* 4K non-aligned inode bufs */
+#define	XFS_LI_6_1_BUF		0x123a	/* v1, 4K inode buffers */
+#define	XFS_LI_INODE		0x123b	/* aligned ino chunks, var-size ibufs */
+#define	XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */
+#define	XFS_LI_DQUOT		0x123d
+#define	XFS_LI_QUOTAOFF		0x123e
+#define	XFS_LI_RPC		0x123f	/* CXFS RPC return info */
+
+/*
+ * Transaction types.  Used to distinguish types of buffers.
+ */
+#define XFS_TRANS_SETATTR_NOT_SIZE	1
+#define XFS_TRANS_SETATTR_SIZE		2
+#define XFS_TRANS_INACTIVE		3
+#define XFS_TRANS_CREATE		4
+#define XFS_TRANS_CREATE_TRUNC		5
+#define XFS_TRANS_TRUNCATE_FILE		6
+#define XFS_TRANS_REMOVE		7
+#define XFS_TRANS_LINK			8
+#define XFS_TRANS_RENAME		9
+#define XFS_TRANS_MKDIR			10
+#define XFS_TRANS_RMDIR			11
+#define XFS_TRANS_SYMLINK		12
+#define XFS_TRANS_SET_DMATTRS		13
+#define XFS_TRANS_GROWFS		14
+#define XFS_TRANS_STRAT_WRITE		15
+#define XFS_TRANS_DIOSTRAT		16
+#define	XFS_TRANS_WRITE_SYNC		17
+#define	XFS_TRANS_WRITEID		18
+#define	XFS_TRANS_ADDAFORK		19
+#define	XFS_TRANS_ATTRINVAL		20
+#define	XFS_TRANS_ATRUNCATE		21
+#define	XFS_TRANS_ATTR_SET		22
+#define	XFS_TRANS_ATTR_RM		23
+#define	XFS_TRANS_ATTR_FLAG		24
+#define	XFS_TRANS_CLEAR_AGI_BUCKET	25
+#define XFS_TRANS_QM_SBCHANGE		26
+/*
+ * Dummy entries keep the numbering gap-free, because the transaction type
+ * indexes directly into trans_type[] in xlog_recover_print_trans_head().
+ */
+#define XFS_TRANS_DUMMY1		27
+#define XFS_TRANS_DUMMY2		28
+#define XFS_TRANS_QM_QUOTAOFF		29
+#define XFS_TRANS_QM_DQALLOC		30
+#define XFS_TRANS_QM_SETQLIM		31
+#define XFS_TRANS_QM_DQCLUSTER		32
+#define XFS_TRANS_QM_QINOCREATE		33
+#define XFS_TRANS_QM_QUOTAOFF_END	34
+#define XFS_TRANS_SB_UNIT		35
+#define XFS_TRANS_FSYNC_TS		36
+#define	XFS_TRANS_GROWFSRT_ALLOC	37
+#define	XFS_TRANS_GROWFSRT_ZERO		38
+#define	XFS_TRANS_GROWFSRT_FREE		39
+#define	XFS_TRANS_SWAPEXT		40
+/* new types must also be added to xfs_logprint(8), keeping numbers contiguous */
+
+
+#ifdef __KERNEL__
+struct xfs_buf;
+struct buftarg;
+struct xfs_efd_log_item;
+struct xfs_efi_log_item;
+struct xfs_inode;
+struct xfs_item_ops;
+struct xfs_log_iovec;
+struct xfs_log_item;
+struct xfs_log_item_desc;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot_acct;
+
+typedef struct xfs_ail_entry {
+	struct xfs_log_item	*ail_forw;	/* AIL forward link */
+	struct xfs_log_item	*ail_back;	/* AIL backward link */
+} xfs_ail_entry_t;
+
+/*
+ * This structure is passed as a parameter to xfs_trans_push_ail()
+ * and is used to track which LSN the waiting processes are
+ * waiting to become unused.
+ */
+typedef struct xfs_ail_ticket {
+	xfs_lsn_t		at_lsn;		/* lsn being waited for */
+	struct xfs_ail_ticket	*at_forw;	/* wait list ptr (forward) */
+	struct xfs_ail_ticket	*at_back;	/* wait list ptr (backward) */
+	sv_t			at_sema;	/* wait sema */
+} xfs_ail_ticket_t;
+
+
+typedef struct xfs_log_item {
+	xfs_ail_entry_t			li_ail;		/* AIL forw/back pointers */
+	xfs_lsn_t			li_lsn;		/* last on-disk lsn */
+	struct xfs_log_item_desc	*li_desc;	/* ptr to current desc*/
+	struct xfs_mount		*li_mountp;	/* ptr to fs mount */
+	uint				li_type;	/* item type */
+	uint				li_flags;	/* misc flags */
+	struct xfs_log_item		*li_bio_list;	/* buffer item list */
+	void				(*li_cb)(struct xfs_buf *,
+						 struct xfs_log_item *);
+							/* buffer item iodone */
+							/* callback func */
+	struct xfs_item_ops		*li_ops;	/* ops used by IOP_*() */
+} xfs_log_item_t;
+
+#define	XFS_LI_IN_AIL	0x1	/* NOTE(review): presumably li_flags bits - confirm */
+#define XFS_LI_ABORTED	0x2
+
+typedef struct xfs_item_ops {	/* per-item method table; called via IOP_*() */
+	uint (*iop_size)(xfs_log_item_t *);
+	void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
+	void (*iop_pin)(xfs_log_item_t *);
+	void (*iop_unpin)(xfs_log_item_t *);
+	void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
+	uint (*iop_trylock)(xfs_log_item_t *);	/* returns an XFS_ITEM_* value */
+	void (*iop_unlock)(xfs_log_item_t *);
+	xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
+	void (*iop_push)(xfs_log_item_t *);
+	void (*iop_abort)(xfs_log_item_t *);
+	void (*iop_pushbuf)(xfs_log_item_t *);
+	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
+} xfs_item_ops_t;
+
+#define	IOP_SIZE(ip)		(*(ip)->li_ops->iop_size)(ip)	/* ip is evaluated twice */
+#define	IOP_FORMAT(ip,vp)	(*(ip)->li_ops->iop_format)(ip, vp)
+#define	IOP_PIN(ip)		(*(ip)->li_ops->iop_pin)(ip)
+#define	IOP_UNPIN(ip)		(*(ip)->li_ops->iop_unpin)(ip)
+#define	IOP_UNPIN_REMOVE(ip,tp)	(*(ip)->li_ops->iop_unpin_remove)(ip, tp)
+#define	IOP_TRYLOCK(ip)		(*(ip)->li_ops->iop_trylock)(ip)
+#define	IOP_UNLOCK(ip)		(*(ip)->li_ops->iop_unlock)(ip)
+#define	IOP_COMMITTED(ip, lsn)	(*(ip)->li_ops->iop_committed)(ip, lsn)
+#define	IOP_PUSH(ip)		(*(ip)->li_ops->iop_push)(ip)
+#define	IOP_ABORT(ip)		(*(ip)->li_ops->iop_abort)(ip)
+#define IOP_PUSHBUF(ip)         (*(ip)->li_ops->iop_pushbuf)(ip)
+#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
+
+/*
+ * Return values for the IOP_TRYLOCK() routines (the uint from iop_trylock).
+ */
+#define	XFS_ITEM_SUCCESS	0
+#define	XFS_ITEM_PINNED		1
+#define	XFS_ITEM_LOCKED		2
+#define	XFS_ITEM_FLUSHING	3
+#define XFS_ITEM_PUSHBUF      	4
+
+#endif	/* __KERNEL__ */
+
+/*
+ * This structure is used to track log items associated with
+ * a transaction.  It points to the log item and keeps some
+ * flags to track the state of the log item.  It also tracks
+ * the amount of space needed to log the item it describes
+ * once we get to commit processing (see xfs_trans_commit()).
+ */
+typedef struct xfs_log_item_desc {
+	xfs_log_item_t	*lid_item;	/* the log item described */
+	ushort		lid_size;	/* space needed to log the item */
+	unsigned char	lid_flags;	/* XFS_LID_* state flags */
+	unsigned char	lid_index;	/* slot number in the chunk's lic_descs[] */
+} xfs_log_item_desc_t;
+
+#define	XFS_LID_DIRTY		0x1
+#define	XFS_LID_PINNED		0x2
+#define	XFS_LID_SYNC_UNLOCK	0x4
+
+/*
+ * This structure is used to maintain a chunk list of log_item_desc
+ * structures. The free field is a bitmask indicating which descriptors
+ * in this chunk's array are free (bit n set means lic_descs[n] is free).
+ * The unused field is the first value not used since chunk allocation.
+ */
+#define	XFS_LIC_NUM_SLOTS	15
+typedef struct xfs_log_item_chunk {
+	struct xfs_log_item_chunk	*lic_next;
+	ushort				lic_free;
+	ushort				lic_unused;
+	xfs_log_item_desc_t		lic_descs[XFS_LIC_NUM_SLOTS];
+} xfs_log_item_chunk_t;
+
+#define	XFS_LIC_MAX_SLOT	(XFS_LIC_NUM_SLOTS - 1)
+#define	XFS_LIC_FREEMASK	((1 << XFS_LIC_NUM_SLOTS) - 1)
+
+
+/*
+ * Initialize the given chunk.  Set the chunk's free descriptor mask
+ * to indicate that all descriptors are free.  The caller gets to set
+ * lic_unused to the right value (0 matches all free).  The
+ * lic_descs.lid_index values are set up as each desc is allocated.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT)
+void xfs_lic_init(xfs_log_item_chunk_t *cp);
+#define	XFS_LIC_INIT(cp)	xfs_lic_init(cp)
+#else	/* inline form: set every slot bit in lic_free */
+#define	XFS_LIC_INIT(cp)	((cp)->lic_free = XFS_LIC_FREEMASK)
+#endif	/* XFS_LIC_INIT */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT_SLOT)
+void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot);
+#define	XFS_LIC_INIT_SLOT(cp,slot)	xfs_lic_init_slot(cp, slot)
+#else	/* inline form: record the slot number in that descriptor's lid_index */
+#define	XFS_LIC_INIT_SLOT(cp,slot)	\
+	((cp)->lic_descs[slot].lid_index = (unsigned char)(slot))
+#endif	/* XFS_LIC_INIT_SLOT */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_VACANCY)
+int xfs_lic_vacancy(xfs_log_item_chunk_t *cp);
+#define	XFS_LIC_VACANCY(cp)		xfs_lic_vacancy(cp)
+#else	/* inline form: non-zero when at least one slot bit is set in lic_free */
+#define	XFS_LIC_VACANCY(cp)		(((cp)->lic_free) & XFS_LIC_FREEMASK)
+#endif	/* XFS_LIC_VACANCY */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ALL_FREE)
+void xfs_lic_all_free(xfs_log_item_chunk_t *cp);
+#define	XFS_LIC_ALL_FREE(cp)		xfs_lic_all_free(cp)
+#else	/* inline form: same assignment as the inline XFS_LIC_INIT */
+#define	XFS_LIC_ALL_FREE(cp)		((cp)->lic_free = XFS_LIC_FREEMASK)
+#endif	/* XFS_LIC_ALL_FREE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ARE_ALL_FREE)
+int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp);
+#define	XFS_LIC_ARE_ALL_FREE(cp)	xfs_lic_are_all_free(cp)
+#else	/* inline form: true when every slot bit is set in lic_free */
+#define	XFS_LIC_ARE_ALL_FREE(cp)	(((cp)->lic_free & XFS_LIC_FREEMASK) ==\
+					XFS_LIC_FREEMASK)
+#endif	/* XFS_LIC_ARE_ALL_FREE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ISFREE)
+int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot);
+#define	XFS_LIC_ISFREE(cp,slot)	xfs_lic_isfree(cp,slot)
+#else	/* inline form: non-zero (the bit itself, not 1) when slot's bit is set */
+#define	XFS_LIC_ISFREE(cp,slot)	((cp)->lic_free & (1 << (slot)))
+#endif	/* XFS_LIC_ISFREE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_CLAIM)
+void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot);
+#define	XFS_LIC_CLAIM(cp,slot)		xfs_lic_claim(cp,slot)
+#else	/* inline form: clear slot's bit in lic_free (slot no longer free) */
+#define	XFS_LIC_CLAIM(cp,slot)		((cp)->lic_free &= ~(1 << (slot)))
+#endif	/* XFS_LIC_CLAIM */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_RELSE)
+void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot);
+#define	XFS_LIC_RELSE(cp,slot)		xfs_lic_relse(cp,slot)
+#else	/* inline form: set slot's bit in lic_free (slot is free again) */
+#define	XFS_LIC_RELSE(cp,slot)		((cp)->lic_free |= 1 << (slot))
+#endif	/* XFS_LIC_RELSE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_SLOT)
+xfs_log_item_desc_t *xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot);
+#define	XFS_LIC_SLOT(cp,slot)		xfs_lic_slot(cp,slot)
+#else	/* inline form: address of lic_descs[slot]; no range check on slot */
+#define	XFS_LIC_SLOT(cp,slot)		(&((cp)->lic_descs[slot]))
+#endif	/* XFS_LIC_SLOT */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_SLOT)
+int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp);
+#define	XFS_LIC_DESC_TO_SLOT(dp)	xfs_lic_desc_to_slot(dp)
+#else	/* inline form: lid_index as uint (the function form returns int) */
+#define	XFS_LIC_DESC_TO_SLOT(dp)	((uint)((dp)->lid_index))
+#endif	/* XFS_LIC_DESC_TO_SLOT */
+/*
+ * Calculate the address of a chunk given a descriptor pointer:
+ * dp - dp->lid_index gives the address of the start of the lic_descs array.
+ * From this we subtract the offset of the lic_descs field in a chunk (taken
+ * from a null chunk pointer, i.e. a hand-written offsetof).  All of this
+ * yields the address of the chunk, which is cast to a chunk pointer.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_CHUNK)
+xfs_log_item_chunk_t *xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp);
+#define	XFS_LIC_DESC_TO_CHUNK(dp)	xfs_lic_desc_to_chunk(dp)
+#else	/* inline form; dp is evaluated twice */
+#define	XFS_LIC_DESC_TO_CHUNK(dp)	((xfs_log_item_chunk_t*) \
+					(((xfs_caddr_t)((dp) - (dp)->lid_index)) -\
+					(xfs_caddr_t)(((xfs_log_item_chunk_t*) \
+					0)->lic_descs)))
+#endif	/* XFS_LIC_DESC_TO_CHUNK */
+
+#ifdef __KERNEL__
+/*
+ * This is the type of function which can be given to xfs_trans_callback()
+ * to be called upon the transaction's commit to disk.
+ */
+typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
+
+/*
+ * This is the structure maintained for every active transaction.
+ */
+typedef struct xfs_trans {
+	unsigned int		t_magic;	/* magic number */
+	xfs_log_callback_t	t_logcb;	/* log callback struct */
+	struct xfs_trans	*t_forw;	/* async list pointers */
+	struct xfs_trans	*t_back;	/* async list pointers */
+	unsigned int		t_type;		/* transaction type */
+	unsigned int		t_log_res;	/* amt of log space resvd */
+	unsigned int		t_log_count;	/* count for perm log res */
+	unsigned int		t_blk_res;	/* # of blocks resvd */
+	unsigned int		t_blk_res_used;	/* # of resvd blocks used */
+	unsigned int		t_rtx_res;	/* # of rt extents resvd */
+	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
+	xfs_log_ticket_t	t_ticket;	/* log mgr ticket */
+	sema_t			t_sema;		/* sema for commit completion */
+	xfs_lsn_t		t_lsn;		/* log seq num of trans commit */
+	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
+	struct xfs_dquot_acct   *t_dqinfo;	/* accting info for dquots */
+	xfs_trans_callback_t	t_callback;	/* transaction callback */
+	void			*t_callarg;	/* callback arg */
+	unsigned int		t_flags;	/* misc flags */
+	long			t_icount_delta;	/* superblock icount change */
+	long			t_ifree_delta;	/* superblock ifree change */
+	long			t_fdblocks_delta; /* superblock fdblocks chg */
+	long			t_res_fdblocks_delta; /* on-disk only chg */
+	long			t_frextents_delta;/* superblock freextents chg*/
+	long			t_res_frextents_delta; /* on-disk only chg */
+	long			t_ag_freeblks_delta; /* debugging counter */
+	long			t_ag_flist_delta; /* debugging counter */
+	long			t_ag_btree_delta; /* debugging counter */
+	long			t_dblocks_delta;/* superblock dblocks change */
+	long			t_agcount_delta;/* superblock agcount change */
+	long			t_imaxpct_delta;/* superblock imaxpct change */
+	long			t_rextsize_delta;/* superblock rextsize chg */
+	long			t_rbmblocks_delta;/* superblock rbmblocks chg */
+	long			t_rblocks_delta;/* superblock rblocks change */
+	long			t_rextents_delta;/* superblock rextents chg */
+	long			t_rextslog_delta;/* superblock rextslog chg */
+	unsigned int		t_items_free;	/* log item descs free */
+	xfs_log_item_chunk_t	t_items;	/* first log item desc chunk */
+	xfs_trans_header_t	t_header;	/* header for in-log trans */
+} xfs_trans_t;
+
+#endif	/* __KERNEL__ */
+
+
+#define	XFS_TRANS_MAGIC		0x5452414E	/* ASCII "TRAN" */
+/*
+ * Values for t_flags (bit flags; may be OR-ed together).
+ */
+#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */
+#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */
+#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */
+#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */
+#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */
+#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
+
+/*
+ * Values for call flags parameter (separate bit space from t_flags above).
+ */
+#define	XFS_TRANS_NOSLEEP		0x1
+#define	XFS_TRANS_WAIT			0x2
+#define	XFS_TRANS_RELEASE_LOG_RES	0x4
+#define	XFS_TRANS_ABORT			0x8
+
+/*
+ * Field values for xfs_trans_mod_sb (one distinct bit per superblock field).
+ */
+#define	XFS_TRANS_SB_ICOUNT		0x00000001
+#define	XFS_TRANS_SB_IFREE		0x00000002
+#define	XFS_TRANS_SB_FDBLOCKS		0x00000004
+#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008
+#define	XFS_TRANS_SB_FREXTENTS		0x00000010
+#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020
+#define	XFS_TRANS_SB_DBLOCKS		0x00000040
+#define	XFS_TRANS_SB_AGCOUNT		0x00000080
+#define	XFS_TRANS_SB_IMAXPCT		0x00000100
+#define	XFS_TRANS_SB_REXTSIZE		0x00000200
+#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400
+#define	XFS_TRANS_SB_RBLOCKS		0x00000800
+#define	XFS_TRANS_SB_REXTENTS		0x00001000
+#define	XFS_TRANS_SB_REXTSLOG		0x00002000
+
+
+/*
+ * Various log reservation values.
+ * These are based on the size of the file system block
+ * because that is what most transactions manipulate.
+ * Each adds in an additional 128 bytes per item logged to
+ * try to account for the overhead of the transaction mechanism.
+ *
+ * Note:
+ * Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish()
+ * call.  This is because the number in the worst case is quite high
+ * and quite unusual.  In order to fix this we need to change
+ * xfs_bmap_finish() to free extents in only a single AG at a time.
+ * This will require changes to the EFI code as well, however, so that
+ * the EFI for the extents not freed is logged again in each transaction.
+ * See bug 261917.
+ */
+
+/*
+ * Per-extent log reservation for the allocation btree changes involved in
+ * freeing or allocating an extent, multiplied by nx extents.  In bytes (_RES):
+ * 2 trees * (2 blocks/level * max depth - 1) * block size; _COUNT is in blocks.
+ */
+#define	XFS_ALLOCFREE_LOG_RES(mp,nx) \
+	((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
+#define	XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
+	((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
+
+/*
+ * Per-directory log reservation for any directory change (_RES is in bytes,
+ * _COUNT is the same sum in blocks).
+ * dir blocks: (1 btree block per level + data block + free block) * dblock size
+ * bmap btree: (levels + 2) * max depth * block size; v2 directory blocks can be
+ * fragmented down to the fsb size, which the DAENTER macros account for.
+ */
+#define	XFS_DIROP_LOG_RES(mp)	\
+	(XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \
+	 (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)))
+#define	XFS_DIROP_LOG_COUNT(mp)	\
+	(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
+	 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
+
+/*
+ * In a write transaction we can allocate a maximum of 2 extents.  The result
+ * is the larger of two cases (plus 128 bytes per logged item).  First:
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ *    the agfs of the ags containing the blocks: 2 * sector size
+ *    the agfls of the ags containing the blocks: 2 * sector size
+ *    the super block free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define XFS_CALC_WRITE_LOG_RES(mp) \
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
+	  (2 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+	  (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
+	 ((2 * (mp)->m_sb.sb_sectsize) + \
+	  (2 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+	  (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
+
+#define	XFS_WRITE_LOG_RES(mp)	((mp)->m_reservations.tr_write)
+
+/*
+ * In truncating a file we free up to two extents at once.  We can modify:
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *		4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_ITRUNCATE_LOG_RES(mp) \
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
+	  (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
+	 ((4 * (mp)->m_sb.sb_sectsize) + \
+	  (4 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 4) + \
+	  (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
+
+#define	XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
+
+/*
+ * In renaming a file we can modify:
+ *    the four inodes involved: 4 * inode size
+ *    the two directory btrees: 2 * (max depth + v2) * dir block size
+ *    the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ *	of bmap blocks) giving:
+ *    the agf for the ags in which the blocks live: 3 * sector size
+ *    the agfl for the ags in which the blocks live: 3 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_RENAME_LOG_RES(mp) \
+	(MAX( \
+	 ((4 * (mp)->m_sb.sb_inodesize) + \
+	  (2 * XFS_DIROP_LOG_RES(mp)) + \
+	  (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
+	 ((3 * (mp)->m_sb.sb_sectsize) + \
+	  (3 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 3) + \
+	  (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
+
+#define	XFS_RENAME_LOG_RES(mp)	((mp)->m_reservations.tr_rename)
+
+/*
+ * For creating a link to an inode (the MAX of the two cases below is used):
+ *    the parent directory inode: inode size
+ *    the linked inode: inode size
+ *    the directory btree could split: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ *    the agf for the ag in which the blocks live: sector size
+ *    the agfl for the ag in which the blocks live: sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_LINK_LOG_RES(mp) \
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+ 	  (mp)->m_sb.sb_inodesize + \
+	  XFS_DIROP_LOG_RES(mp) + \
+	  (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
+	 ((mp)->m_sb.sb_sectsize + \
+	  (mp)->m_sb.sb_sectsize + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+	  (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
+
+#define	XFS_LINK_LOG_RES(mp)	((mp)->m_reservations.tr_link)
+
+/*
+ * For removing a directory entry (MAX of the two cases) we can modify:
+ *    the parent directory inode: inode size
+ *    the removed inode: inode size
+ *    the directory btree could join: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_REMOVE_LOG_RES(mp)	\
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+ 	  (mp)->m_sb.sb_inodesize + \
+	  XFS_DIROP_LOG_RES(mp) + \
+	  (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
+	 ((2 * (mp)->m_sb.sb_sectsize) + \
+	  (2 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+	  (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
+
+#define	XFS_REMOVE_LOG_RES(mp)	((mp)->m_reservations.tr_remove)
+
+/*
+ * For symlink we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: 1 block
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the blocks for the symlink: 1 KB
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_SYMLINK_LOG_RES(mp)		\
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+	  (mp)->m_sb.sb_inodesize + \
+	  XFS_FSB_TO_B(mp, 1) + \
+	  XFS_DIROP_LOG_RES(mp) + \
+	  1024 + \
+	  (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
+	 (2 * (mp)->m_sb.sb_sectsize + \
+	  XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
+	  XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
+
+#define	XFS_SYMLINK_LOG_RES(mp)	((mp)->m_reservations.tr_symlink)
+
+/*
+ * For create we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_CREATE_LOG_RES(mp)		\
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+	  (mp)->m_sb.sb_inodesize + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_FSB_TO_B(mp, 1) + \
+	  XFS_DIROP_LOG_RES(mp) + \
+	  (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
+	 (3 * (mp)->m_sb.sb_sectsize + \
+	  XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
+	  XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
+
+#define	XFS_CREATE_LOG_RES(mp)	((mp)->m_reservations.tr_create)
+
+/*
+ * Making a new directory is the same as creating a new file (own tr_mkdir slot).
+ */
+#define	XFS_CALC_MKDIR_LOG_RES(mp)	XFS_CALC_CREATE_LOG_RES(mp)
+
+#define	XFS_MKDIR_LOG_RES(mp)	((mp)->m_reservations.tr_mkdir)
+
+/*
+ * In freeing an inode we can modify (plus 128 bytes for each of the 5 items):
+ *    the inode being freed: inode size
+ *    the super block free inode counter: sector size
+ *    the agi hash list and counters: sector size
+ *    the inode btree entry: block size
+ *    the inode before ours in the agi hash list: MAX(block, inode cluster size)
+ */
+#define	XFS_CALC_IFREE_LOG_RES(mp) \
+	((mp)->m_sb.sb_inodesize + \
+	 (mp)->m_sb.sb_sectsize + \
+	 (mp)->m_sb.sb_sectsize + \
+	 XFS_FSB_TO_B((mp), 1) + \
+	 MAX(XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
+	 (128 * 5))
+
+#define	XFS_IFREE_LOG_RES(mp)	((mp)->m_reservations.tr_ifree)
+
+/*
+ * When only changing the inode we log the inode and possibly the superblock
+ * (one sector).  We also add 512 bytes of slop for the transaction stuff.
+ */
+#define	XFS_CALC_ICHANGE_LOG_RES(mp)	((mp)->m_sb.sb_inodesize + \
+					 (mp)->m_sb.sb_sectsize + 512)
+
+#define	XFS_ICHANGE_LOG_RES(mp)	((mp)->m_reservations.tr_ichange)
+
+/*
+ * Growing the data section of the filesystem.
+ *	superblock: sector size
+ *	agi and agf: 2 * sector size
+ *	allocation btrees: XFS_ALLOCFREE_LOG_RES for 1 extent
+ */
+#define	XFS_CALC_GROWDATA_LOG_RES(mp) \
+	((mp)->m_sb.sb_sectsize * 3 + \
+	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+	 (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
+
+#define	XFS_GROWDATA_LOG_RES(mp)    ((mp)->m_reservations.tr_growdata)
+
+/*
+ * Growing the rt section of the filesystem (128 bytes added per logged item).
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ *	superblock: sector size
+ *	agf of the ag from which the extent is allocated: sector size
+ *	bmap btree for bitmap/summary inode: max depth * blocksize
+ *	bitmap/summary inode: inode size
+ *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
+#define	XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
+	(2 * (mp)->m_sb.sb_sectsize + \
+	 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
+	 (mp)->m_sb.sb_inodesize + \
+	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+	 (128 * \
+	  (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
+	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
+
+#define	XFS_GROWRTALLOC_LOG_RES(mp)	((mp)->m_reservations.tr_growrtalloc)
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ *	one bitmap/summary block: blocksize (plus 128 bytes of overhead)
+ */
+#define	XFS_CALC_GROWRTZERO_LOG_RES(mp) \
+	((mp)->m_sb.sb_blocksize + 128)
+
+#define	XFS_GROWRTZERO_LOG_RES(mp)	((mp)->m_reservations.tr_growrtzero)
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks (plus 128 bytes for each of the 5 items):
+ *	superblock: sector size
+ *	bitmap inode: inode size
+ *	summary inode: inode size
+ *	one bitmap block: blocksize
+ *	summary blocks: new summary size (m_rsumsize)
+ */
+#define	XFS_CALC_GROWRTFREE_LOG_RES(mp) \
+	((mp)->m_sb.sb_sectsize + \
+	 2 * (mp)->m_sb.sb_inodesize + \
+	 (mp)->m_sb.sb_blocksize + \
+	 (mp)->m_rsumsize + \
+	 (128 * 5))
+
+#define	XFS_GROWRTFREE_LOG_RES(mp)	((mp)->m_reservations.tr_growrtfree)
+
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ *	inode: inode size (plus 128 bytes of overhead)
+ */
+#define	XFS_CALC_SWRITE_LOG_RES(mp) \
+     	((mp)->m_sb.sb_inodesize + 128)
+
+#define	XFS_SWRITE_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
+
+/*
+ * Logging the inode timestamps on an fsync -- same as SWRITE (the tr_swrite
+ * value is reused) as long as SWRITE logs the entire inode core
+ */
+#define XFS_FSYNC_TS_LOG_RES(mp)        ((mp)->m_reservations.tr_swrite)
+
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ *	inode (same formula as SWRITE; the accessor below reads tr_swrite)
+ */
+#define	XFS_CALC_WRITEID_LOG_RES(mp) \
+     	((mp)->m_sb.sb_inodesize + 128)
+
+#define	XFS_WRITEID_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
+
+/*
+ * Converting the inode from non-attributed to attributed.
+ *	the inode being converted: inode size
+ *	agf block and superblock (for block allocation): 2 * sector size
+ *	the new block (directory sized): m_dirblksize
+ *	bmap blocks for the new directory block (skipped when XFS_DIR_IS_V1)
+ *	allocation btrees: XFS_ALLOCFREE_LOG_RES for 1 extent
+ */
+#define	XFS_CALC_ADDAFORK_LOG_RES(mp)	\
+	((mp)->m_sb.sb_inodesize + \
+	 (mp)->m_sb.sb_sectsize * 2 + \
+	 (mp)->m_dirblksize + \
+	 (XFS_DIR_IS_V1(mp) ? 0 : \
+	    XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1))) + \
+	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+	 (128 * (4 + \
+		 (XFS_DIR_IS_V1(mp) ? 0 : \
+			 XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
+		 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
+
+#define	XFS_ADDAFORK_LOG_RES(mp)	((mp)->m_reservations.tr_addafork)
+
+/*
+ * Removing the attribute fork of a file
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *		4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_ATTRINVAL_LOG_RES(mp)	\
+	(MAX( \
+	 ((mp)->m_sb.sb_inodesize + \
+	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
+	  (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
+	 ((4 * (mp)->m_sb.sb_sectsize) + \
+	  (4 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 4) + \
+	  (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
+
+#define	XFS_ATTRINVAL_LOG_RES(mp)	((mp)->m_reservations.tr_attrinval)
+
+/*
+ * Setting an attribute.
+ *	the inode getting the attribute
+ *	the superblock for allocations
+ *	the agfs extents are allocated from
+ *	the attribute btree * max depth
+ *	the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
+#define	XFS_CALC_ATTRSET_LOG_RES(mp)	\
+	((mp)->m_sb.sb_inodesize + \
+	 (mp)->m_sb.sb_sectsize + \
+	  XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
+          (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
+
+#define	XFS_ATTRSET_LOG_RES(mp, ext)	/* tr_attrset + runtime part; ext used 4x */ \
+	((mp)->m_reservations.tr_attrset + \
+	 ((ext) * (mp)->m_sb.sb_sectsize) + \
+	 ((ext) * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
+	 (128 * ((ext) + ((ext) * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
+
+/*
+ * Removing an attribute.
+ *    the inode: inode size
+ *    the attribute btree could join: max depth * block size
+ *    the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define	XFS_CALC_ATTRRM_LOG_RES(mp)	/* NOTE(review): DATA_FORK vs ATTR_FORK below - confirm */ \
+	(MAX( \
+ 	  ((mp)->m_sb.sb_inodesize + \
+	  XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
+	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
+	  (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
+	 ((2 * (mp)->m_sb.sb_sectsize) + \
+	  (2 * (mp)->m_sb.sb_sectsize) + \
+	  (mp)->m_sb.sb_sectsize + \
+	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+	  (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
+
+#define	XFS_ATTRRM_LOG_RES(mp)	((mp)->m_reservations.tr_attrrm)
+
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
+#define	XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
+	((mp)->m_sb.sb_sectsize + 128)
+
+#define	XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)  ((mp)->m_reservations.tr_clearagi)
+
+
+/*
+ * Various log count values.
+ */
+#define	XFS_DEFAULT_LOG_COUNT		1
+#define	XFS_DEFAULT_PERM_LOG_COUNT	2
+#define	XFS_ITRUNCATE_LOG_COUNT		2
+#define	XFS_CREATE_LOG_COUNT		2
+#define	XFS_MKDIR_LOG_COUNT		3
+#define	XFS_SYMLINK_LOG_COUNT		3
+#define	XFS_REMOVE_LOG_COUNT		2
+#define	XFS_LINK_LOG_COUNT		2
+#define	XFS_RENAME_LOG_COUNT		2
+#define	XFS_WRITE_LOG_COUNT		2
+#define	XFS_ADDAFORK_LOG_COUNT		2
+#define	XFS_ATTRINVAL_LOG_COUNT		1
+#define	XFS_ATTRSET_LOG_COUNT		3
+#define	XFS_ATTRRM_LOG_COUNT		3
+
+/*
+ * Here we centralize the specification of XFS meta-data buffer
+ * reference count values.  This determines how hard the buffer
+ * cache tries to hold onto the buffer.
+ */
+#define	XFS_AGF_REF		4
+#define	XFS_AGI_REF		4
+#define	XFS_AGFL_REF		3
+#define	XFS_INO_BTREE_REF	3
+#define	XFS_ALLOC_BTREE_REF	2
+#define	XFS_BMAP_BTREE_REF	2
+#define	XFS_DIR_BTREE_REF	2
+#define	XFS_ATTR_BTREE_REF	1
+#define	XFS_INO_REF		1
+#define	XFS_DQUOT_REF		1
+
+#ifdef __KERNEL__
+/*
+ * XFS transaction mechanism exported interfaces that are
+ * actually macros.
+ */
+#define	xfs_trans_get_log_res(tp)	((tp)->t_log_res)
+#define	xfs_trans_get_log_count(tp)	((tp)->t_log_count)
+#define	xfs_trans_get_block_res(tp)	((tp)->t_blk_res)
+#define	xfs_trans_set_sync(tp)		((tp)->t_flags |= XFS_TRANS_SYNC)
+
+#ifdef DEBUG	/* AG deltas are accumulated on the transaction only when DEBUG is set */
+#define	xfs_trans_agblocks_delta(tp, d)	((tp)->t_ag_freeblks_delta += (long)(d))
+#define	xfs_trans_agflist_delta(tp, d)	((tp)->t_ag_flist_delta += (long)(d))
+#define	xfs_trans_agbtree_delta(tp, d)	((tp)->t_ag_btree_delta += (long)(d))
+#else	/* !DEBUG: the calls expand to nothing, so (d) is never evaluated */
+#define	xfs_trans_agblocks_delta(tp, d)
+#define	xfs_trans_agflist_delta(tp, d)
+#define	xfs_trans_agbtree_delta(tp, d)
+#endif	/* DEBUG */
+
+/*
+ * XFS transaction mechanism exported interfaces.
+ */
+void		xfs_trans_init(struct xfs_mount *);
+xfs_trans_t	*xfs_trans_alloc(struct xfs_mount *, uint);
+xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
+int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
+				  uint, uint);
+void		xfs_trans_callback(xfs_trans_t *,
+				   void (*)(xfs_trans_t *, void *), void *);
+void		xfs_trans_mod_sb(xfs_trans_t *, uint, long);
+struct xfs_buf	*xfs_trans_get_buf(xfs_trans_t *, struct buftarg *, xfs_daddr_t,
+				   int, uint);
+int		xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
+				   struct buftarg *, xfs_daddr_t, int, uint,
+				   struct xfs_buf **);
+struct xfs_buf	*xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
+
+void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_bhold_until_committed(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
+void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
+int		xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
+			       xfs_ino_t , uint, struct xfs_inode **);
+void		xfs_trans_iput(xfs_trans_t *, struct xfs_inode *, uint);
+void		xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
+void		xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
+void		xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *);
+void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
+void		xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
+struct xfs_efi_log_item	*xfs_trans_get_efi(xfs_trans_t *, uint);
+void		xfs_efi_release(struct xfs_efi_log_item *, uint);
+void		xfs_trans_log_efi_extent(xfs_trans_t *,
+					 struct xfs_efi_log_item *,
+					 xfs_fsblock_t,
+					 xfs_extlen_t);
+struct xfs_efd_log_item	*xfs_trans_get_efd(xfs_trans_t *,
+				  struct xfs_efi_log_item *,
+				  uint);
+void		xfs_trans_log_efd_extent(xfs_trans_t *,
+					 struct xfs_efd_log_item *,
+					 xfs_fsblock_t,
+					 xfs_extlen_t);
+void		xfs_trans_log_create_rpc(xfs_trans_t *, int, xfs_ino_t);
+void		xfs_trans_log_setattr_rpc(xfs_trans_t *, int); 
+int		xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *);
+void		xfs_trans_commit_async(struct xfs_mount *);
+void		xfs_trans_cancel(xfs_trans_t *, int);
+void		xfs_trans_ail_init(struct xfs_mount *);
+xfs_lsn_t	xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
+xfs_lsn_t	xfs_trans_tail_ail(struct xfs_mount *);
+void		xfs_trans_unlocked_item(struct xfs_mount *,
+					xfs_log_item_t *);
+
+/*
+ * Not necessarily exported, but used outside a single file.
+ */
+int		xfs_trans_lsn_danger(struct xfs_mount *, xfs_lsn_t);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_TRANS_H__ */
diff --git a/include/xfs_trans_space.h b/include/xfs_trans_space.h
new file mode 100644
index 000000000..c377a4476
--- /dev/null
+++ b/include/xfs_trans_space.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_TRANS_SPACE_H__
+#define __XFS_TRANS_SPACE_H__
+
+/*
+ * Components of space reservations.
+ */
+#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)    \
+                (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
+#define	XFS_EXTENTADD_SPACE_RES(mp,w)	(XFS_BM_MAXLEVELS(mp,w) - 1)
+#define XFS_NEXTENTADD_SPACE_RES(mp,b,w) /* b rounded up per block, times extentadd */ \
+        ((((b) + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
+          XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
+          XFS_EXTENTADD_SPACE_RES(mp,w))
+#define	XFS_DAENTER_1B(mp,w)	((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
+#define	XFS_DAENTER_DBS(mp,w)	\
+	(XFS_DA_NODE_MAXDEPTH + \
+	 ((XFS_DIR_IS_V2(mp) && (w) == XFS_DATA_FORK) ? 2 : 0))
+#define	XFS_DAENTER_BLOCKS(mp,w)	\
+	(XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w))
+#define	XFS_DAENTER_BMAP1B(mp,w)	\
+	XFS_NEXTENTADD_SPACE_RES(mp, XFS_DAENTER_1B(mp, w), w)
+#define	XFS_DAENTER_BMAPS(mp,w)		\
+	(XFS_DAENTER_DBS(mp,w) * XFS_DAENTER_BMAP1B(mp,w))
+#define	XFS_DAENTER_SPACE_RES(mp,w)	\
+	(XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w))
+#define	XFS_DAREMOVE_SPACE_RES(mp,w)	XFS_DAENTER_BMAPS(mp,w)
+#define	XFS_DIRENTER_MAX_SPLIT(mp,nl)	\
+	(((mp)->m_sb.sb_blocksize == 512 && \
+	  XFS_DIR_IS_V1(mp) && \
+	  (nl) >= XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN) ? 2 : 1)
+#define	XFS_DIRENTER_SPACE_RES(mp,nl)	\
+	(XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \
+	 XFS_DIRENTER_MAX_SPLIT(mp,nl))
+#define	XFS_DIRREMOVE_SPACE_RES(mp)	\
+	XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
+#define	XFS_IALLOC_SPACE_RES(mp)	\
+	(XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp)-1)
+
+/*
+ * Space reservation values for various transactions.
+ */
+#define	XFS_ADDAFORK_SPACE_RES(mp)	\
+	((mp)->m_dirblkfsbs + \
+	 (XFS_DIR_IS_V1(mp) ? 0 : XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)))
+#define	XFS_ATTRRM_SPACE_RES(mp)	\
+	XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
+/* This macro is not used - see inline code in xfs_attr_set */
+#define	XFS_ATTRSET_SPACE_RES(mp, v)	\
+	(XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v))
+#define	XFS_CREATE_SPACE_RES(mp,nl)	\
+	(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
+#define	XFS_DIOSTRAT_SPACE_RES(mp, v)	\
+	(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
+#define	XFS_GROWFS_SPACE_RES(mp)	\
+	(2 * XFS_AG_MAXLEVELS(mp))
+#define	XFS_GROWFSRT_SPACE_RES(mp,b)	\
+	((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
+#define	XFS_LINK_SPACE_RES(mp,nl)	\
+	XFS_DIRENTER_SPACE_RES(mp,nl)
+#define	XFS_MKDIR_SPACE_RES(mp,nl)	\
+	(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
+#define	XFS_QM_DQALLOC_SPACE_RES(mp)	\
+	(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \
+	 XFS_DQUOT_CLUSTER_SIZE_FSB)
+#define	XFS_QM_QINOCREATE_SPACE_RES(mp)	\
+	XFS_IALLOC_SPACE_RES(mp)
+#define	XFS_REMOVE_SPACE_RES(mp)	\
+	XFS_DIRREMOVE_SPACE_RES(mp)
+#define	XFS_RENAME_SPACE_RES(mp,nl)	\
+	(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
+#define	XFS_SYMLINK_SPACE_RES(mp,nl,b)	\
+	(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
+
+#endif	/* __XFS_TRANS_SPACE_H__ */
diff --git a/include/xfs_types.h b/include/xfs_types.h
new file mode 100644
index 000000000..83d07197a
--- /dev/null
+++ b/include/xfs_types.h
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_TYPES_H__
+#define	__XFS_TYPES_H__
+
+/*
+ * Some types are conditional based on the selected configuration.
+ * Set XFS_BIG_FILES=1 or 0 and XFS_BIG_FILESYSTEMS=1 or 0 depending
+ * on the desired configuration.
+ * XFS_BIG_FILES needs pgno_t to be 64 bits (64-bit kernels).
+ * XFS_BIG_FILESYSTEMS needs daddr_t to be 64 bits (N32 and 64-bit kernels).
+ *
+ * Expect these to be set from klocaldefs, or from the machine-type
+ * defs files for the normal case.
+ */
+
+#define	XFS_BIG_FILES		1
+#define	XFS_BIG_FILESYSTEMS	1
+
+typedef __uint32_t	xfs_agblock_t;	/* blockno in alloc. group */
+typedef	__uint32_t	xfs_extlen_t;	/* extent length in blocks */
+typedef	__uint32_t	xfs_agnumber_t;	/* allocation group number */
+typedef __int32_t	xfs_extnum_t;	/* # of extents in a file */
+typedef __int16_t	xfs_aextnum_t;	/* # extents in an attribute fork */
+typedef	__int64_t	xfs_fsize_t;	/* bytes in a file */
+typedef __uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
+
+typedef	__int32_t	xfs_suminfo_t;	/* type of bitmap summary info */
+typedef	__int32_t	xfs_rtword_t;	/* word type for bitmap manipulations */
+
+typedef	__int64_t	xfs_lsn_t;	/* log sequence number */
+typedef	__int32_t	xfs_tid_t;	/* transaction identifier */
+
+typedef	__uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
+typedef	__uint32_t	xfs_dahash_t;	/* dir/attr hash value */
+
+typedef __uint16_t	xfs_prid_t;	/* prid_t truncated to 16bits in XFS */
+
+/*
+ * These types are 64 bits on disk but are either 32 or 64 bits in memory.
+ * Disk based types:
+ */
+typedef __uint64_t	xfs_dfsbno_t;	/* blockno in filesystem (agno|agbno) */
+typedef __uint64_t	xfs_drfsbno_t;	/* blockno in filesystem (raw) */
+typedef	__uint64_t	xfs_drtbno_t;	/* extent (block) in realtime area */
+typedef	__uint64_t	xfs_dfiloff_t;	/* block number in a file */
+typedef	__uint64_t	xfs_dfilblks_t;	/* number of blocks in a file */
+
+/*
+ * Memory based types are conditional.
+ */
+#if XFS_BIG_FILESYSTEMS
+typedef	__uint64_t	xfs_fsblock_t;	/* blockno in filesystem (agno|agbno) */
+typedef __uint64_t	xfs_rfsblock_t;	/* blockno in filesystem (raw) */
+typedef __uint64_t	xfs_rtblock_t;	/* extent (block) in realtime area */
+typedef	__int64_t	xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
+#else
+typedef	__uint32_t	xfs_fsblock_t;	/* blockno in filesystem (agno|agbno) */
+typedef __uint32_t	xfs_rfsblock_t;	/* blockno in filesystem (raw) */
+typedef __uint32_t	xfs_rtblock_t;	/* extent (block) in realtime area */
+typedef	__int32_t	xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
+#endif
+#if XFS_BIG_FILES
+typedef	__uint64_t	xfs_fileoff_t;	/* block number in a file */
+typedef	__int64_t	xfs_sfiloff_t;	/* signed block number in a file */
+typedef	__uint64_t	xfs_filblks_t;	/* number of blocks in a file */
+#else
+typedef	__uint32_t	xfs_fileoff_t;	/* block number in a file */
+typedef	__int32_t	xfs_sfiloff_t;	/* signed block number in a file */
+typedef	__uint32_t	xfs_filblks_t;	/* number of blocks in a file */
+#endif
+
+typedef __uint8_t       xfs_arch_t;     /* architecture of an xfs fs */
+
+/*
+ * Null values for the types.
+ */
+#define	NULLDFSBNO	((xfs_dfsbno_t)-1)
+#define	NULLDRFSBNO	((xfs_drfsbno_t)-1)
+#define	NULLDRTBNO	((xfs_drtbno_t)-1)
+#define	NULLDFILOFF	((xfs_dfiloff_t)-1)
+
+#define	NULLFSBLOCK	((xfs_fsblock_t)-1)
+#define	NULLRFSBLOCK	((xfs_rfsblock_t)-1)
+#define	NULLRTBLOCK	((xfs_rtblock_t)-1)
+#define	NULLFILEOFF	((xfs_fileoff_t)-1)
+
+#define	NULLAGBLOCK	((xfs_agblock_t)-1)
+#define	NULLAGNUMBER	((xfs_agnumber_t)-1)
+#define	NULLEXTNUM	((xfs_extnum_t)-1)
+
+#define NULLCOMMITLSN	((xfs_lsn_t)-1)
+
+/*
+ * Max values for extlen, extnum, aextnum.
+ */
+#define	MAXEXTLEN	((xfs_extlen_t)0x001fffff)	/* 21 bits */
+#define	MAXEXTNUM	((xfs_extnum_t)0x7fffffff)	/* signed int */
+#define	MAXAEXTNUM	((xfs_aextnum_t)0x7fff)		/* signed short */
+
+/*
+ * MAXNAMELEN is the length (including the terminating null) of
+ * the longest permissible file (component) name.
+ */
+#define MAXNAMELEN	256
+
+typedef enum {
+	XFS_LOOKUP_EQi, XFS_LOOKUP_LEi, XFS_LOOKUP_GEi
+} xfs_lookup_t;
+
+typedef enum {
+	XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
+	XFS_BTNUM_MAX
+} xfs_btnum_t;
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC	4
+	__uint32_t		xs_allocx;
+	__uint32_t		xs_allocb;
+	__uint32_t		xs_freex;
+	__uint32_t		xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE   	(XFSSTAT_END_EXTENT_ALLOC+4)
+	__uint32_t		xs_abt_lookup;
+	__uint32_t		xs_abt_compare;
+	__uint32_t		xs_abt_insrec;
+	__uint32_t		xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
+	__uint32_t		xs_blk_mapr;
+	__uint32_t		xs_blk_mapw;
+	__uint32_t		xs_blk_unmap;
+	__uint32_t		xs_add_exlist;
+	__uint32_t		xs_del_exlist;
+	__uint32_t		xs_look_exlist;
+	__uint32_t		xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
+	__uint32_t		xs_bmbt_lookup;
+	__uint32_t		xs_bmbt_compare;
+	__uint32_t		xs_bmbt_insrec;
+	__uint32_t		xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
+	__uint32_t		xs_dir_lookup;
+	__uint32_t		xs_dir_create;
+	__uint32_t		xs_dir_remove;
+	__uint32_t		xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
+	__uint32_t		xs_trans_sync;
+	__uint32_t		xs_trans_async;
+	__uint32_t		xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
+	__uint32_t		xs_ig_attempts;
+	__uint32_t		xs_ig_found;
+	__uint32_t		xs_ig_frecycle;
+	__uint32_t		xs_ig_missed;
+	__uint32_t		xs_ig_dup;
+	__uint32_t		xs_ig_reclaims;
+	__uint32_t		xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
+	__uint32_t		xs_log_writes;
+	__uint32_t		xs_log_blocks;
+	__uint32_t		xs_log_noiclogs;
+	__uint32_t		xs_log_force;
+	__uint32_t		xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
+	__uint32_t		xs_try_logspace;
+	__uint32_t		xs_sleep_logspace;
+	__uint32_t		xs_push_ail;
+	__uint32_t		xs_push_ail_success;
+	__uint32_t		xs_push_ail_pushbuf;
+	__uint32_t		xs_push_ail_pinned;
+	__uint32_t		xs_push_ail_locked;
+	__uint32_t		xs_push_ail_flushing;
+	__uint32_t		xs_push_ail_restarts;
+	__uint32_t		xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
+	__uint32_t		xs_xstrat_quick;
+	__uint32_t		xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
+	__uint32_t		xs_write_calls;
+	__uint32_t		xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
+	__uint32_t		xs_attr_get;
+	__uint32_t		xs_attr_set;
+	__uint32_t		xs_attr_remove;
+	__uint32_t		xs_attr_list;
+# define XFSSTAT_END_QUOTA_OPS		(XFSSTAT_END_ATTRIBUTE_OPS+8)
+	__uint32_t		xs_qm_dqreclaims;
+	__uint32_t		xs_qm_dqreclaim_misses;
+	__uint32_t		xs_qm_dquot_dups;
+	__uint32_t		xs_qm_dqcachemisses;
+	__uint32_t		xs_qm_dqcachehits;
+	__uint32_t		xs_qm_dqwants;
+	__uint32_t		xs_qm_dqshake_reclaims;
+	__uint32_t		xs_qm_dqinact_reclaims;
+# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_QUOTA_OPS+3)
+	__uint32_t		xs_iflush_count;
+	__uint32_t		xs_icluster_flushcnt;
+	__uint32_t		xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
+	__uint32_t		vn_active;	/* # vnodes not on free lists */
+	__uint32_t		vn_alloc;	/* # times vn_alloc called */
+	__uint32_t		vn_get;		/* # times vn_get called */
+	__uint32_t		vn_hold;	/* # times vn_hold called */
+	__uint32_t		vn_rele;	/* # times vn_rele called */
+	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
+	__uint32_t		vn_remove;	/* # times vn_remove called */
+	__uint32_t		vn_free;	/* # times vn_free called */
+	struct xfsstats_xpc {
+		__uint64_t	xs_xstrat_bytes;
+		__uint64_t	xs_write_bytes;
+		__uint64_t	xs_read_bytes;
+	} xpc;
+} xfsstats;
+
+# define XFS_STATS_INC(count)		( xfsstats.count ++ )	/* count: xfsstats field */
+# define XFS_STATS_DEC(count)		( xfsstats.count -- )
+# define XFS_STATS_ADD(count, inc)	( xfsstats.count += (inc) )
+# define XFS_STATS64_INC(count)		( xfsstats.xpc.count ++ )	/* count: xpc field */
+# define XFS_STATS64_ADD(count, inc)	( xfsstats.xpc.count += (inc) )
+#else	/* !CONFIG_PROC_FS */
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+# define XFS_STATS64_INC(count)
+# define XFS_STATS64_ADD(count, inc)
+#endif	/* !CONFIG_PROC_FS */
+
+
+#ifdef __KERNEL__
+
+/* juggle IRIX device numbers - still used in ondisk structures */
+
+#define IRIX_DEV_BITSMAJOR      14
+#define IRIX_DEV_BITSMINOR      18 
+#define IRIX_DEV_MAXMAJ         0x1ff 
+#define IRIX_DEV_MAXMIN         0x3ffff
+#define IRIX_DEV_MAJOR(dev)     ((int)(((unsigned)(dev)>>IRIX_DEV_BITSMINOR) \
+                                    & IRIX_DEV_MAXMAJ))
+#define IRIX_DEV_MINOR(dev)     ((int)((dev)&IRIX_DEV_MAXMIN))
+#define IRIX_MKDEV(major,minor) ((xfs_dev_t)(((major)<<IRIX_DEV_BITSMINOR) \
+                                    | ((minor)&IRIX_DEV_MAXMIN))) /* minor is masked */
+                                    
+#define IRIX_DEV_TO_KDEVT(dev)  MKDEV(IRIX_DEV_MAJOR(dev),IRIX_DEV_MINOR(dev))
+#define IRIX_DEV_TO_DEVT(dev)   ((IRIX_DEV_MAJOR(dev)<<8)|IRIX_DEV_MINOR(dev))
+
+/* __psint_t is the same size as a pointer */
+#if (BITS_PER_LONG == 32)
+typedef __int32_t __psint_t;
+typedef __uint32_t __psunsigned_t;
+#elif (BITS_PER_LONG == 64)
+typedef __int64_t __psint_t;
+typedef __uint64_t __psunsigned_t;
+#else
+#error BITS_PER_LONG must be 32 or 64
+#endif
+
+
+/*
+ * struct for passing owner/requestor id
+ */
+typedef struct flid {
+#ifdef CELL_CAPABLE
+        pid_t   fl_pid;
+        sysid_t fl_sysid;
+#endif
+} flid_t;
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __XFS_TYPES_H__ */
diff --git a/include/xqm.h b/include/xqm.h
new file mode 100644
index 000000000..a8cc55823
--- /dev/null
+++ b/include/xqm.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XQM_H__
+#define __XQM_H__
+
+#include <linux/types.h>
+
+#define XQM_CMD(cmd)	( ('X'<<8)+(cmd) )
+#define IS_XQM_CMD(cmd)	( ((int)(cmd)>>8) == 'X' )
+
+/*
+ * Disk quota - quotactl(2) commands for XFS Quota Manager (XQM).
+ */
+#define Q_XQUOTAON   XQM_CMD(0x1)  /* enable quota accounting/enforcement */
+#define Q_XQUOTAOFF  XQM_CMD(0x2)  /* disable quota accounting/enforcement */
+#define Q_XGETQUOTA  XQM_CMD(0x3)  /* get disk limits & usage */
+#define Q_XSETQLIM   XQM_CMD(0x4)  /* set disk limits only */
+#define Q_XGETQSTAT  XQM_CMD(0x5)  /* returns fs_quota_stat_t struct */
+#define Q_XQUOTARM   XQM_CMD(0x6)  /* free quota files' space */
+
+/*
+ * fs_disk_quota structure:
+ *
+ * This contains the current quota information regarding a user/proj/group.
+ * It is 64-bit aligned, and all the blk units are in BBs (Basic Blocks) of
+ * 512 bytes.
+ */
+#define FS_DQUOT_VERSION	1	/* fs_disk_quota.d_version */
+typedef struct fs_disk_quota {
+	__s8		d_version;	/* version of this structure */
+	__s8		d_flags;	/* XFS_{USER,PROJ,GROUP}_QUOTA */
+	__u16		d_fieldmask;	/* field specifier */
+	__u32		d_id;		/* user, project, or group ID */
+	__u64		d_blk_hardlimit;/* absolute limit on disk blks */
+	__u64		d_blk_softlimit;/* preferred limit on disk blks */
+	__u64		d_ino_hardlimit;/* maximum # allocated inodes */
+	__u64		d_ino_softlimit;/* preferred inode limit */
+	__u64		d_bcount;	/* # disk blocks owned by the user */
+	__u64		d_icount;	/* # inodes owned by the user */
+	__s32		d_itimer;	/* zero if within inode limits */
+					/* if not, we refuse service */
+	__s32		d_btimer;	/* similar to above; for disk blocks */
+	__u16	  	d_iwarns;       /* # warnings issued wrt num inodes */
+	__u16	  	d_bwarns;       /* # warnings issued wrt disk blocks */
+	__s32		d_padding2;	/* padding2 - for future use */
+	__u64		d_rtb_hardlimit;/* absolute limit on realtime blks */
+	__u64		d_rtb_softlimit;/* preferred limit on RT disk blks */
+	__u64		d_rtbcount;	/* # realtime blocks owned */
+	__s32		d_rtbtimer;	/* similar to above; for RT disk blks */
+	__u16	  	d_rtbwarns;     /* # warnings issued wrt RT disk blks */
+	__s16		d_padding3;	/* padding3 - for future use */	
+	char		d_padding4[8];	/* yet more padding */
+} fs_disk_quota_t;
+
+/*
+ * These fields are sent to Q_XSETQLIM to specify fields that need to change.
+ */
+#define FS_DQ_ISOFT	(1<<0)
+#define FS_DQ_IHARD	(1<<1)
+#define FS_DQ_BSOFT	(1<<2)
+#define FS_DQ_BHARD 	(1<<3)
+#define FS_DQ_RTBSOFT	(1<<4)
+#define FS_DQ_RTBHARD	(1<<5)
+#define FS_DQ_LIMIT_MASK	(FS_DQ_ISOFT | FS_DQ_IHARD | FS_DQ_BSOFT | \
+				 FS_DQ_BHARD | FS_DQ_RTBSOFT | FS_DQ_RTBHARD)
+/*
+ * These timers can only be set in super user's dquot. For others, timers are
+ * automatically started and stopped. Superusers timer values set the limits
+ * for the rest.  In case these values are zero, the DQ_{F,B}TIMELIMIT values
+ * defined below are used. 
+ * These values also apply only to the d_fieldmask field for Q_XSETQLIM.
+ */
+#define FS_DQ_BTIMER	(1<<6)
+#define FS_DQ_ITIMER	(1<<7)
+#define FS_DQ_RTBTIMER 	(1<<8)
+#define FS_DQ_TIMER_MASK	(FS_DQ_BTIMER | FS_DQ_ITIMER | FS_DQ_RTBTIMER)
+
+/*
+ * The following constants define the default amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure).  These may be modified by the quotactl(2)
+ * system call with the Q_XSETQLIM command.
+ */
+#define	DQ_FTIMELIMIT	(7 * 24*60*60)		/* 1 week */
+#define	DQ_BTIMELIMIT	(7 * 24*60*60)		/* 1 week */
+
+/*
+ * Various flags related to quotactl(2).  Only relevant to XFS filesystems.
+ */
+#define XFS_QUOTA_UDQ_ACCT	(1<<0)  /* user quota accounting */
+#define XFS_QUOTA_UDQ_ENFD	(1<<1)  /* user quota limits enforcement */
+#define XFS_QUOTA_PDQ_ACCT	(1<<2)  /* project quota accounting */
+#define XFS_QUOTA_PDQ_ENFD	(1<<3)  /* project quota limits enforcement */
+#define XFS_QUOTA_GDQ_ACCT	(1<<4)  /* group quota accounting */
+#define XFS_QUOTA_GDQ_ENFD	(1<<5)  /* group quota limits enforcement */
+
+#define XFS_USER_QUOTA		(1<<0)	/* user quota type */
+#define XFS_PROJ_QUOTA		(1<<1)	/* project quota type */
+#define XFS_GROUP_QUOTA		(1<<2)	/* group quota type */
+
+/*
+ * fs_quota_stat is the struct returned in Q_XGETQSTAT for a given file system.
+ * Provides a centralized way to get meta information about the quota subsystem.
+ * eg. space taken up for user and aggregate quotas, number of dquots currently
+ * incore.
+ */
+#define FS_QSTAT_VERSION	1	/* fs_quota_stat.qs_version */
+
+/*
+ * Some basic information about 'quota files'.
+ */
+typedef struct fs_qfilestat {
+	__u64		qfs_ino;	/* inode number */
+	__u64		qfs_nblks;	/* number of BBs 512-byte-blks */
+	__u32		qfs_nextents;	/* number of extents */
+} fs_qfilestat_t;
+
+typedef struct fs_quota_stat {
+	__s8		qs_version;	/* version number for future changes */
+	__u16		qs_flags;	/* XFS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */
+	__s8		qs_pad;		/* unused */
+	fs_qfilestat_t	qs_uquota;	/* user quota storage information */
+	fs_qfilestat_t	qs_aquota;	/* aggr quota storage information */
+	__u32		qs_incoredqs;	/* number of dquots incore */
+	__s32		qs_btimelimit;  /* limit for blks timer */	
+	__s32		qs_itimelimit;  /* limit for inodes timer */	
+	__s32		qs_rtbtimelimit;/* limit for rt blks timer */	
+	__u16		qs_bwarnlimit;	/* limit for num warnings */
+	__u16		qs_iwarnlimit;	/* limit for num warnings */
+} fs_quota_stat_t;
+
+
+#ifdef __KERNEL__
+extern int xqm_quotactl(int, const char *, int, caddr_t);
+#endif
+
+#endif	/* __XQM_H__ */
diff --git a/libxfs/Makefile b/libxfs/Makefile
new file mode 100644
index 000000000..ce45344de
--- /dev/null
+++ b/libxfs/Makefile
@@ -0,0 +1,62 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+STATICLIBTARGET = libxfs.a
+HFILES = xfs.h
+CFILES = arch.c init.c logitem.c rdwr.c trans.c util.c \
+	xfs_bit.c xfs_rtbit.c xfs_alloc.c xfs_ialloc.c xfs_rtalloc.c \
+	xfs_inode.c xfs_btree.c xfs_alloc_btree.c xfs_ialloc_btree.c \
+	xfs_bmap_btree.c xfs_da_btree.c xfs_dir.c xfs_dir_leaf.c \
+	xfs_dir2.c xfs_dir2_leaf.c xfs_attr_leaf.c xfs_dir2_block.c \
+	xfs_dir2_node.c xfs_dir2_data.c xfs_dir2_sf.c xfs_bmap.c \
+	xfs_mount.c xfs_trans.c
+
+# xfs_repair is braindead, don't try linking it with a debug libxfs yet.
+DEBUG = -DNDEBUG
+
+# 
+# Tracing flags:
+# -DIO_DEBUG		reads and writes of buffers
+# -DMEM_DEBUG		all zone memory use
+# -DLI_DEBUG		log item (ino/buf) manipulation
+# -DXACT_DEBUG		transaction state changes
+# 
+LCFLAGS += -Wno-unknown-pragmas -Wno-unused -Wno-uninitialized -I.
+
+default: $(STATICLIBTARGET)
+
+include $(BUILDRULES)
+
+install: default
diff --git a/libxfs/init.c b/libxfs/init.c
new file mode 100644
index 000000000..e3142dd47
--- /dev/null
+++ b/libxfs/init.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#define ustat __kernel_ustat
+#include <libxfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <mntent.h>
+#include <sys/stat.h>
+#undef ustat
+#include <sys/ustat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+
+#ifndef BLKSETSIZE	/* Baaad m'kay, but it's not in libc yet */
+#define BLKSETSIZE _IO(0x12,108)	/* set device block size */
+#endif
+
+#define findrawpath(x)	x
+#define findblockpath(x) x
+
+char *progname = "libxfs";	/* default, changed by each tool */
+
+/*
+ * dev_map - table mapping each open device number to its file descriptor.
+ */
+#define MAX_DEVS 10	/* arbitrary maximum number of open devices */
+int nextfakedev = -1;	/* device number to give to next fake device */
+static struct dev_to_fd {
+	dev_t dev;	/* device number; zero marks an unused slot */
+	int fd;		/* descriptor returned by open() for that device */
+} dev_map[MAX_DEVS]={{0}};
+
+/* Return 1 if block device `block' has a mounted filesystem, else 0. */
+static int
+check_ismounted(char *name, char *block, int verbose)
+{
+	struct stat64	sb;
+	struct ustat	info;
+
+	/* anything that cannot be stat'ed or is not a block device: no */
+	if (stat64(block, &sb) < 0 || (sb.st_mode & S_IFMT) != S_IFBLK)
+		return 0;
+	/* a successful ustat() is taken to mean a mounted filesystem */
+	if (ustat(sb.st_rdev, &info) < 0)
+		return 0;
+
+	if (verbose)
+		fprintf(stderr, "%s: %s contains a mounted filesystem\n",
+			progname, name);
+	return 1;
+}
+
+/*
+ * Checks whether a given device has a mounted, writable filesystem.
+ * Prints a warning and returns `fatal' if it does (or if the mount
+ * table cannot be read); returns 0 otherwise.  Only mount table
+ * entries naming `block' are examined, so a read-only mount of some
+ * other device is never mistaken for this one.  Useful to tools which
+ * give uncertain results on an active filesystem - repair, check, logprint.
+ */
+static int
+check_isactive(char *name, char *block, int fatal)
+{
+	int		sts = 0;
+	FILE		*f;
+	struct mntent	*mnt;
+
+	if (check_ismounted(name, block, 0)) {
+		if ((f = setmntent(MOUNTED, "r")) == NULL) {
+			fprintf(stderr,
+				"%s: %s contains a possibly writable, mounted "
+				"filesystem\n", progname, name);
+			return fatal;
+		}
+		while ((mnt = getmntent(f)) != NULL)
+			if (strcmp(mnt->mnt_fsname, block) == 0 &&
+			    hasmntopt(mnt, MNTOPT_RO) != NULL)
+				break;
+		if (mnt == NULL) {
+			fprintf(stderr,
+				"%s: %s contains a writable mounted "
+				"filesystem\n", progname, name);
+			sts = fatal;
+		}
+		endmntent(f);
+	}
+	return sts;
+}
+
+/*
+ * Return the size of `path' in 512 byte units: st_size >> 9 for a regular
+ * file, otherwise the BLKGETSIZE result.  Exits on any failure.
+ */
+static __int64_t
+findsize(char *path)
+{
+	int		fd;
+	unsigned long	size;	/* BLKGETSIZE stores an unsigned long */
+	struct stat64   st;
+
+	/* Test to see if we are dealing with a regular file rather than a
+	 * block device, if we are just use the size returned by stat64
+	 */
+	if (stat64(path, &st) < 0) {
+		fprintf(stderr, "%s: "
+			"cannot stat the device special file \"%s\": %s\n",
+			progname, path, strerror(errno));
+		exit(1);
+	}
+	if ((st.st_mode & S_IFMT) == S_IFREG)
+		return (__int64_t)(st.st_size >> 9);
+
+	if ((fd = open(path, O_RDONLY)) < 0) {
+		fprintf(stderr, "%s: "
+			"error opening the device special file \"%s\": %s\n",
+			progname, path, strerror(errno));
+		exit(1);
+	}
+	if (ioctl(fd, BLKGETSIZE, &size) < 0) {
+		fprintf(stderr, "%s: can't determine device size\n", progname);
+		exit(1);
+	}
+	close(fd);
+
+	return (__int64_t)size;
+}
+
+
+/* libxfs_device_to_fd:
+ *     lookup a device number in the device map and return the
+ *     associated fd; prints a message and exits if it is not open
+ */
+int
+libxfs_device_to_fd(dev_t device)
+{
+	int d;
+
+	for (d = 0; d < MAX_DEVS; d++)
+		if (dev_map[d].dev == device)
+			return dev_map[d].fd;
+
+	fprintf(stderr, "%s: device_to_fd: device %lld is not open\n",
+		progname, (long long)device);	/* dev_t width varies */
+	exit(1);
+}
+
+/* libxfs_device_open:
+ *     open a device or file and return its device number (exits on error)
+ */
+dev_t
+libxfs_device_open(char *path, int creat, int readonly)
+{
+	int		fd;
+	dev_t		dev;
+	int		d;
+	struct stat     statb;
+	int		blocksize = 512; /* bytes */
+
+	if ((fd = open(path,
+			(readonly ? O_RDONLY : O_RDWR) |
+			(creat ? O_CREAT|O_TRUNC : 0),
+			0666)) < 0) {
+		fprintf(stderr, "%s: cannot open %s: %s\n",
+			progname, path, strerror(errno));
+		exit(1);
+	}
+
+	if (stat(path, &statb)<0) {
+		fprintf(stderr, "%s: cannot stat %s: %s\n",
+			progname, path, strerror(errno));
+		exit(1);
+	}
+
+	/* Set device blocksize to 512 bytes */
+	if ((statb.st_mode & S_IFMT) == S_IFBLK) {
+		if (ioctl(fd, BLKSETSIZE, &blocksize) < 0) {
+			fprintf(stderr, "%s: warning - cannot set blocksize on "
+				"block device %s: %s\n",
+				progname, path, strerror(errno));
+		}
+	}
+
+	/* get the device number from the stat buf - unless
+	 * we're not opening a real device, in which case
+	 * choose a new fake device number
+	 */
+	dev=(statb.st_rdev)?(statb.st_rdev):(nextfakedev--);
+
+	for (d=0;d<MAX_DEVS;d++)
+		if (dev_map[d].dev == dev) {
+			fprintf(stderr, "%s: device %lld is already open\n",
+			    progname, (long long)dev); /* dev_t width varies */
+			exit(1);
+		}
+
+	/* record the mapping in the first unused (zero) slot */
+	for (d=0;d<MAX_DEVS;d++)
+		if (!dev_map[d].dev) {
+			dev_map[d].dev=dev;
+			dev_map[d].fd=fd;
+			return dev;
+		}
+
+	fprintf(stderr, "%s: device_open: too many open devices\n", progname);
+	exit(1);
+}
+
+/* libxfs_device_close:
+ *     flush and close the fd of an open device and free its map slot
+ */
+void
+libxfs_device_close(dev_t dev)
+{
+	int     d;
+
+	for (d = 0; d < MAX_DEVS; d++)
+		if (dev_map[d].dev == dev) {
+			int fd = dev_map[d].fd;	/* the mapped fd, not the dev */
+
+			dev_map[d].dev = dev_map[d].fd = 0;
+			fsync(fd);
+			ioctl(fd, BLKFLSBUF, 0);
+			close(fd);
+			return;
+		}
+
+	fprintf(stderr, "%s: device_close: device %lld is not open\n",
+			progname, (long long)dev);
+	ASSERT(0);
+	exit(1);
+}
+
+
+/*
+ * libxfs initialization: open the data, log and realtime devices named in *a.
+ * Caller gets a 0 on failure (and we print a message), 1 on success.
+ */
+int
+libxfs_init(libxfs_init_t *a)
+{
+	char		*blockfile;
+	char		curdir[MAXPATHLEN];
+	char		*dname;
+	char		dpath[25];
+	int		fd;	/* fd opened on a->volname, closed at done: */
+	char		*logname;
+	char		logpath[25];
+	int		needcd;
+	char		*rawfile;
+	char		*rtname;
+	char		rtpath[25];
+	int		rval = 0;	/* only set to 1 on full success */
+	int		readonly;
+	int		inactive;
+	struct stat64	stbuf;
+
+	dpath[0] = logpath[0] = rtpath[0] = '\0';
+	dname = a->dname;
+	logname = a->logname;
+	rtname = a->rtname;
+	a->ddev = a->logdev = a->rtdev = 0;
+	a->dfd = a->logfd = a->rtfd = -1;
+	a->dsize = a->logBBsize = a->logBBstart = a->rtsize = 0;
+
+	(void)getcwd(curdir,MAXPATHLEN);
+	needcd = 0;
+	fd = -1;
+	readonly = (a->isreadonly & LIBXFS_ISREADONLY);
+	inactive = (a->isreadonly & LIBXFS_ISINACTIVE);
+	if (a->volname) {
+		if (stat64(a->volname, &stbuf) < 0) {
+			perror(a->volname);
+			goto done;
+		}
+		if (!(rawfile = findrawpath(a->volname))) {
+			fprintf(stderr, "%s: "
+				"can't find a character device matching %s\n",
+				progname, a->volname);
+			goto done;
+		}
+		if (!(blockfile = findblockpath(a->volname))) {
+			fprintf(stderr, "%s: "
+				"can't find a block device matching %s\n",
+				progname, a->volname);
+			goto done;
+		}
+		if (!readonly && !inactive && check_ismounted(
+					a->volname, blockfile, 1))
+			goto done;
+		if (inactive && check_isactive(
+					a->volname, blockfile, readonly))
+			goto done;
+		needcd = 1;
+		fd = open(rawfile, O_RDONLY);
+#ifdef HAVE_VOLUME_MANAGER
+		xlv_getdev_t getdev;
+		if (ioctl(fd, DIOCGETVOLDEV, &getdev) < 0)
+#else
+		if (1)	/* without a volume manager, never a volume */
+#endif
+		{
+			if (a->notvolok) {
+				dname = a->dname = a->volname;
+				a->volname = NULL;
+				goto voldone;
+			}
+			fprintf(stderr, "%s: "
+				"%s is not a volume device name\n",
+				progname, a->volname);
+			if (a->notvolmsg)
+				fprintf(stderr, a->notvolmsg, a->volname);
+			goto done;
+		}
+#ifdef HAVE_VOLUME_MANAGER
+		if (getdev.data_subvol_dev && dname) {
+			fprintf(stderr, "%s: "
+				"%s has a data subvolume, cannot specify %s\n",
+				progname, a->volname, dname);
+			goto done;
+		}
+		if (getdev.log_subvol_dev && logname) {
+			fprintf(stderr, "%s: "
+				"%s has a log subvolume, cannot specify %s\n",
+				progname, a->volname, logname);
+			goto done;
+		}
+		if (getdev.rt_subvol_dev && rtname) {
+			fprintf(stderr, "%s: %s has a realtime subvolume, "
+				"cannot specify %s\n",
+				progname, a->volname, rtname);
+			goto done;
+		}
+		if (!dname && getdev.data_subvol_dev) {
+			strcpy(dpath, "/tmp/libxfsdXXXXXX");
+			(void)mktemp(dpath);
+			if (mknod(dpath, S_IFCHR | 0600,
+				  getdev.data_subvol_dev) < 0) {
+				fprintf(stderr, "%s: mknod failed: %s\n",
+					progname, strerror(errno));
+				goto done;
+			}
+			dname = dpath;
+		}
+		if (!logname && getdev.log_subvol_dev) {
+			strcpy(logpath, "/tmp/libxfslXXXXXX");
+			(void)mktemp(logpath);
+			if (mknod(logpath, S_IFCHR | 0600,
+				  getdev.log_subvol_dev) < 0) {
+				fprintf(stderr, "%s: mknod failed: %s\n",
+					progname, strerror(errno));
+				goto done;
+			}
+			logname = logpath;
+		}
+		if (!rtname && getdev.rt_subvol_dev) {
+			strcpy(rtpath, "/tmp/libxfsrXXXXXX");
+			(void)mktemp(rtpath);
+			if (mknod(rtpath, S_IFCHR | 0600,
+				  getdev.rt_subvol_dev) < 0) {
+				fprintf(stderr, "%s: mknod failed: %s\n",
+					progname, strerror(errno));
+				goto done;
+			}
+			rtname = rtpath;
+		}
+#endif
+	}
+voldone:
+	if (dname) {
+		if (dname[0] != '/' && needcd)
+			chdir(curdir);
+		if (a->disfile) {
+			a->ddev= libxfs_device_open(dname, a->dcreat, readonly);
+			a->dfd = libxfs_device_to_fd(a->ddev);
+		} else {
+			if (stat64(dname, &stbuf) < 0) {
+				fprintf(stderr, "%s: stat64 failed on %s: %s\n",
+					progname, dname, strerror(errno));
+				goto done;
+			}
+			if (!(rawfile = findrawpath(dname))) {
+				fprintf(stderr, "%s: can't find a char device "
+					"matching %s\n", progname, dname);
+				goto done;
+			}
+			if (!(blockfile = findblockpath(dname))) {
+				fprintf(stderr, "%s: can't find a block device "
+					"matching %s\n", progname, dname);
+				goto done;
+			}
+			if (!readonly && !inactive && check_ismounted(
+						dname, blockfile, 1))
+				goto done;
+			if (inactive && check_isactive(
+						dname, blockfile, readonly))
+				goto done;
+			a->ddev = libxfs_device_open(rawfile,
+					a->dcreat, readonly);
+			a->dfd = libxfs_device_to_fd(a->ddev);
+			a->dsize = findsize(rawfile);
+		}
+		needcd = 1;
+	} else
+		a->dsize = 0;
+	if (logname) {
+		if (logname[0] != '/' && needcd)
+			chdir(curdir);
+		if (a->lisfile) {
+			a->logdev = libxfs_device_open(logname,
+					a->lcreat, readonly);
+			a->logfd = libxfs_device_to_fd(a->logdev);
+		} else {
+			if (stat64(logname, &stbuf) < 0) {
+				fprintf(stderr, "%s: stat64 failed on %s: %s\n",
+					progname, logname, strerror(errno));
+				goto done;
+			}
+			if (!(rawfile = findrawpath(logname))) {
+				fprintf(stderr, "%s: can't find a char device "
+					"matching %s\n", progname, logname);
+				goto done;
+			}
+			if (!(blockfile = findblockpath(logname))) {
+				fprintf(stderr, "%s: can't find a block device "
+					"matching %s\n", progname, logname);
+				goto done;
+			}
+			if (!readonly && !inactive && check_ismounted(
+						logname, blockfile, 1))
+				goto done;
+			else if (inactive && check_isactive(
+						logname, blockfile, readonly))
+				goto done;
+			a->logdev = libxfs_device_open(rawfile,
+					a->lcreat, readonly);
+			a->logfd = libxfs_device_to_fd(a->logdev);
+			a->logBBsize = findsize(rawfile);
+		}
+		needcd = 1;
+	} else
+		a->logBBsize = 0;
+	if (rtname) {
+		if (rtname[0] != '/' && needcd)
+			chdir(curdir);
+		if (a->risfile) {
+			a->rtdev = libxfs_device_open(rtname,
+					a->rcreat, readonly);
+			a->rtfd = libxfs_device_to_fd(a->rtdev);
+		} else {
+			if (stat64(rtname, &stbuf) < 0) {
+				fprintf(stderr, "%s: stat64 failed on %s: %s\n",
+					progname, rtname, strerror(errno));
+				goto done;
+			}
+			if (!(rawfile = findrawpath(rtname))) {
+				fprintf(stderr, "%s: can't find a char device "
+					"matching %s\n", progname, rtname);
+				goto done;
+			}
+			if (!(blockfile = findblockpath(rtname))) {
+				fprintf(stderr, "%s: can't find a block device "
+					"matching %s\n", progname, rtname);
+				goto done;
+			}
+			if (!readonly && !inactive && check_ismounted(
+						rtname, blockfile, 1))
+				goto done;
+			if (inactive && check_isactive(
+						rtname, blockfile, readonly))
+				goto done;
+			a->rtdev = libxfs_device_open(rawfile,
+					a->rcreat, readonly);
+			a->rtfd = libxfs_device_to_fd(a->rtdev);
+			a->rtsize = findsize(rawfile);
+		}
+		needcd = 1;
+	} else
+		a->rtsize = 0;
+	if (a->dsize < 0) {
+		fprintf(stderr, "%s: can't get size for data subvolume\n",
+			progname);
+		goto done;
+	}
+	if (a->logBBsize < 0) {
+		fprintf(stderr, "%s: can't get size for log subvolume\n",
+			progname);
+		goto done;
+	}
+	if (a->rtsize < 0) {
+		fprintf(stderr, "%s: can't get size for realtime subvolume\n",
+			progname);
+		goto done;
+	}
+	if (needcd)
+		chdir(curdir);
+	rval = 1;
+done:	/* common exit: remove temp nodes, close devices on failure */
+	if (dpath[0])
+		unlink(dpath);
+	if (logpath[0])
+		unlink(logpath);
+	if (rtpath[0])
+		unlink(rtpath);
+	if (fd >= 0)
+		close(fd);
+	if (!rval && a->ddev)
+		libxfs_device_close(a->ddev);
+	if (!rval && a->logdev)
+		libxfs_device_close(a->logdev);
+	if (!rval && a->rtdev)
+		libxfs_device_close(a->rtdev);
+	return rval;
+}
+
+
+/*
+ * Free (release != 0) or initialise (release == 0) all zone allocators we use.
+ */
+static void
+manage_zones(int release)
+{
+	extern xfs_zone_t	*xfs_ili_zone;
+	extern xfs_zone_t	*xfs_inode_zone;
+	extern xfs_zone_t	*xfs_ifork_zone;
+	extern xfs_zone_t	*xfs_dabuf_zone;
+	extern xfs_zone_t	*xfs_buf_item_zone;
+	extern xfs_zone_t	*xfs_da_state_zone;
+	extern xfs_zone_t	*xfs_btree_cur_zone;
+	extern xfs_zone_t	*xfs_bmap_free_item_zone;
+	extern void		xfs_dir_startup();
+
+	if (release) {	/* free every zone that is set up below */
+		libxfs_free(xfs_inode_zone);
+		libxfs_free(xfs_ifork_zone);
+		libxfs_free(xfs_dabuf_zone);
+		libxfs_free(xfs_ili_zone);
+		libxfs_free(xfs_buf_item_zone);
+		libxfs_free(xfs_da_state_zone);
+		libxfs_free(xfs_btree_cur_zone);
+		libxfs_free(xfs_bmap_free_item_zone);
+		return;
+	}
+	xfs_inode_zone = libxfs_zone_init(sizeof(xfs_inode_t), "xfs_inode");
+	xfs_ifork_zone = libxfs_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+	xfs_dabuf_zone = libxfs_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+	xfs_ili_zone = libxfs_zone_init(
+			sizeof(xfs_inode_log_item_t), "xfs_inode_log_item");
+	xfs_buf_item_zone = libxfs_zone_init(
+			sizeof(xfs_buf_log_item_t), "xfs_buf_log_item");
+	xfs_da_state_zone = libxfs_zone_init(
+			sizeof(xfs_da_state_t), "xfs_da_state");
+	xfs_btree_cur_zone = libxfs_zone_init(
+			sizeof(xfs_btree_cur_t), "xfs_btree_cur");
+	xfs_bmap_free_item_zone = libxfs_zone_init(
+			sizeof(xfs_bmap_free_item_t), "xfs_bmap_free_item");
+	xfs_dir_startup();
+}
+
+/*
+ * Read the realtime bitmap and summary inodes named by the superblock
+ * into mp->m_rbmip and mp->m_rsumip.  Returns 0 or the iread error.
+ */
+static int
+rtmount_inodes(xfs_mount_t *mp)
+{
+	xfs_sb_t	*sb = &mp->m_sb;
+	int		rc;
+
+	/* no bitmap inode: nothing to read */
+	if (sb->sb_rbmino == NULLFSINO)
+		return 0;
+	rc = libxfs_iread(mp, NULL, sb->sb_rbmino, &mp->m_rbmip, 0);
+	if (rc != 0) {
+		fprintf(stderr, "%s: cannot read realtime bitmap inode (%d)\n",
+			progname, rc);
+		return rc;
+	}
+	ASSERT(mp->m_rbmip != NULL);
+	ASSERT(sb->sb_rsumino != NULLFSINO);
+	rc = libxfs_iread(mp, NULL, sb->sb_rsumino, &mp->m_rsumip, 0);
+	if (rc != 0) {
+		fprintf(stderr, "%s: cannot read realtime summary inode (%d)\n",
+			progname, rc);
+		return rc;
+	}
+	ASSERT(mp->m_rsumip != NULL);
+	return 0;
+}
+
+/*
+ * Mount structure initialization, provides a filled-in xfs_mount_t
+ * such that the numerous XFS_* macros can be used.  If dev is zero,
+ * no IO is performed (no size checks, no inode reads).  Returns mp or NULL.
+ */
+xfs_mount_t *
+libxfs_mount(
+	xfs_mount_t	*mp,
+	xfs_sb_t	*sb,
+	dev_t		dev,
+	dev_t		logdev,
+	dev_t		rtdev,
+	int		rrootinos)
+{
+	xfs_daddr_t	d;
+	xfs_buf_t	*bp;
+	xfs_sb_t	*sbp;
+	size_t		size;
+	int		error;
+
+	mp->m_dev = dev;
+	mp->m_rtdev = rtdev;
+	mp->m_logdev = logdev;
+	mp->m_sb = *sb;
+	sbp = &(mp->m_sb);
+	manage_zones(0);
+
+	libxfs_mount_common(mp, sb);
+
+	libxfs_alloc_compute_maxlevels(mp);
+	libxfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
+	libxfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
+	libxfs_ialloc_compute_maxlevels(mp);
+
+	if (sbp->sb_imax_pct) {
+		/* Make sure the maximum inode count is a multiple of the
+		 * units we allocate inodes in.
+		 */
+		mp->m_maxicount = (sbp->sb_dblocks * sbp->sb_imax_pct) / 100;
+		mp->m_maxicount = ((mp->m_maxicount / mp->m_ialloc_blks) *
+				  mp->m_ialloc_blks)  << sbp->sb_inopblog;
+	} else
+		mp->m_maxicount = 0;
+
+	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+
+	/*
+	 * Set whether we're using inode alignment.
+	 */
+	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
+	    mp->m_sb.sb_inoalignmt >=
+	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
+	else
+		mp->m_inoalign_mask = 0;
+	/*
+	 * If we are using stripe alignment, check whether
+	 * the stripe unit is a multiple of the inode alignment
+	 */
+	if (   mp->m_dalign
+	    && mp->m_inoalign_mask && !(mp->m_dalign & mp->m_inoalign_mask))
+		mp->m_sinoalign = mp->m_dalign;
+	else
+		mp->m_sinoalign = 0;
+
+	/*
+	 * Check that the data (and log if separate) are an ok size.
+	 */
+	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
+		fprintf(stderr, "%s: size check failed\n", progname);
+		return NULL;
+	}
+
+	/* Initialize the appropriate directory manager */
+	if (XFS_SB_VERSION_HASDIRV2(sbp))
+		libxfs_dir2_mount(mp);
+	else
+		libxfs_dir_mount(mp);
+
+	/* Initialize the precomputed transaction reservations values */
+	libxfs_trans_init(mp);
+
+	if (dev == 0)	/* maxtrres, we have no device so leave now */
+		return mp;
+
+	bp = libxfs_readbuf(mp->m_dev, d - 1, 1, 0);
+	if (bp == NULL) {
+		fprintf(stderr, "%s: data size check failed\n", progname);
+		return NULL;
+	}
+	libxfs_putbuf(bp);
+
+	if (mp->m_logdev && mp->m_logdev != mp->m_dev) {
+		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+		if ( (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) ||
+		     (!(bp = libxfs_readbuf(mp->m_logdev, d - 1, 1, 1)))) {
+			fprintf(stderr, "%s: log size checks failed\n",
+					progname);
+			return NULL;
+		}
+		libxfs_putbuf(bp);
+	}
+
+	/* Initialize realtime fields in the mount structure */
+	if (libxfs_rtmount_init(mp)) {
+		fprintf(stderr, "%s: real-time device init failed\n", progname);
+		return NULL;
+	}
+
+	/* Allocate and initialize the per-ag data */
+	size = sbp->sb_agcount * sizeof(xfs_perag_t);
+	if ((mp->m_perag = calloc(size, 1)) == NULL) {
+		fprintf(stderr, "%s: failed to alloc %lu bytes: %s\n",
+			progname, (unsigned long)size, strerror(errno));
+		exit(1);
+	}
+
+	/*
+	 * mkfs calls mount before the root inode is allocated.
+	 */
+	if (rrootinos && sbp->sb_rootino != NULLFSINO) {
+		error = libxfs_iread(mp, NULL, sbp->sb_rootino,
+				&mp->m_rootip, 0);
+		if (error) {
+			fprintf(stderr, "%s: cannot read root inode (%d)\n",
+				progname, error);
+			return NULL;
+		}
+		ASSERT(mp->m_rootip != NULL);
+	}
+	if (rrootinos && rtmount_inodes(mp))
+		return NULL;
+	return mp;
+}
+
+/*
+ * Give back what libxfs_mount set up: the per-ag array and the zones.
+ */
+void
+libxfs_umount(xfs_mount_t *mp)
+{
+	free(mp->m_perag);
+	manage_zones(1);
+}
diff --git a/libxfs/logitem.c b/libxfs/logitem.c
new file mode 100644
index 000000000..b26106088
--- /dev/null
+++ b/libxfs/logitem.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+xfs_zone_t	*xfs_buf_item_zone;
+xfs_zone_t	*xfs_ili_zone;		/* inode log item zone */
+
+
+/*
+ * This is called to add the given log item to the transaction's
+ * list of log items.  It must find a free log item descriptor
+ * or allocate a new one and add the item to that descriptor.
+ * The function returns a pointer to the item descriptor used to point
+ * to the new item.  The log item will now point to its new descriptor
+ * with its li_desc field.
+ */
+xfs_log_item_desc_t *
+xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
+{
+	xfs_log_item_desc_t	*lidp;
+	xfs_log_item_chunk_t	*licp;
+	int			i;
+
+	/*
+	 * If there are no free descriptors, allocate a new chunk
+	 * of them and put it at the front of the chunk list.
+	 */
+	if (tp->t_items_free == 0) {
+		licp = (xfs_log_item_chunk_t*)
+		       kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
+		ASSERT(licp != NULL);
+		/*
+		 * Initialize the chunk, and then
+		 * claim the first slot in the newly allocated chunk.
+		 */
+		XFS_LIC_INIT(licp);
+		XFS_LIC_CLAIM(licp, 0);
+		licp->lic_unused = 1;
+		XFS_LIC_INIT_SLOT(licp, 0);
+		lidp = XFS_LIC_SLOT(licp, 0);
+
+		/*
+		 * Link in the new chunk and update the free count.
+		 */
+		licp->lic_next = tp->t_items.lic_next;
+		tp->t_items.lic_next = licp;
+		tp->t_items_free = XFS_LIC_NUM_SLOTS - 1; /* slot 0 is taken */
+
+		/*
+		 * Initialize the descriptor and the generic portion
+		 * of the log item.
+		 *
+		 * Point the new slot at this item and return it.
+		 * Also point the log item at its currently active
+		 * descriptor and set the item's mount pointer.
+		 */
+		lidp->lid_item = lip;
+		lidp->lid_flags = 0;
+		lidp->lid_size = 0;
+		lip->li_desc = lidp;
+		lip->li_mountp = tp->t_mountp;
+		return (lidp);
+	}
+
+	/*
+	 * Find the free descriptor. It is somewhere in the chunklist
+	 * of descriptors.
+	 */
+	licp = &tp->t_items;
+	while (licp != NULL) {
+		if (XFS_LIC_VACANCY(licp)) {
+			if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
+				i = licp->lic_unused;
+				ASSERT(XFS_LIC_ISFREE(licp, i));
+				break;
+			}
+			for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
+				if (XFS_LIC_ISFREE(licp, i))
+					break;
+			}
+			ASSERT(i <= XFS_LIC_MAX_SLOT);
+			break;
+		}
+		licp = licp->lic_next;
+	}
+	ASSERT(licp != NULL);
+	/*
+	 * If we find a free descriptor, claim it,
+	 * initialize it, and return it.
+	 */
+	XFS_LIC_CLAIM(licp, i);
+	if (licp->lic_unused <= i) {
+		licp->lic_unused = i + 1;
+		XFS_LIC_INIT_SLOT(licp, i);
+	}
+	lidp = XFS_LIC_SLOT(licp, i);
+	tp->t_items_free--;
+	lidp->lid_item = lip;
+	lidp->lid_flags = 0;
+	lidp->lid_size = 0;
+	lip->li_desc = lidp;
+	lip->li_mountp = tp->t_mountp;
+	return (lidp);
+}
+
+/*
+ * Free the given descriptor and clear its item's li_desc back-pointer.
+ *
+ * This requires setting the bit in the chunk's free mask corresponding
+ * to the given slot.
+ */
+void
+xfs_trans_free_item(xfs_trans_t	*tp, xfs_log_item_desc_t *lidp)
+{
+	uint			slot;
+	xfs_log_item_chunk_t	*licp;
+	xfs_log_item_chunk_t	**licpp;
+
+	slot = XFS_LIC_DESC_TO_SLOT(lidp);
+	licp = XFS_LIC_DESC_TO_CHUNK(lidp);
+	XFS_LIC_RELSE(licp, slot);
+	lidp->lid_item->li_desc = NULL;
+	tp->t_items_free++;
+
+	/*
+	 * If there are no more used items in the chunk and this is not
+	 * the chunk embedded in the transaction structure, then free
+	 * the chunk. First pull it from the chunk list and then
+	 * free it back to the heap.  We didn't bother with a doubly
+	 * linked list here because the lists should be very short
+	 * and this is not a performance path.  It's better to save
+	 * the memory of the extra pointer.
+	 *
+	 * Also decrement the transaction structure's count of free items
+	 * by the number in a chunk since we are freeing an empty chunk.
+	 */
+	if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) {
+		licpp = &(tp->t_items.lic_next);
+		while (*licpp != licp) {
+			ASSERT(*licpp != NULL);
+			licpp = &((*licpp)->lic_next);
+		}
+		*licpp = licp->lic_next;	/* unlink licp from the list */
+		kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+		tp->t_items_free -= XFS_LIC_NUM_SLOTS;
+	}
+}
+
+/*
+ * Return the descriptor that refers to the given log item.
+ * The log item MUST already have a descriptor; this routine
+ * asserts instead of returning NULL when it does not.
+ *
+ * The descriptor pointer is cached in the item's li_desc
+ * field, so the transaction itself is never searched.
+ */
+xfs_log_item_desc_t *
+xfs_trans_find_item(xfs_trans_t	*tp, xfs_log_item_t *lip)
+{
+	xfs_log_item_desc_t	*lidp = lip->li_desc;
+
+	ASSERT(lidp != NULL);
+	return lidp;
+}
+
+/*
+ * Unlock every item held by a transaction and release all of the
+ * descriptors of that transaction.
+ *
+ * Each chunk of descriptors is unlocked in turn, and every chunk
+ * except the one embedded in the transaction is freed on the way.
+ */
+void
+xfs_trans_free_items(
+	xfs_trans_t	*tp,
+	int		flags)
+{
+	xfs_log_item_chunk_t	*chunk = &tp->t_items;
+	xfs_log_item_chunk_t	*following;
+	int			aborted = flags & XFS_TRANS_ABORT;
+
+	/*
+	 * The embedded chunk lives inside the transaction, so it is
+	 * only emptied here and never handed to kmem_free().
+	 */
+	if (!XFS_LIC_ARE_ALL_FREE(chunk)) {
+		(void) xfs_trans_unlock_chunk(chunk, 1, aborted,
+					      NULLCOMMITLSN);
+		XFS_LIC_ALL_FREE(chunk);
+		chunk->lic_unused = 0;
+	}
+
+	/*
+	 * Unlock the items of each remaining chunk, then free the chunk.
+	 */
+	for (chunk = chunk->lic_next; chunk != NULL; chunk = following) {
+		ASSERT(!XFS_LIC_ARE_ALL_FREE(chunk));
+		(void) xfs_trans_unlock_chunk(chunk, 1, aborted,
+					      NULLCOMMITLSN);
+		following = chunk->lic_next;
+		kmem_free(chunk, sizeof(xfs_log_item_chunk_t));
+	}
+
+	/*
+	 * Only the (now empty) embedded chunk remains, so reset the
+	 * chunk list and the free descriptor count to match.
+	 */
+	tp->t_items.lic_next = NULL;
+	tp->t_items_free = XFS_LIC_NUM_SLOTS;
+}
+
+/*
+ * Check to see if a buffer matching the given parameters is already a
+ * part of the given transaction; returns it, or NULL if there is none.
+ * Only the first, embedded chunk is checked, to keep the search short.
+ */
+STATIC xfs_buf_t *
+xfs_trans_buf_item_match(
+	xfs_trans_t	*tp,
+	buftarg_t	*target,
+	xfs_daddr_t	blkno,
+	int		len)
+{
+	xfs_log_item_chunk_t	*licp;
+	xfs_log_item_desc_t	*lidp;
+	xfs_buf_log_item_t	*blip;
+	xfs_buf_t			*bp;
+	int			i;
+
+#ifdef LI_DEBUG
+	fprintf(stderr, "buf_item_match (fast) log items for xact %p\n", tp);
+#endif
+
+	bp = NULL;
+	len = BBTOB(len);	/* compared with XFS_BUF_COUNT() below */
+	licp = &tp->t_items;
+	if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+		for (i = 0; i < licp->lic_unused; i++) {
+			/*
+			 * Skip unoccupied slots.
+			 */
+			if (XFS_LIC_ISFREE(licp, i)) {
+				continue;
+			}
+
+			lidp = XFS_LIC_SLOT(licp, i);
+			blip = (xfs_buf_log_item_t *)lidp->lid_item;
+#ifdef LI_DEBUG
+			fprintf(stderr,
+				"\tfound log item, xact %p, blip=%p (%d/%d)\n",
+				tp, blip, i, licp->lic_unused);
+#endif
+			if (blip->bli_item.li_type != XFS_LI_BUF) {
+				continue;
+			}
+
+			bp = blip->bli_buf;
+#ifdef LI_DEBUG
+			fprintf(stderr,
+			"\tfound buf %p log item, xact %p, blip=%p (%d)\n",
+				bp, tp, blip, i);
+#endif
+			if ((XFS_BUF_TARGET(bp) == target->dev) &&
+			    (XFS_BUF_ADDR(bp) == blkno) &&
+			    (XFS_BUF_COUNT(bp) == len)) {
+				/*
+				 * We found it.  Break out and
+				 * return the pointer to the buffer.
+				 */
+#ifdef LI_DEBUG
+				fprintf(stderr,
+					"\tfound REAL buf log item, bp=%p\n",
+					bp);
+#endif
+				break;
+			} else {
+				bp = NULL;	/* no match: forget this one */
+			}
+		}
+	}
+#ifdef LI_DEBUG
+	if (!bp) fprintf(stderr, "\tfast search - got nothing\n");
+#endif
+	return bp;
+}
+
+/*
+ * Check to see if a buffer matching the given parameters is already a
+ * part of the given transaction; returns it, or NULL if there is none.
+ * Check all the chunks, we want to be thorough.
+ */
+STATIC xfs_buf_t *
+xfs_trans_buf_item_match_all(
+	xfs_trans_t	*tp,
+	buftarg_t	*target,
+	xfs_daddr_t	blkno,
+	int		len)
+{
+	xfs_log_item_chunk_t	*licp;
+	xfs_log_item_desc_t	*lidp;
+	xfs_buf_log_item_t	*blip;
+	xfs_buf_t			*bp;
+	int			i;
+
+#ifdef LI_DEBUG
+	fprintf(stderr, "buf_item_match_all (slow) log items for xact %p\n",
+		tp);
+#endif
+
+	bp = NULL;
+	len = BBTOB(len);	/* compared with XFS_BUF_COUNT() below */
+	for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
+		if (XFS_LIC_ARE_ALL_FREE(licp)) {
+			ASSERT(licp == &tp->t_items);
+			ASSERT(licp->lic_next == NULL);
+			return NULL;
+		}
+		for (i = 0; i < licp->lic_unused; i++) {
+			/*
+			 * Skip unoccupied slots.
+			 */
+			if (XFS_LIC_ISFREE(licp, i)) {
+				continue;
+			}
+
+			lidp = XFS_LIC_SLOT(licp, i);
+			blip = (xfs_buf_log_item_t *)lidp->lid_item;
+#ifdef LI_DEBUG
+			fprintf(stderr,
+				"\tfound log item, xact %p, blip=%p (%d/%d)\n",
+				tp, blip, i, licp->lic_unused);
+#endif
+			if (blip->bli_item.li_type != XFS_LI_BUF) {
+				continue;
+			}
+
+			bp = blip->bli_buf;
+			ASSERT(bp);
+			ASSERT(XFS_BUF_ADDR(bp));
+#ifdef LI_DEBUG
+			fprintf(stderr,
+			"\tfound buf %p log item, xact %p, blip=%p (%d)\n",
+				bp, tp, blip, i);
+#endif
+			if ((XFS_BUF_TARGET(bp) == target->dev) &&
+			    (XFS_BUF_ADDR(bp) == blkno) &&
+			    (XFS_BUF_COUNT(bp) == len)) {
+				/*
+				 * We found it.  Break out and
+				 * return the pointer to the buffer.
+				 */
+#ifdef LI_DEBUG
+				fprintf(stderr,
+					"\tfound REAL buf log item, bp=%p\n",
+					bp);
+#endif
+				return bp;
+			}
+		}
+	}
+#ifdef LI_DEBUG
+	fprintf(stderr, "slow search - got nothing\n");
+#endif
+	return NULL;
+}
+
+/*
+ * Allocate a new buf log item to go with the given buffer.
+ * Set the buffer's b_fsprivate field to point to the new
+ * buf log item.  An existing buf log item attached to the
+ * buffer is reused instead, and the buffer's mount pointer
+ * and bdstrat callback are (re)set in either case.
+ */
+void
+xfs_buf_item_init(
+	xfs_buf_t	*bp,
+	xfs_mount_t	*mp)
+{
+	xfs_log_item_t		*lip;
+	xfs_buf_log_item_t	*bip;
+
+#ifdef LI_DEBUG
+	fprintf(stderr, "buf_item_init for buffer %p\n", bp);
+#endif
+
+	/*
+	 * Check to see if there is already a buf log item for
+	 * this buffer.  If there is, it is guaranteed to be
+	 * the first.  If we do already have one, there is
+	 * nothing to do here so return.
+	 */
+	if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp)
+		XFS_BUF_SET_FSPRIVATE3(bp, mp);
+	XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
+	if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
+		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+		if (lip->li_type == XFS_LI_BUF) {
+#ifdef LI_DEBUG
+			fprintf(stderr,
+				"reused buf item %p for pre-logged buffer %p\n",
+				lip, bp);
+#endif
+			return;
+		}
+	}
+
+	bip = (xfs_buf_log_item_t *)kmem_zone_zalloc(xfs_buf_item_zone,
+						    KM_SLEEP);
+#ifdef LI_DEBUG
+	fprintf(stderr, "adding buf item %p for not-logged buffer %p\n",
+		bip, bp);
+#endif
+	bip->bli_item.li_type = XFS_LI_BUF;
+	bip->bli_item.li_mountp = mp;
+	bip->bli_buf = bp;
+	bip->bli_format.blf_type = XFS_LI_BUF;
+	bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
+	bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
+	XFS_BUF_SET_FSPRIVATE(bp, bip);
+}
+
+
+/*
+ * Mark the buf item as dirty.  No per-byte bitmap is updated by this
+ * version, so the first/last byte range passed in is not used.
+ */
+void
+xfs_buf_item_log(
+	xfs_buf_log_item_t	*bip,
+	uint			first,
+	uint			last)
+{
+	/*
+	 * Mark the item as having some dirty data for
+	 * quick reference in xfs_buf_item_dirty.
+	 */
+	bip->bli_flags |= XFS_BLI_DIRTY;
+}
+
+/*
+ * Set up the inode log item for a newly allocated (in-core) inode.
+ */
+void
+xfs_inode_item_init(
+	xfs_inode_t	*ip,
+	xfs_mount_t	*mp)
+{
+	xfs_inode_log_item_t	*item;
+
+	ASSERT(ip->i_itemp == NULL);
+	item = (xfs_inode_log_item_t *)
+			kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
+	ip->i_itemp = item;
+#ifdef LI_DEBUG
+	fprintf(stderr, "inode_item_init for inode %llu, iip=%p\n",
+		ip->i_ino, item);
+#endif
+	item->ili_inode = ip;
+	item->ili_item.li_type = XFS_LI_INODE;
+	item->ili_item.li_mountp = mp;
+	item->ili_format.ilf_type = XFS_LI_INODE;
+	item->ili_format.ilf_ino = ip->i_ino;
+	item->ili_format.ilf_blkno = ip->i_blkno;
+	item->ili_format.ilf_len = ip->i_len;
+	item->ili_format.ilf_boffset = ip->i_boffset;
+}
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
new file mode 100644
index 000000000..06fb1a5e2
--- /dev/null
+++ b/libxfs/rdwr.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <xfs_log.h>
+#include <xfs_log_priv.h>
+
+#define BBTOOFF64(bbs)  (((xfs_off_t)(bbs)) << BBSHIFT)
+#define BDSTRAT_SIZE    (256 * 1024)
+
+/*
+ * Write zeroes over len basic blocks of device dev, starting at basic
+ * block start.  The data is written in chunks of at most BDSTRAT_SIZE
+ * bytes from a page-aligned, zero-filled buffer.
+ *
+ * Any failure (allocation, seek, write error or short write) prints a
+ * message and exits the program.  A zero length request is a no-op.
+ */
+void
+libxfs_device_zero(dev_t dev, xfs_daddr_t start, uint len)
+{
+	xfs_daddr_t     bno;
+	uint		nblks;
+	int		size;
+	int		fd;
+	ssize_t		sts;
+	char		*z;
+
+	/* nothing to zero; also avoids asking memalign for zero bytes */
+	if (len == 0)
+		return;
+
+	size = BDSTRAT_SIZE <= BBTOB(len) ? BDSTRAT_SIZE : BBTOB(len);
+	if ((z = memalign(getpagesize(), size)) == NULL) {
+		fprintf(stderr, "%s: device_zero can't memalign %d bytes: %s\n",
+			progname, size, strerror(errno));
+		exit(1);
+	}
+	bzero(z, size);
+	fd = libxfs_device_to_fd(dev);
+	for (bno = start; bno < start + len; ) {
+		/* full chunk, trimmed so the last write stops at start+len */
+		nblks = (uint)BTOBB(size);
+		if (bno + nblks > start + len)
+			nblks = (uint)(start + len - bno);
+		if (lseek64(fd, BBTOOFF64(bno), SEEK_SET) < 0) {
+			fprintf(stderr, "%s: device_zero lseek64 failed: %s\n",
+				progname, strerror(errno));
+			exit(1);
+		}
+		/*
+		 * Test the sign of the result before comparing sizes: a
+		 * direct signed/unsigned comparison can turn -1 into a huge
+		 * positive value and hide the error.  errno is only
+		 * meaningful for sts < 0, so a short write is reported as
+		 * such rather than through strerror().
+		 */
+		sts = write(fd, z, BBTOB(nblks));
+		if (sts < 0 || (size_t)sts < (size_t)BBTOB(nblks)) {
+			fprintf(stderr, "%s: device_zero write failed: %s\n",
+				progname,
+				sts < 0 ? strerror(errno) : "short write");
+			exit(1);
+		}
+		bno += nblks;
+	}
+	free(z);
+}
+
+/*
+ * Reset a log: zero length basic blocks starting at start, then write a
+ * log record header into the first basic block and a single unmount
+ * operation into the second.
+ *
+ * Returns -EINVAL if device or fs_uuid is missing, -1 if a buffer cannot
+ * be obtained or written, and 0 on success.
+ */
+int
+libxfs_log_clear(
+	dev_t		device,
+	xfs_daddr_t	start,
+	uint		length,
+	uuid_t		*fs_uuid,
+	int		fmt)
+{
+	xfs_buf_t		*bp;
+	xlog_rec_header_t	*rhead;
+	xlog_op_header_t	*ophead;
+	/* the data section must be 32 bit size aligned */
+	struct {
+		__uint16_t	magic;
+		__uint16_t	pad1;
+		__uint32_t	pad2;	/* may as well make it 64 bits */
+	} unmount_rec = { XLOG_UNMOUNT_TYPE, 0, 0 };
+
+	if (device == 0 || fs_uuid == NULL)
+		return -EINVAL;
+
+	/* step 1: wipe the whole log area */
+	libxfs_device_zero(device, start, length);
+
+	/* step 2: the log record header goes in the first basic block */
+	bp = libxfs_getbuf(device, start, 1);
+	if (bp == NULL)
+		return -1;
+
+	memset(XFS_BUF_PTR(bp), 0, BBSIZE);
+	rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
+
+	/*
+	 * note that oh_tid actually contains the cycle number
+	 * and the tid is stored in h_cycle_data[0] - that's the
+	 * way things end up on disk.
+	 */
+	INT_SET(rhead->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
+	INT_SET(rhead->h_cycle, ARCH_CONVERT, 1);
+	INT_SET(rhead->h_version, ARCH_CONVERT, 1);
+	INT_SET(rhead->h_len, ARCH_CONVERT, 20);
+	INT_SET(rhead->h_chksum, ARCH_CONVERT, 0);
+	INT_SET(rhead->h_prev_block, ARCH_CONVERT, -1);
+	INT_SET(rhead->h_num_logops, ARCH_CONVERT, 1);
+	INT_SET(rhead->h_cycle_data[0], ARCH_CONVERT, 0xb0c0d0d0);
+	INT_SET(rhead->h_fmt, ARCH_CONVERT, fmt);
+
+	ASSIGN_ANY_LSN(rhead->h_lsn, 1, 0, ARCH_CONVERT);
+	ASSIGN_ANY_LSN(rhead->h_tail_lsn, 1, 0, ARCH_CONVERT);
+
+	memcpy(rhead->h_fs_uuid, fs_uuid, sizeof(uuid_t));
+
+	/* libxfs_writebuf releases the buffer whether or not it succeeds */
+	if (libxfs_writebuf(bp, 0) != 0)
+		return -1;
+
+	/* step 3: the unmount op header and its payload in the next block */
+	bp = libxfs_getbuf(device, start + 1, 1);
+	if (bp == NULL)
+		return -1;
+
+	memset(XFS_BUF_PTR(bp), 0, BBSIZE);
+	ophead = (xlog_op_header_t *)XFS_BUF_PTR(bp);
+	INT_SET(ophead->oh_tid, ARCH_CONVERT, 1);
+	INT_SET(ophead->oh_len, ARCH_CONVERT, sizeof(unmount_rec));
+	INT_SET(ophead->oh_clientid, ARCH_CONVERT, XFS_LOG);
+	INT_SET(ophead->oh_flags, ARCH_CONVERT, XLOG_UNMOUNT_TRANS);
+	INT_SET(ophead->oh_res2, ARCH_CONVERT, 0);
+
+	/* payload sits directly behind the op header */
+	memcpy(XFS_BUF_PTR(bp) + sizeof(xlog_op_header_t),
+		&unmount_rec,
+		sizeof(unmount_rec));
+
+	if (libxfs_writebuf(bp, 0) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Simple I/O interface
+ */
+
+/*
+ * Allocate a zero-filled buffer header with room for len basic blocks of
+ * data placed directly behind it, and record the device and block number
+ * it describes.  No I/O is done.
+ *
+ * Exits the program if the allocation fails, so the return value is never
+ * NULL.  Release with libxfs_putbuf().
+ */
+xfs_buf_t *
+libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+{
+	xfs_buf_t	*buf;
+	size_t		total;
+
+	total = sizeof(xfs_buf_t) + BBTOB(len);
+	if ((buf = calloc(total, 1)) == NULL) {
+		/* total is a size_t: print it as unsigned long, not %d */
+		fprintf(stderr, "%s: buf calloc failed (%lu bytes): %s\n",
+			progname, (unsigned long)total, strerror(errno));
+		exit(1);
+	}
+	/* by default, we allocate buffer directly after the header */
+	buf->b_blkno = blkno;
+	buf->b_bcount = BBTOB(len);
+	buf->b_dev = device;
+	buf->b_addr = (char *)(&buf->b_addr + 1);	/* must be last field */
+#ifdef IO_DEBUG
+	fprintf(stderr, "getbuf allocated %ubytes, blkno=%llu(%llu), %p\n",
+		BBTOB(len), BBTOOFF64(blkno), blkno, buf);
+#endif
+
+	return(buf);
+}
+
+/*
+ * Read len basic blocks starting at blkno on dev into an existing buffer,
+ * updating the buffer's device and block number to match.
+ *
+ * Returns 0 on success or the errno of the failing seek/read.  When die
+ * is non-zero a failure exits the program instead.  Only a negative
+ * read() result is treated as an error; a short read is not detected.
+ */
+int
+libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *buf, int len, int die)
+{
+	int	fd = libxfs_device_to_fd(dev);
+	int	error;
+
+	buf->b_dev = dev;
+	buf->b_blkno = blkno;
+	ASSERT(BBTOB(len) <= buf->b_bcount);
+
+	if (lseek64(fd, BBTOOFF64(blkno), SEEK_SET) < 0) {
+		/*
+		 * Capture errno first: the stdio calls below may change it,
+		 * and returning a clobbered (possibly zero) value would
+		 * report the wrong error or even success.
+		 */
+		error = errno;
+		fprintf(stderr, "%s: lseek64 to %llu failed: %s\n",
+			progname, BBTOOFF64(blkno), strerror(error));
+		ASSERT(0);
+		if (die)
+			exit(1);
+		return error;
+	}
+	if (read(fd, buf->b_addr, BBTOB(len)) < 0) {
+		error = errno;
+		fprintf(stderr, "%s: read failed: %s\n",
+			progname, strerror(error));
+		if (die)
+			exit(1);
+		return error;
+	}
+#ifdef IO_DEBUG
+	fprintf(stderr, "readbufr read %ubytes, blkno=%llu(%llu), %p\n",
+		BBTOB(len), BBTOOFF64(blkno), blkno, buf);
+#endif
+	return 0;
+}
+
+/*
+ * Allocate a buffer for len basic blocks at blkno and fill it from dev.
+ * Returns the buffer, or NULL (with the buffer released) if the read
+ * reports an error.
+ */
+xfs_buf_t *
+libxfs_readbuf(dev_t dev, xfs_daddr_t blkno, int len, int die)
+{
+	xfs_buf_t	*bp = libxfs_getbuf(dev, blkno, len);
+
+	if (libxfs_readbufr(dev, blkno, bp, len, die) == 0)
+		return bp;
+
+	libxfs_putbuf(bp);
+	return NULL;
+}
+
+/*
+ * Read one filesystem block at XFS_SB_DADDR from the mount's device and
+ * return the buffer (NULL if the read fails and die is zero).
+ */
+xfs_buf_t *
+libxfs_getsb(xfs_mount_t *mp, int die)
+{
+	xfs_buf_t	*sbbuf;
+
+	sbbuf = libxfs_readbuf(mp->m_dev, XFS_SB_DADDR,
+				XFS_FSB_TO_BB(mp, 1), die);
+	return sbbuf;
+}
+
+/*
+ * Write the buffer's b_bcount bytes to its device at its block number.
+ * The buffer is not released.
+ *
+ * Returns 0 on success, the errno of a failed seek/write, or EIO for a
+ * short write.  When die is non-zero any failure exits the program.
+ */
+int
+libxfs_writebuf_int(xfs_buf_t *buf, int die)
+{
+	int	sts;
+	int	error;
+	int	fd = libxfs_device_to_fd(buf->b_dev);
+
+	if (lseek64(fd, BBTOOFF64(buf->b_blkno), SEEK_SET) < 0) {
+		/*
+		 * Capture errno before calling stdio, which may change it;
+		 * returning a clobbered value could report the wrong error
+		 * or even success.
+		 */
+		error = errno;
+		fprintf(stderr, "%s: lseek64 to %llu failed: %s\n",
+			progname, BBTOOFF64(buf->b_blkno), strerror(error));
+		ASSERT(0);
+		if (die)
+			exit(1);
+		return error;
+	}
+#ifdef IO_DEBUG
+	fprintf(stderr, "writing %ubytes at blkno=%llu(%llu), %p\n",
+		buf->b_bcount, BBTOOFF64(buf->b_blkno), buf->b_blkno, buf);
+#endif
+	sts = write(fd, buf->b_addr, buf->b_bcount);
+	if (sts < 0) {
+		error = errno;
+		fprintf(stderr, "%s: write failed: %s\n",
+			progname, strerror(error));
+		ASSERT(0);
+		if (die)
+			exit(1);
+		return error;
+	}
+	else if (sts != buf->b_bcount) {
+		fprintf(stderr, "%s: error - wrote only %d of %d bytes\n",
+			progname, sts, buf->b_bcount);
+		if (die)
+			exit(1);
+		return EIO;
+	}
+	return 0;
+}
+
+/*
+ * Write the buffer out, then release it regardless of the outcome.
+ * Returns the status from libxfs_writebuf_int().
+ */
+int
+libxfs_writebuf(xfs_buf_t *buf, int die)
+{
+	int	status;
+
+	status = libxfs_writebuf_int(buf, die);
+	libxfs_putbuf(buf);
+	return status;
+}
+
+/*
+ * Release a buffer obtained from libxfs_getbuf().  A log item still
+ * attached through the buffer's private pointer is freed with it.
+ * A NULL buffer is ignored.
+ */
+void
+libxfs_putbuf(xfs_buf_t *buf)
+{
+	extern xfs_zone_t	*xfs_buf_item_zone;
+	xfs_buf_log_item_t	*item;
+
+	if (buf == NULL)
+		return;
+
+	item = XFS_BUF_FSPRIVATE(buf, xfs_buf_log_item_t *);
+	if (item != NULL)
+		libxfs_zone_free(xfs_buf_item_zone, item);
+#ifdef IO_DEBUG
+	fprintf(stderr, "putbuf released %ubytes, %p\n",
+		buf->b_bcount, buf);
+#endif
+	free(buf);
+}
+
+
+/*
+ * Simple memory interface
+ */
+
+/*
+ * Create a zone descriptor for objects of size bytes.  The name pointer
+ * is stored as given (not copied).  Exits the program if the descriptor
+ * cannot be allocated.
+ */
+xfs_zone_t *
+libxfs_zone_init(int size, char *name)
+{
+	xfs_zone_t	*ptr;
+
+	if ((ptr = malloc(sizeof(xfs_zone_t))) == NULL) {
+		/* sizeof yields a size_t: print as unsigned long, not %d */
+		fprintf(stderr, "%s: zone init failed (%s, %lu bytes): %s\n",
+			progname, name, (unsigned long)sizeof(xfs_zone_t),
+			strerror(errno));
+		exit(1);
+	}
+	ptr->zone_unitsize = size;
+	ptr->zone_name = name;
+#ifdef MEM_DEBUG
+        ptr->allocated = 0;
+	fprintf(stderr, "new zone %p for \"%s\", size=%d\n", ptr, name, size);
+#endif
+	return ptr;
+}
+
+/*
+ * Return one zero-filled object of the zone's unit size.  Exits the
+ * program if memory cannot be obtained.
+ */
+void *
+libxfs_zone_zalloc(xfs_zone_t *z)
+{
+	void	*obj;
+
+	ASSERT(z != NULL);
+	obj = calloc(z->zone_unitsize, 1);
+	if (obj == NULL) {
+		fprintf(stderr, "%s: zone calloc failed (%s, %d bytes): %s\n",
+			progname, z->zone_name, z->zone_unitsize,
+			strerror(errno));
+		exit(1);
+	}
+#ifdef MEM_DEBUG
+	z->allocated++;
+	fprintf(stderr, "## zone alloc'd item %p from %s (%d bytes) (%d active)\n", 
+		obj, z->zone_name, z->zone_unitsize,
+		z->allocated);
+#endif
+	return obj;
+}
+
+/*
+ * Give an object back to its zone; the memory is simply freed.  A NULL
+ * ptr frees nothing (under MEM_DEBUG the zone's counter is still
+ * decremented and the call is logged).
+ */
+void
+libxfs_zone_free(xfs_zone_t *z, void *ptr)
+{
+#ifdef MEM_DEBUG
+	z->allocated--;
+	fprintf(stderr, "## zone freed item %p from %s (%d bytes) (%d active)\n", 
+		ptr, z->zone_name, z->zone_unitsize,
+		z->allocated);
+#endif
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(ptr);
+}
+
+/*
+ * malloc() wrapper that never returns NULL: on failure it prints a
+ * message and exits the program.
+ */
+void *
+libxfs_malloc(size_t size)
+{
+	void	*ptr;
+
+	if ((ptr = malloc(size)) == NULL) {
+		/* size is a size_t: print as unsigned long, not %d */
+		fprintf(stderr, "%s: malloc failed (%lu bytes): %s\n",
+			progname, (unsigned long)size, strerror(errno));
+		exit(1);
+	}
+#ifdef MEM_DEBUG
+	fprintf(stderr, "## malloc'd item %p size %lu bytes\n",
+		ptr, (unsigned long)size);
+#endif
+	return ptr;
+}
+
+/*
+ * free() wrapper; a NULL pointer is accepted and ignored.  Under
+ * MEM_DEBUG every call is logged.
+ */
+void
+libxfs_free(void *ptr)
+{
+#ifdef MEM_DEBUG
+	fprintf(stderr, "## freed item %p\n", 
+		ptr);
+#endif
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(ptr);
+}
+
+/*
+ * realloc() wrapper that never returns NULL: any NULL result from
+ * realloc() is treated as failure, reported, and the program exits.
+ */
+void *
+libxfs_realloc(void *ptr, size_t size)
+{
+#ifdef MEM_DEBUG
+        void *optr=ptr;
+#endif
+	if ((ptr = realloc(ptr, size)) == NULL) {
+		/* size is a size_t: print as unsigned long, not %d */
+		fprintf(stderr, "%s: realloc failed (%lu bytes): %s\n",
+			progname, (unsigned long)size, strerror(errno));
+		exit(1);
+	}
+#ifdef MEM_DEBUG
+	fprintf(stderr, "## realloc'd item %p now %p size %lu bytes\n",
+		optr, ptr, (unsigned long)size);
+#endif
+	return ptr;
+}
+
+
+/*
+ * Read inode ino through libxfs_iread() and hand it back in *ipp.
+ * lock_flags is not used.  On error the error is returned and *ipp is
+ * left untouched.
+ */
+int
+libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
+		xfs_inode_t **ipp, xfs_daddr_t bno)
+{
+	xfs_inode_t	*inode;
+	int		rc;
+
+	rc = libxfs_iread(mp, tp, ino, &inode, bno);
+	if (rc == 0)
+		*ipp = inode;
+	return rc;
+}
+
+/*
+ * Release an in-core inode together with any inode log item attached to
+ * it.  lock_flags is not used; a NULL inode is ignored.
+ */
+void
+libxfs_iput(xfs_inode_t *ip, uint lock_flags)
+{
+	extern xfs_zone_t	*xfs_ili_zone;
+	extern xfs_zone_t	*xfs_inode_zone;
+
+	if (ip == NULL)
+		return;
+
+	/* the log item goes first, then the inode that owns it */
+	if (ip->i_itemp != NULL)
+		libxfs_zone_free(xfs_ili_zone, ip->i_itemp);
+	ip->i_itemp = NULL;
+
+	libxfs_zone_free(xfs_inode_zone, ip);
+}
+
+/*
+ * libxfs_mod_sb can be used to copy arbitrary changes to the
+ * in-core superblock into the superblock buffer to be logged.
+ *
+ * In user-space, we simply convert to big-endian, and write
+ * the whole superblock - the in-core changes have all been made
+ * already.
+ *
+ * The fields argument is not consulted: XFS_SB_ALL_BITS is always
+ * translated.  The write is issued with die set, so a write failure
+ * exits the program.
+ */
+void
+libxfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
+{
+	xfs_buf_t	*bp;
+	xfs_mount_t	*mp;
+
+	mp = tp->t_mountp;
+	bp = libxfs_getbuf(mp->m_dev, XFS_SB_DADDR, 1);
+	libxfs_xlate_sb(XFS_BUF_PTR(bp), &mp->m_sb, -1, ARCH_CONVERT,
+			XFS_SB_ALL_BITS);
+	/* libxfs_writebuf also releases bp */
+	libxfs_writebuf(bp, 1);
+}
diff --git a/libxfs/trans.c b/libxfs/trans.c
new file mode 100644
index 000000000..980d69a09
--- /dev/null
+++ b/libxfs/trans.c
@@ -0,0 +1,754 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Simple transaction interface
+ */
+
+/*
+ * Allocate a zeroed transaction for mount mp of the given type, with its
+ * embedded log item chunk initialised and all slots free.  Exits the
+ * program if the allocation fails.
+ */
+xfs_trans_t *
+libxfs_trans_alloc(xfs_mount_t *mp, int type)
+{
+	xfs_trans_t	*ptr;
+
+	if ((ptr = calloc(sizeof(xfs_trans_t), 1)) == NULL) {
+		/* sizeof yields a size_t: print as unsigned long, not %d */
+		fprintf(stderr, "%s: xact calloc failed (%lu bytes): %s\n",
+			progname, (unsigned long)sizeof(xfs_trans_t),
+			strerror(errno));
+		exit(1);
+	}
+	ptr->t_mountp = mp;
+	ptr->t_type = type;
+	ptr->t_items_free = XFS_LIC_NUM_SLOTS;
+	XFS_LIC_INIT(&(ptr->t_items));
+#ifdef XACT_DEBUG
+	fprintf(stderr, "allocated new transaction %p\n", ptr);
+#endif
+	return ptr;
+}
+
+/*
+ * Create a fresh transaction with the same mount and type as tp.
+ * Nothing else is carried over from tp.
+ */
+xfs_trans_t *
+libxfs_trans_dup(xfs_trans_t *tp)
+{
+	xfs_trans_t	*ntp = libxfs_trans_alloc(tp->t_mountp, tp->t_type);
+
+#ifdef XACT_DEBUG
+	fprintf(stderr, "duplicated transaction %p (new=%p)\n", tp, ntp);
+#endif
+	return ntp;
+}
+
+/*
+ * Check that the requested number of disk blocks is available.
+ *
+ * Returns ENOSPC when blocks is non-zero and exceeds the superblock's
+ * free data block count, otherwise 0.  Nothing is actually decremented,
+ * and the log/RT arguments are ignored: user space doesn't need them,
+ * they are kept only to preserve the API.
+ */
+int
+libxfs_trans_reserve(xfs_trans_t *tp,
+	uint blocks, uint logspace, uint rtextents, uint flags, uint logcount)
+{
+	xfs_sb_t	*sb = &tp->t_mountp->m_sb;
+
+	if (blocks > 0 && sb->sb_fdblocks < blocks)
+		return ENOSPC;
+	return 0;
+}
+
+/*
+ * Abandon a transaction: release its items and free the structure.
+ * A NULL transaction is ignored.
+ */
+void
+libxfs_trans_cancel(xfs_trans_t *tp, int flags)
+{
+#ifdef XACT_DEBUG
+	xfs_trans_t	*cancelled = tp;	/* kept only for the trace */
+#endif
+
+	if (tp != NULL) {
+		xfs_trans_free_items(tp, flags);
+		free(tp);
+	}
+#ifdef XACT_DEBUG
+	fprintf(stderr, "## cancelled transaction %p\n", cancelled);
+#endif
+}
+
+/*
+ * Read inode ino and, when a transaction is given, attach it to that
+ * transaction: an inode log item is created if the inode has none, the
+ * item is added to tp and the inode's i_transp is pointed at tp.
+ *
+ * With tp == NULL this is a plain libxfs_iread().  lock_flags is not
+ * used.  Returns 0 or the error from libxfs_iread().
+ */
+int
+libxfs_trans_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino,
+		uint lock_flags, xfs_inode_t **ipp)
+{
+	xfs_inode_t	*inode;
+	int		rc;
+
+	if (tp == NULL)
+		return libxfs_iread(mp, tp, ino, ipp, 0);
+
+	rc = libxfs_iread(mp, tp, ino, &inode, 0);
+	if (rc)
+		return rc;
+	ASSERT(inode != NULL);
+
+	if (inode->i_itemp == NULL)
+		xfs_inode_item_init(inode, mp);
+	xfs_trans_add_item(tp, (xfs_log_item_t *)(inode->i_itemp));
+
+	/* initialize i_transp so we can find it incore */
+	inode->i_transp = tp;
+
+	*ipp = inode;
+	return 0;
+}
+
+/*
+ * Drop an inode that is attached to transaction tp: its (clean) log item
+ * descriptor is removed from the transaction and the inode is released.
+ * With tp == NULL the inode is simply released.
+ */
+void
+libxfs_trans_iput(xfs_trans_t *tp, xfs_inode_t *ip, uint lock_flags)
+{
+	xfs_log_item_t		*lip;
+	xfs_log_item_desc_t	*desc;
+
+	if (tp == NULL) {
+		libxfs_iput(ip, lock_flags);
+		return;
+	}
+
+	ASSERT(ip->i_transp == tp);
+	lip = (xfs_log_item_t *)ip->i_itemp;
+	ASSERT(lip != NULL);
+
+	desc = xfs_trans_find_item(tp, lip);
+	ASSERT(desc != NULL);
+	ASSERT(desc->lid_item == lip);
+	ASSERT(!(desc->lid_flags & XFS_LID_DIRTY));
+	xfs_trans_free_item(tp, desc);
+
+	libxfs_iput(ip, lock_flags);
+}
+
+/*
+ * Attach an inode that belongs to no transaction to tp, creating its
+ * inode log item first if it has none.  lock_flags is not used.
+ */
+void
+libxfs_trans_ijoin(xfs_trans_t *tp, xfs_inode_t *ip, uint lock_flags)
+{
+	xfs_inode_log_item_t	*item;
+
+	ASSERT(ip->i_transp == NULL);
+	if (ip->i_itemp == NULL)
+		xfs_inode_item_init(ip, ip->i_mount);
+
+	item = ip->i_itemp;
+	ASSERT(item->ili_flags == 0);
+	ASSERT(item->ili_inode != NULL);
+
+	xfs_trans_add_item(tp, (xfs_log_item_t *)(item));
+	ip->i_transp = tp;
+#ifdef XACT_DEBUG
+	fprintf(stderr, "ijoin'd inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+}
+
+/*
+ * Set XFS_ILI_HOLD on the log item of an inode already attached to tp.
+ */
+void
+libxfs_trans_ihold(xfs_trans_t *tp, xfs_inode_t *ip)
+{
+	xfs_inode_log_item_t	*item;
+
+	ASSERT(ip->i_transp == tp);
+	ASSERT(ip->i_itemp != NULL);
+
+	item = ip->i_itemp;
+	item->ili_flags |= XFS_ILI_HOLD;
+#ifdef XACT_DEBUG
+	fprintf(stderr, "ihold'd inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+}
+
+/*
+ * Set XFS_BLI_INODE_ALLOC_BUF on the log item of a buffer that is
+ * already attached to tp.
+ */
+void
+libxfs_trans_inode_alloc_buf(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+	xfs_buf_log_item_t	*item;
+
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	item->bli_flags = item->bli_flags | XFS_BLI_INODE_ALLOC_BUF;
+}
+
+/*
+ * Record that the inode fields named in flags must be logged when the
+ * transaction commits.  The inode has to be attached to tp already.
+ *
+ * The values for the field mask are defined in xfs_inode_item.h.  The
+ * inode's descriptor and the transaction are both marked dirty, and the
+ * requested bits are accumulated in the log item's ilf_fields.
+ */
+void
+xfs_trans_log_inode(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	uint		flags)
+{
+	xfs_inode_log_item_t	*item;
+	xfs_log_item_desc_t	*desc;
+
+	ASSERT(ip->i_transp == tp);
+	ASSERT(ip->i_itemp != NULL);
+#ifdef XACT_DEBUG
+	fprintf(stderr, "dirtied inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+
+	item = ip->i_itemp;
+	desc = xfs_trans_find_item(tp, (xfs_log_item_t*)(item));
+	ASSERT(desc != NULL);
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	desc->lid_flags |= XFS_LID_DIRTY;
+
+	/*
+	 * The bits from ili_last_fields are always folded in as well.
+	 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
+	 * routines in the eventual clearing of the ilf_fields bits; see
+	 * the big comment in xfs_iflush() for the details.
+	 */
+	flags |= item->ili_last_fields;
+	item->ili_format.ilf_fields |= flags;
+}
+
+/*
+ * Note that bytes first..last (inclusive) of bp need logging when the
+ * transaction commits.  bp must already be attached to tp.
+ *
+ * Both offsets are relative to the start of the buffer: byte 0 is the
+ * first byte of the buffer whatever its b_blkno is.
+ */
+void
+libxfs_trans_log_buf(xfs_trans_t *tp, xfs_buf_t *bp, uint first, uint last)
+{
+	xfs_buf_log_item_t	*item;
+	xfs_log_item_desc_t	*desc;
+
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+	ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
+#ifdef XACT_DEBUG
+	fprintf(stderr, "dirtied buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	desc = xfs_trans_find_item(tp, (xfs_log_item_t *)item);
+	ASSERT(desc != NULL);
+
+	/* dirty the transaction, the descriptor and the item itself */
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	desc->lid_flags |= XFS_LID_DIRTY;
+	xfs_buf_item_log(item, first, last);
+}
+
+/*
+ * Release a buffer, detaching it from transaction tp when possible.
+ *
+ * - tp == NULL: the buffer is released straight away.
+ * - recursion count above zero: the count is dropped by one, nothing else.
+ * - dirty in this transaction: left alone until the commit.
+ * - otherwise: the descriptor is removed, the hold flag and transaction
+ *   pointer are cleared and the buffer is released.
+ */
+void
+libxfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+	xfs_buf_log_item_t	*item;
+	xfs_log_item_desc_t	*desc;
+#ifdef XACT_DEBUG
+	fprintf(stderr, "released buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	if (tp == NULL) {
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+		libxfs_putbuf(bp);
+		return;
+	}
+
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	ASSERT(item->bli_item.li_type == XFS_LI_BUF);
+	desc = xfs_trans_find_item(tp, (xfs_log_item_t*)item);
+	ASSERT(desc != NULL);
+
+	/* a nested get: undo one level only */
+	if (item->bli_recur > 0) {
+		item->bli_recur -= 1;
+		return;
+	}
+
+	/* If dirty, can't release till transaction committed */
+	if (desc->lid_flags & XFS_LID_DIRTY)
+		return;
+
+	xfs_trans_free_item(tp, desc);
+	item->bli_flags &= ~XFS_BLI_HOLD;
+	XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+	libxfs_putbuf(bp);
+}
+
+/*
+ * Invalidate a buffer attached to tp: the item's dirty flag and the
+ * inode-buf format flag are cleared, the cancel format flag is set, and
+ * the descriptor and the transaction are marked dirty.
+ */
+void
+libxfs_trans_binval(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+	xfs_buf_log_item_t	*item;
+	xfs_log_item_desc_t	*desc;
+#ifdef XACT_DEBUG
+	fprintf(stderr, "binval'd buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+	item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	desc = xfs_trans_find_item(tp, (xfs_log_item_t*)item);
+	ASSERT(desc != NULL);
+
+	/* item state */
+	item->bli_flags &= ~(XFS_BLI_DIRTY);
+	item->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
+	item->bli_format.blf_flags |= XFS_BLI_CANCEL;
+
+	/* the invalidation itself counts as a change to commit */
+	desc->lid_flags |= XFS_LID_DIRTY;
+	tp->t_flags |= XFS_TRANS_DIRTY;
+}
+
+/*
+ * Attach a buffer that belongs to no transaction to tp: a buf log item
+ * is set up for it, added to tp, and the buffer is pointed at tp.
+ */
+void
+libxfs_trans_bjoin(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+	xfs_log_item_t	*lip;
+
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+#ifdef XACT_DEBUG
+	fprintf(stderr, "bjoin'd buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	xfs_buf_item_init(bp, tp->t_mountp);
+	lip = (xfs_log_item_t *)XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	xfs_trans_add_item(tp, lip);
+	XFS_BUF_SET_FSPRIVATE2(bp, tp);
+}
+
+/*
+ * Set XFS_BLI_HOLD on the log item of a buffer attached to tp.
+ */
+void
+libxfs_trans_bhold(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+	xfs_buf_log_item_t	*item;
+
+	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+#ifdef XACT_DEBUG
+	fprintf(stderr, "bhold'd buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	item->bli_flags = item->bli_flags | XFS_BLI_HOLD;
+}
+
+/*
+ * Get a buffer for len basic blocks at d on dev without reading it.
+ *
+ * With tp == NULL this is a plain libxfs_getbuf().  Otherwise, if the
+ * transaction already holds a matching buffer, that buffer is returned
+ * with its recursion count bumped; if not, a new buffer is allocated,
+ * given a buf log item and attached to tp.  The f argument is not used.
+ */
+xfs_buf_t *
+libxfs_trans_get_buf(xfs_trans_t *tp, dev_t dev, xfs_daddr_t d, int len, uint f)
+{
+	xfs_buf_t		*bp;
+	xfs_buf_log_item_t	*item;
+	buftarg_t		target = { dev };
+
+	if (tp == NULL)
+		return libxfs_getbuf(dev, d, len);
+
+	/* only the embedded chunk in use?  then the cheap matcher will do */
+	bp = (tp->t_items.lic_next == NULL) ?
+		xfs_trans_buf_item_match(tp, &target, d, len) :
+		xfs_trans_buf_item_match_all(tp, &target, d, len);
+	if (bp != NULL) {
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+		item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+		ASSERT(item != NULL);
+		item->bli_recur++;
+		return bp;
+	}
+
+	bp = libxfs_getbuf(dev, d, len);
+	if (bp == NULL)
+		return NULL;
+#ifdef XACT_DEBUG
+	fprintf(stderr, "trans_get_buf buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	xfs_buf_item_init(bp, tp->t_mountp);
+	item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+	item->bli_recur = 0;
+	xfs_trans_add_item(tp, (xfs_log_item_t *)item);
+
+	/* initialize b_fsprivate2 so we can find it incore */
+	XFS_BUF_SET_FSPRIVATE2(bp, tp);
+	return bp;
+}
+
+/*
+ * Get a buffer for len basic blocks at blkno and read it from dev.
+ *
+ * With tp == NULL a new buffer is read and handed back in *bpp together
+ * with the read status; *bpp is set even when an error is returned.
+ *
+ * With a transaction, a matching buffer already held by tp is returned
+ * with its recursion count bumped.  Otherwise a new buffer is read, given
+ * a buf log item and attached to tp.  If that read fails the buffer is
+ * released, *bpp is set to NULL and the error is returned.
+ *
+ * The f argument is not used.
+ */
+int
+libxfs_trans_read_buf(xfs_mount_t *mp, xfs_trans_t *tp, dev_t dev,
+			xfs_daddr_t blkno, int len, uint f, xfs_buf_t **bpp)
+{
+	xfs_buf_t		*bp;
+	xfs_buf_log_item_t	*bip;
+	int			error;
+	buftarg_t		bdev = { dev };
+
+	if (tp == NULL) {
+		bp = libxfs_getbuf(mp->m_dev, blkno, len);
+		error = libxfs_readbufr(dev, blkno, bp, len, 0);
+		*bpp = bp;
+		return error;
+	}
+
+	if (tp->t_items.lic_next == NULL)
+		bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
+	else
+		bp = xfs_trans_buf_item_match_all(tp, &bdev, blkno, len);
+	if (bp != NULL) {
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+		bip->bli_recur++;
+		*bpp = bp;
+		return 0;
+	}
+
+	bp = libxfs_getbuf(mp->m_dev, blkno, len);
+	error = libxfs_readbufr(dev, blkno, bp, len, 0);
+	if (error) {
+		/*
+		 * The caller only gets NULL back, so nobody else can free
+		 * this buffer: release it here rather than leaking it.
+		 */
+		libxfs_putbuf(bp);
+		*bpp = NULL;
+		return error;
+	}
+#ifdef XACT_DEBUG
+	fprintf(stderr, "trans_read_buf buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	xfs_buf_item_init(bp, tp->t_mountp);
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	bip->bli_recur = 0;
+	xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
+
+	/* initialise b_fsprivate2 so we can find it incore */
+	XFS_BUF_SET_FSPRIVATE2(bp, tp);
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Accumulate a pending change to a superblock counter in the transaction;
+ * the superblock itself is not touched here.  For every recognised field
+ * except XFS_TRANS_SB_RES_FDBLOCKS (which is ignored) the transaction is
+ * also flagged so the superblock gets updated at commit time.
+ *
+ * Originally derived from xfs_trans_mod_sb().
+ */
+void
+libxfs_trans_mod_sb(xfs_trans_t *tp, uint field, long delta)
+{
+	/* reserved-block accounting is not tracked at all */
+	if (field == XFS_TRANS_SB_RES_FDBLOCKS)
+		return;
+
+	if (field == XFS_TRANS_SB_FDBLOCKS) {
+		tp->t_fdblocks_delta += delta;
+	} else if (field == XFS_TRANS_SB_ICOUNT) {
+		ASSERT(delta > 0);
+		tp->t_icount_delta += delta;
+	} else if (field == XFS_TRANS_SB_IFREE) {
+		tp->t_ifree_delta += delta;
+	} else if (field == XFS_TRANS_SB_FREXTENTS) {
+		tp->t_frextents_delta += delta;
+	} else {
+		/* unknown field: leave the transaction unmarked */
+		ASSERT(0);
+		return;
+	}
+
+	tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY);
+}
+
+
+/*
+ * Transaction committal code follows (i.e. write to disk in libxfs)
+ */
+
+/*
+ * Commit processing for one inode log item.
+ *
+ * If none of the XFS_ILOG_ALL bits are set there is nothing to write:
+ * the inode is detached from its transaction and, unless it is held,
+ * its log item is freed.
+ *
+ * Otherwise the inode is flushed into the buffer holding the on-disk
+ * inode and that buffer is written out.  The buffer is obtained here
+ * and is released here on every path once it is no longer needed.  A
+ * held inode keeps its log item (with the hold flag cleared); for any
+ * other inode, and on failure, the log item is freed.
+ */
+STATIC void
+inode_item_done(xfs_inode_log_item_t *iip)
+{
+	xfs_dinode_t	*dip;
+	xfs_inode_t	*ip;
+	xfs_mount_t	*mp;
+	xfs_buf_t	*bp;
+	int		hold;
+	int		error;
+	extern xfs_zone_t *xfs_ili_zone;
+
+	ip = iip->ili_inode;
+	mp = iip->ili_item.li_mountp;
+	hold = iip->ili_flags & XFS_ILI_HOLD;
+	ASSERT(ip != NULL);
+
+	if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) {
+		ip->i_transp = NULL;	/* disassociate from transaction */
+		iip->ili_flags = 0;	/* reset all flags */
+		if (!hold)
+			goto ili_done;
+		return;
+	}
+
+	/*
+	 * Get the buffer containing the on-disk inode.
+	 */
+	error = libxfs_itobp(mp, NULL, ip, &dip, &bp, 0);
+	if (error) {
+		fprintf(stderr, "%s: warning - itobp failed (%d)\n",
+			progname, error);
+		goto ili_done;
+	}
+
+	XFS_BUF_SET_FSPRIVATE(bp, iip);
+	error = libxfs_iflush_int(ip, bp);
+	if (error) {
+		fprintf(stderr, "%s: warning - iflush_int failed (%d)\n",
+			progname, error);
+		/*
+		 * Detach the log item before releasing the buffer:
+		 * libxfs_putbuf() frees whatever the private pointer
+		 * refers to, and the item is freed at ili_done below.
+		 */
+		XFS_BUF_SET_FSPRIVATE(bp, NULL);
+		libxfs_putbuf(bp);
+		goto ili_done;
+	}
+
+	ip->i_transp = NULL;	/* disassociate from transaction */
+	XFS_BUF_SET_FSPRIVATE(bp, NULL);	/* remove log item */
+	XFS_BUF_SET_FSPRIVATE2(bp, NULL);	/* remove xact ptr */
+	libxfs_writebuf_int(bp, 0);
+#ifdef XACT_DEBUG
+	fprintf(stderr, "flushing dirty inode %llu, buffer %p (hold=%u)\n",
+			ip->i_ino, bp, hold);
+#endif
+	/*
+	 * The hold applies to the inode's log item, not to this buffer,
+	 * so the buffer is released whether or not the inode is held.
+	 */
+	/*libxfs_iput(iip->ili_inode, 0);	- nathans TODO? */
+	libxfs_putbuf(bp);
+	if (hold) {
+		iip->ili_flags &= ~XFS_ILI_HOLD;
+		return;
+	}
+
+ili_done:
+	if (ip->i_itemp)
+		kmem_zone_free(xfs_ili_zone, ip->i_itemp);
+	else
+		ASSERT(0);
+	ip->i_itemp = NULL;
+}
+
+/*
+ * Commit processing for one buf log item: the buffer is detached from
+ * the item and the transaction, written out if the item is dirty (and
+ * then released unless held), and the item itself is freed.
+ */
+STATIC void
+buf_item_done(xfs_buf_log_item_t *bip)
+{
+	extern xfs_zone_t *xfs_buf_item_zone;
+	xfs_buf_t	*buf = bip->bli_buf;
+	int		held;
+
+	ASSERT(buf != NULL);
+	XFS_BUF_SET_FSPRIVATE(buf, NULL);	/* remove log item */
+	XFS_BUF_SET_FSPRIVATE2(buf, NULL);	/* remove xact ptr */
+
+	held = (bip->bli_flags & XFS_BLI_HOLD);
+	if (bip->bli_flags & XFS_BLI_DIRTY) {
+#ifdef XACT_DEBUG
+		fprintf(stderr, "flushing dirty buffer %p (hold=%d)\n",
+			buf, held);
+#endif
+		libxfs_writebuf_int(buf, 0);
+		if (!held)
+			libxfs_putbuf(buf);
+		else
+			bip->bli_flags &= ~XFS_BLI_HOLD;
+	}
+
+	/* release the buf item */
+	kmem_zone_free(xfs_buf_item_zone, bip);
+}
+
+/*
+ * Run the commit handler for every in-use descriptor of one chunk:
+ * buf items go to buf_item_done(), inode items to inode_item_done().
+ * Any other item type is reported and asserted on.
+ */
+static void
+trans_chunk_committed(xfs_log_item_chunk_t *licp)
+{
+	xfs_log_item_desc_t	*desc;
+	xfs_log_item_t		*item;
+	int			slot;
+
+	for (slot = 0; slot < licp->lic_unused; slot++) {
+		if (XFS_LIC_ISFREE(licp, slot))
+			continue;
+
+		desc = &licp->lic_descs[slot];
+		item = desc->lid_item;
+		if (item->li_type == XFS_LI_BUF) {
+			buf_item_done((xfs_buf_log_item_t *)item);
+		} else if (item->li_type == XFS_LI_INODE) {
+			inode_item_done((xfs_inode_log_item_t *)item);
+		} else {
+			fprintf(stderr, "%s: unrecognised log item type\n",
+				progname);
+			ASSERT(0);
+		}
+	}
+}
+
+/*
+ * Run commit processing over every chunk of the transaction.  The chunk
+ * embedded in the transaction is processed in place (and skipped if all
+ * its slots are free); each chained chunk is processed and then freed.
+ */
+static void
+trans_committed(xfs_trans_t *tp)
+{
+	xfs_log_item_chunk_t	*chunk;
+	xfs_log_item_chunk_t	*following;
+
+	/* the embedded chunk is part of tp and is not freed here */
+	chunk = &(tp->t_items);
+	if (!(XFS_LIC_ARE_ALL_FREE(chunk)))
+		trans_chunk_committed(chunk);
+
+	/* the chained chunks are freed one by one after processing */
+	for (chunk = chunk->lic_next; chunk != NULL; chunk = following) {
+		trans_chunk_committed(chunk);
+		following = chunk->lic_next;
+		kmem_free(chunk, sizeof(xfs_log_item_chunk_t));
+	}
+}
+
+/*
+ * Detach every item referenced by an in-use descriptor of the chunk from
+ * its transaction: buffers lose their transaction pointer, inodes their
+ * i_transp, and the hold flag is cleared on both.
+ *
+ * When freeing_chunk is zero, a descriptor is also released if its item
+ * is not dirty or if abort is set; with freeing_chunk non-zero no
+ * descriptor is released here.  Returns the number of descriptors
+ * released.  commit_lsn is not used.
+ *
+ * Originally based on xfs_trans_unlock_chunk() - adapted for libxfs
+ * transactions though.
+ */
+int
+xfs_trans_unlock_chunk(
+	xfs_log_item_chunk_t 	*licp,
+	int			freeing_chunk,
+	int			abort,
+	xfs_lsn_t		commit_lsn)	/* nb: unused */
+{
+	xfs_log_item_desc_t	*desc;
+	xfs_log_item_t		*item;
+	int			slot;
+	int			released = 0;
+
+	for (slot = 0; slot < licp->lic_unused; slot++) {
+		if (XFS_LIC_ISFREE(licp, slot))
+			continue;
+
+		desc = &licp->lic_descs[slot];
+		item = desc->lid_item;
+		item->li_desc = NULL;
+
+		/*
+		 * Disassociate the logged item from this transaction
+		 */
+		if (item->li_type == XFS_LI_BUF) {
+			xfs_buf_log_item_t	*bitem;
+
+			bitem = (xfs_buf_log_item_t *)item;
+			XFS_BUF_SET_FSPRIVATE2(bitem->bli_buf, NULL);
+			bitem->bli_flags &= ~XFS_BLI_HOLD;
+		} else if (item->li_type == XFS_LI_INODE) {
+			xfs_inode_log_item_t	*iitem;
+
+			iitem = (xfs_inode_log_item_t*)item;
+			iitem->ili_inode->i_transp = NULL;
+			iitem->ili_flags &= ~XFS_ILI_HOLD;
+		} else {
+			fprintf(stderr, "%s: unrecognised log item type\n",
+				progname);
+			ASSERT(0);
+		}
+
+		/*
+		 * The caller frees the whole chunk itself in this case,
+		 * so individual descriptors are left alone.
+		 */
+		if (freeing_chunk)
+			continue;
+
+		/* dirty descriptors survive unless we are aborting */
+		if ((desc->lid_flags & XFS_LID_DIRTY) && !abort)
+			continue;
+
+		XFS_LIC_RELSE(licp, slot);
+		released++;
+	}
+
+	return (released);
+}
+
+
+/*
+ * Commit the changes represented by this transaction.
+ *
+ * A clean transaction just has its items released.  For a dirty one, any
+ * pending superblock counter deltas are applied to the in-core superblock
+ * and written out through libxfs_mod_sb(), then every item goes through
+ * commit processing.  The transaction structure is freed in both cases.
+ *
+ * Always returns 0; a NULL tp is accepted.  commit_lsn_p is not used.
+ */
+int
+libxfs_trans_commit(xfs_trans_t *tp, uint flags, xfs_lsn_t *commit_lsn_p)
+{
+	xfs_sb_t	*sbp;
+
+	if (tp == NULL)
+		return 0;
+
+	if (!(tp->t_flags & XFS_TRANS_DIRTY)) {
+#ifdef XACT_DEBUG
+		fprintf(stderr, "committed clean transaction %p\n", tp);
+#endif
+		xfs_trans_free_items(tp, flags);
+		free(tp);
+		return 0;
+	}
+
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+		sbp = &(tp->t_mountp->m_sb);
+		if (tp->t_icount_delta)
+			sbp->sb_icount += tp->t_icount_delta;
+		if (tp->t_ifree_delta)
+			sbp->sb_ifree += tp->t_ifree_delta;
+		if (tp->t_fdblocks_delta)
+			sbp->sb_fdblocks += tp->t_fdblocks_delta;
+		if (tp->t_frextents_delta)
+			sbp->sb_frextents += tp->t_frextents_delta;
+		libxfs_mod_sb(tp, XFS_SB_ALL_BITS);
+	}
+
+#ifdef XACT_DEBUG
+	fprintf(stderr, "committing dirty transaction %p\n", tp);
+#endif
+	trans_committed(tp);
+
+	/* That's it for the transaction structure.  Free it. */
+	free(tp);
+	return 0;
+}
diff --git a/libxfs/util.c b/libxfs/util.c
new file mode 100644
index 000000000..44222c30e
--- /dev/null
+++ b/libxfs/util.c
@@ -0,0 +1,735 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include <time.h>
+
+/*
+ * Wrapper around call to libxfs_ialloc. Takes care of committing and
+ * allocating a new transaction as needed (*tp is replaced when it does).
+ * Returns 0 with the inode in *ipp, or the error from libxfs_ialloc.
+ *
+ * Originally there were two copies of this code - one in mkfs, the
+ * other in repair - now there is just the one.
+ */
+int
+libxfs_inode_alloc(
+	xfs_trans_t	**tp,
+	xfs_inode_t	*pip,
+	mode_t		mode,
+	ushort		nlink,
+	dev_t		rdev,
+	cred_t		*cr,
+	xfs_inode_t	**ipp)
+{
+	boolean_t	call_again;
+	int		i;
+	xfs_buf_t	*ialloc_context;
+	xfs_inode_t	*ip;
+	xfs_trans_t	*ntp;
+	int		error;
+
+	call_again = B_FALSE;
+	ialloc_context = (xfs_buf_t *)0;
+	error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, (xfs_prid_t) 0,
+			   1, &ialloc_context, &call_again, &ip);
+	if (error)
+		return error;
+	if (call_again) {
+		xfs_trans_bhold(*tp, ialloc_context);
+		ntp = xfs_trans_dup(*tp);
+		xfs_trans_commit(*tp, 0, NULL);
+		*tp = ntp;
+		/* The failure code is the return value held in i, not errno. */
+		if ((i = xfs_trans_reserve(*tp, 0, 0, 0, 0, 0))) {
+			fprintf(stderr, "%s: cannot reserve space: %s\n",
+				progname, strerror(i));
+			exit(1);
+		}
+		xfs_trans_bjoin(*tp, ialloc_context);
+		error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
+				   (xfs_prid_t) 0, 1, &ialloc_context,
+				   &call_again, &ip);
+		if (error)
+			return error;
+	}
+	*ipp = ip;
+	ASSERT(ip);
+	return error;
+}
+
+/*
+ * Stamp the current time into the inode timestamps selected by flags
+ * (XFS_ICHGTIME_MOD, XFS_ICHGTIME_ACC and/or XFS_ICHGTIME_CHG).
+ * This was once shared with the kernel, but has diverged to the point
+ * where it's no longer worth the hassle of maintaining common code.
+ */
+void
+libxfs_ichgtime(xfs_inode_t *ip, int flags)
+{
+	struct timeval	now;
+	__int32_t	sec, nsec;
+
+	gettimeofday(&now, (struct timezone *)0);
+	sec = (__int32_t)now.tv_sec;
+	nsec = (__int32_t)(now.tv_usec * 1000);	/* microseconds to ns */
+	if (flags & XFS_ICHGTIME_MOD) {
+		ip->i_d.di_mtime.t_sec = sec;
+		ip->i_d.di_mtime.t_nsec = nsec;
+	}
+	if (flags & XFS_ICHGTIME_ACC) {
+		ip->i_d.di_atime.t_sec = sec;
+		ip->i_d.di_atime.t_nsec = nsec;
+	}
+	if (flags & XFS_ICHGTIME_CHG) {
+		ip->i_d.di_ctime.t_sec = sec;
+		ip->i_d.di_ctime.t_nsec = nsec;
+	}
+}
+
+/*
+ * Allocate an inode on disk and return a copy of its in-core version.
+ * Set mode, nlink, and rdev appropriately within the inode.
+ * The uid and gid for the inode are set according to the contents of
+ * the given cred structure.
+ *
+ * This was once shared with the kernel, but has diverged to the point
+ * where it's no longer worth the hassle of maintaining common code.
+ */
+int
+libxfs_ialloc(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*pip,
+	mode_t		mode,
+	nlink_t		nlink,
+	dev_t		rdev,
+	cred_t		*cr,
+	xfs_prid_t	prid,
+	int		okalloc,
+	xfs_buf_t	**ialloc_context,
+	boolean_t	*call_again,
+	xfs_inode_t	**ipp)
+{
+	xfs_ino_t	ino;
+	xfs_inode_t	*ip;
+	uint		flags;
+	int		error;
+
+	/*
+	 * Call the space management code to pick
+	 * the on-disk inode to be allocated.
+	 */
+	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+			    ialloc_context, call_again, &ino);
+	if (error != 0)
+		return error;
+	/* No inode picked this time: hand back NULL without an error. */
+	if (*call_again || ino == NULLFSINO) {
+		*ipp = NULL;
+		return 0;
+	}
+	ASSERT(*ialloc_context == NULL);
+
+	error = xfs_trans_iget(tp->t_mountp, tp, ino, 0, &ip);
+	if (error != 0)
+		return error;
+	ASSERT(ip != NULL);
+
+	ip->i_d.di_mode = (__uint16_t)mode;
+	ip->i_d.di_onlink = 0;
+	ip->i_d.di_nlink = nlink;
+	ASSERT(ip->i_d.di_nlink == nlink);
+	ip->i_d.di_uid = cr->cr_uid;
+	ip->i_d.di_gid = cr->cr_gid;
+	ip->i_d.di_projid = prid;
+	bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+
+	/*
+	 * If the superblock version is up to where we support new format
+	 * inodes and this is currently an old format inode, then change
+	 * the inode version number now.  This way we only do the conversion
+	 * here rather than here and in the flush/logging code.
+	 */
+	if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) &&
+	    ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+		ip->i_d.di_version = XFS_DINODE_VERSION_2;
+		/* old link count, projid field, pad field already zeroed */
+	}
+
+	ip->i_d.di_size = 0;
+	ip->i_d.di_nextents = 0;
+	ASSERT(ip->i_d.di_nblocks == 0);
+	xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
+	/* di_gen will have been taken care of in xfs_iread. */
+	ip->i_d.di_extsize = 0;
+	ip->i_d.di_dmevmask = 0;
+	ip->i_d.di_dmstate = 0;
+	ip->i_d.di_flags = 0;
+	flags = XFS_ILOG_CORE;
+	switch (mode & IFMT) {
+	case IFIFO:
+	case IFCHR:
+	case IFBLK:
+	case IFSOCK:
+		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
+		ip->i_df.if_u2.if_rdev = makedev(major(rdev), minor(rdev));
+		ip->i_df.if_flags = 0;
+		flags |= XFS_ILOG_DEV;
+		break;
+	case IFREG:
+	case IFDIR:
+	case IFLNK:
+		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+		ip->i_df.if_flags = XFS_IFEXTENTS;
+		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
+		ip->i_df.if_u1.if_extents = NULL;
+		break;
+	default:
+		ASSERT(0);
+	}
+	/* Attribute fork settings for new inode. */
+	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+	ip->i_d.di_anextents = 0;
+
+	/*
+	 * Log the new values stuffed into the inode.
+	 */
+	xfs_trans_log_inode(tp, ip, flags);
+	*ipp = ip;
+	return 0;
+}
+
+/* Print an in-core inode (i_df fork, extent list, i_d core) to stdout. */
+void
+libxfs_iprint(xfs_inode_t *ip)
+{
+	xfs_dinode_core_t	*dip;
+	xfs_bmbt_rec_t	*ep;
+	xfs_bmbt_irec_t	rec;
+	xfs_extnum_t	i, nextents;
+
+	printf("Inode %p\n", ip);
+	printf("    i_dev %x\n", (uint)ip->i_dev);
+	/* "L" is only a standard length modifier for floating point. */
+	printf("    i_ino %llx\n", (unsigned long long)ip->i_ino);
+
+	if (ip->i_df.if_flags & XFS_IFEXTENTS)
+		printf("EXTENTS ");
+	printf("\n");
+	printf("    i_df.if_bytes %d\n", ip->i_df.if_bytes);
+	printf("    i_df.if_u1.if_extents/if_data %p\n", ip->i_df.if_u1.if_extents);
+	if (ip->i_df.if_flags & XFS_IFEXTENTS) {
+		nextents = ip->i_df.if_bytes / (uint)sizeof(*ep);
+		for (ep = ip->i_df.if_u1.if_extents, i = 0; i < nextents; i++, ep++) {
+			xfs_bmbt_get_all(ep, &rec);
+			printf("\t%d: startoff %llu, startblock 0x%llx,"
+			" blockcount %llu, state %d\n",
+				i, (unsigned long long)rec.br_startoff,
+				(unsigned long long)rec.br_startblock,
+				(unsigned long long)rec.br_blockcount,
+				(int)rec.br_state);
+		}
+	}
+	printf("    i_df.if_broot %p\n", ip->i_df.if_broot);
+	printf("    i_df.if_broot_bytes %x\n", ip->i_df.if_broot_bytes);
+
+	dip = &(ip->i_d);
+	printf("\nOn disk portion\n");
+	printf("    di_magic %x\n", dip->di_magic);
+	printf("    di_mode %o\n", dip->di_mode);
+	printf("    di_version %x\n", (uint)dip->di_version);
+	switch (ip->i_d.di_format) {
+	case XFS_DINODE_FMT_LOCAL:
+		printf("    Inline inode\n");
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		printf("    Extents inode\n");
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		printf("    B-tree inode\n");
+		break;
+	default:
+		printf("    Other inode\n");
+		break;
+	}
+	printf("   di_nlink %x\n", dip->di_nlink);
+	printf("   di_uid %d\n", dip->di_uid);
+	printf("   di_gid %d\n", dip->di_gid);
+	printf("   di_nextents %d\n", dip->di_nextents);
+	printf("   di_size %lld\n", (long long)dip->di_size);
+	printf("   di_gen %x\n", dip->di_gen);
+	printf("   di_extsize %d\n", dip->di_extsize);
+	printf("   di_flags %x\n", dip->di_flags);
+	printf("   di_nblocks %lld\n", (long long)dip->di_nblocks);
+}
+
+/*
+ * Writes a modified inode's changes out to the inode's on disk home,
+ * i.e. its slot at offset ip->i_boffset within the buffer bp.
+ * Returns 0, or EFSCORRUPTED when flushing the data fork reports it.
+ * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
+ */
+int
+libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp)
+{
+	xfs_inode_log_item_t	*iip;
+	xfs_dinode_t		*dip;
+	xfs_mount_t		*mp;
+
+	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+		ip->i_d.di_nextents > ip->i_df.if_ext_max);
+
+	iip = ip->i_itemp;
+	mp = ip->i_mount;
+
+	/* set *dip = inode's place in the buffer */
+	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset);
+
+#ifdef DEBUG
+	ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC);
+	if ((ip->i_d.di_mode & IFMT) == IFREG) {
+		ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
+			(ip->i_d.di_format == XFS_DINODE_FMT_BTREE) );
+	}
+	else if ((ip->i_d.di_mode & IFMT) == IFDIR) {
+		ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
+			(ip->i_d.di_format == XFS_DINODE_FMT_BTREE)   ||
+			(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) );
+	}
+	ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks);
+	ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize);
+#endif
+
+	/*
+	 * Copy the dirty parts of the inode into the on-disk
+	 * inode.  We always copy out the core of the inode,
+	 * because if the inode is dirty at all the core must
+	 * be.
+	 */
+	xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1,
+				ARCH_CONVERT);
+	/*
+	 * If this is really an old format inode and the superblock version
+	 * has not been updated to support only new format inodes, then
+	 * convert back to the old inode format.  If the superblock version
+	 * has been updated, then make the conversion permanent.
+	 */
+	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
+		XFS_SB_VERSION_HASNLINK(&mp->m_sb));
+	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+			/* Convert it back. */
+			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
+			INT_SET(dip->di_core.di_onlink, ARCH_CONVERT,
+				ip->i_d.di_nlink);
+		} else {
+			/*
+			 * The superblock version has already been bumped,
+			 * so just make the conversion to the new inode
+			 * format permanent.
+			 */
+			ip->i_d.di_version = XFS_DINODE_VERSION_2;
+			INT_SET(dip->di_core.di_version, ARCH_CONVERT,
+				XFS_DINODE_VERSION_2);
+			ip->i_d.di_onlink = 0;
+			INT_ZERO(dip->di_core.di_onlink, ARCH_CONVERT);
+			bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+			bzero(&(dip->di_core.di_pad[0]),
+				sizeof(dip->di_core.di_pad));
+			ASSERT(ip->i_d.di_projid == 0);
+		}
+	}
+
+	if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED)
+		return EFSCORRUPTED;
+	if (XFS_IFORK_Q(ip)) {
+		/* The only error from xfs_iflush_fork is on the data fork. */
+		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
+	}
+
+	return 0;
+}
+
+/*
+ * Advance *bnop to the next mapped (non-hole) block after it in the
+ * given fork, or to NULLFILEOFF if none (always so for a local fork).
+ * Returns 0, EIO for an unexpected fork format, or the error from
+ * xfs_iread_extents().
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+libxfs_bmap_next_offset(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_fileoff_t	*bnop,			/* current block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_fileoff_t	bno;			/* first candidate block */
+	int		eof;			/* hit end of file */
+	int		error;			/* error return value */
+	xfs_bmbt_irec_t	got;			/* current extent value */
+	xfs_ifork_t	*ifp;			/* inode fork pointer */
+	xfs_extnum_t	lastx;			/* last extent used */
+	xfs_bmbt_irec_t	prev;			/* previous extent value */
+
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		*bnop = NULLFILEOFF;
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+	case XFS_DINODE_FMT_EXTENTS:
+		break;
+	default:
+		return XFS_ERROR(EIO);
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	bno = *bnop + 1;
+	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+	*bnop = eof ? NULLFILEOFF :
+		(got.br_startoff < bno ? bno : got.br_startoff);
+	return 0;
+}
+
+/*
+ * Variant of xfs_dir_removename for entries whose stored hash value
+ * may not match the name: the caller passes the hash to look for
+ * in hashval.  Returns EINVAL when namelen >= MAXNAMELEN.
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+xfs_dir_bogus_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+		xfs_fsblock_t *firstblock, xfs_bmap_free_t *flist,
+		xfs_extlen_t total, xfs_dahash_t hashval, int namelen)
+{
+	xfs_da_args_t args;
+	int entries, bytes, sfsize, rc;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN)
+		return EINVAL;
+
+	/* Describe the removal request; the caller supplies the hash. */
+	args.dp = dp;
+	args.trans = trans;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = hashval;
+	args.inumber = 0;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = 0;
+	args.addname = 0;
+	args.oknoent = 1;
+
+	/* Inode-local format: use the shortform routine. */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return xfs_dir_shortform_removename(&args);
+
+	/* Not a one-block directory per xfs_bmap_one_block(): node routine. */
+	if (!xfs_bmap_one_block(dp, XFS_DATA_FORK))
+		return xfs_dir_node_removename(&args);
+
+	/*
+	 * One block: remove from the leaf, then convert to shortform when
+	 * the computed shortform size is within XFS_IFORK_DSIZE(dp).
+	 */
+	rc = xfs_dir_leaf_removename(&args, &entries, &bytes);
+	if (rc != 0)
+		return rc;
+	sfsize = XFS_DIR_SF_ALLFIT(entries, bytes);
+	if (sfsize <= XFS_IFORK_DSIZE(dp))
+		rc = xfs_dir_leaf_to_shortform(&args);
+	return rc;
+}
+
+/*
+ * Like xfs_dir2_removename, but only for removing entries with
+ * (name, hashvalue) pairs that may not be consistent (hashvalue
+ * may not be correctly set for the name).
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+xfs_dir2_bogus_removename(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* name of entry to remove */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total,		/* bmap's total block count */
+	xfs_dahash_t	hash,		/* name's real hash value */
+	int		namelen)	/* entry's name length */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN)
+		return EINVAL;
+
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = hash;
+	args.inumber = 0;
+	args.dp = dp;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = tp;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+
+	/*
+	 * Pick the work routine by directory format; a failing format
+	 * probe (isblock/isleaf) is returned to the caller as is.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_removename(&args);
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
+		return rval;
+	else if (v)
+		rval = xfs_dir2_block_removename(&args);
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
+		return rval;
+	else if (v)
+		rval = xfs_dir2_leaf_removename(&args);
+	else
+		rval = xfs_dir2_node_removename(&args);
+	return rval;
+}
+
+/*
+ * Utility routine commonly used to apply a delta to a field in the
+ * in-core superblock.
+ * Switch on the field indicated and apply the delta to that field.
+ * Only XFS_SBS_FDBLOCKS is handled here.  It is not allowed to dip
+ * below zero, so if the delta would do this it is not applied and
+ * ENOSPC is returned.  Any other field trips ASSERT(0).
+ * Originally derived from xfs_mod_incore_sb().
+ */
+int
+libxfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
+{
+	long long	lcounter;	/* long counter for 64 bit fields */
+
+	switch (field) {
+	case XFS_SBS_FDBLOCKS:
+		lcounter = (long long)mp->m_sb.sb_fdblocks;
+		lcounter += delta;
+		if (lcounter < 0)
+			return (XFS_ERROR(ENOSPC));
+		mp->m_sb.sb_fdblocks = lcounter;
+		break;
+	default:
+		ASSERT(0);
+	}
+	return 0;
+}
+
+/* Free each extent queued on flist using *tp; *committed is always 0. */
+int
+libxfs_bmap_finish(
+	xfs_trans_t	**tp,
+	xfs_bmap_free_t	*flist,
+	xfs_fsblock_t	firstblock,
+	int		*committed)
+{
+	xfs_bmap_free_item_t	*free;	/* free extent list item */
+	xfs_bmap_free_item_t	*next;	/* next item on free list */
+	int			error;
+
+	/* *tp is never replaced here, so report "not committed" on all paths. */
+	*committed = 0;
+	if (flist->xbf_count == 0)
+		return 0;
+
+	for (free = flist->xbf_first; free != NULL; free = next) {
+		next = free->xbfi_next;	/* read before free leaves flist */
+		if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
+				free->xbfi_blockcount)))
+			return error;
+		xfs_bmap_del_free(flist, NULL, free);
+	}
+	return 0;
+}
+
+/*
+ * This routine allocates disk space for len bytes of the given file,
+ * starting at offset.  Returns 0, EINVAL if len <= 0, ENOSPC if an
+ * xfs_bmapi call maps nothing, else the first error encountered.
+ * Originally derived from xfs_alloc_file_space().
+ */
+int
+libxfs_alloc_file_space(
+	xfs_inode_t	*ip,
+	xfs_off_t	offset,
+	xfs_off_t	len,
+	int		alloc_type,
+	int		attr_flags)
+{
+	xfs_mount_t	*mp;
+	xfs_filblks_t	datablocks;
+	xfs_filblks_t	allocated_fsb;
+	xfs_filblks_t	allocatesize_fsb;
+	xfs_fsblock_t	firstfsb;
+	xfs_bmap_free_t	free_list;
+	xfs_bmbt_irec_t	*imapp;
+	xfs_bmbt_irec_t	imaps[1];
+	int		reccount;
+	uint		resblks;
+	xfs_fileoff_t	startoffset_fsb;
+	xfs_trans_t	*tp;
+	int		xfs_bmapi_flags;
+	int		committed;
+	int		error;
+
+	if (len <= 0)
+		return EINVAL;
+
+	error = 0;
+	imapp = &imaps[0];
+	reccount = 1;
+	xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+	mp = ip->i_mount;
+	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
+	allocatesize_fsb = XFS_B_TO_FSB(mp, len);
+
+	/* allocate file space until done or until there is an error */
+	while (allocatesize_fsb && !error) {
+		datablocks = allocatesize_fsb;
+
+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+		resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
+		error = xfs_trans_reserve(tp, resblks, 0, 0, 0, 0);
+		if (error)
+			break;
+		xfs_trans_ijoin(tp, ip, 0);
+		xfs_trans_ihold(tp, ip);
+
+		XFS_BMAP_INIT(&free_list, &firstfsb);
+		error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb,
+				xfs_bmapi_flags, &firstfsb, 0, imapp,
+				&reccount, &free_list);
+		if (error)
+			break;
+
+		/* complete the transaction */
+		error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
+		if (error)
+			break;
+
+		error = xfs_trans_commit(tp, 0, NULL);
+		if (error)
+			break;
+
+		if (reccount == 0)	/* nothing mapped: don't read imaps[0] */
+			return ENOSPC;
+		allocated_fsb = imapp->br_blockcount;
+
+		startoffset_fsb += allocated_fsb;
+		allocatesize_fsb -= allocated_fsb;
+	}
+	return error;
+}
+
+/* Smallest rval with 2^rval >= i, capped at NBBY * sizeof(i). */
+unsigned int
+libxfs_log2_roundup(unsigned int i)
+{
+	unsigned int	rval;
+
+	for (rval = 0; rval < NBBY * sizeof(i); rval++)
+		if ((1U << rval) >= i)	/* 1U: a signed 1 could overflow */
+			break;
+	return rval;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ * Don't check magic number, the caller will (it's xfs_repair).
+ * Forwards to libxfs_da_do_buf() with 2 as its seventh argument.
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+int
+libxfs_da_read_bufr(
+	xfs_trans_t	*trans,
+	xfs_inode_t	*dp,
+	xfs_dablk_t	bno,
+	xfs_daddr_t		mappedbno,
+	xfs_dabuf_t	**bpp,
+	int		whichfork)
+{
+	return libxfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 2,
+		(inst_t *)__return_address);
+}
+
+/*
+ * Apply xfs_trans_bhold() to every buffer making up dabuf.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+void
+libxfs_da_bhold(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	int	n = 0;
+
+	while (n < dabuf->nbuf)
+		xfs_trans_bhold(tp, dabuf->bps[n++]);
+}
+
+/*
+ * Apply xfs_trans_bjoin() to every buffer making up dabuf.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+void
+libxfs_da_bjoin(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	int	n = 0;
+
+	while (n < dabuf->nbuf)
+		xfs_trans_bjoin(tp, dabuf->bps[n++]);
+}
diff --git a/libxfs/xfs.h b/libxfs/xfs.h
new file mode 100644
index 000000000..d702a385e
--- /dev/null
+++ b/libxfs/xfs.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * This header is effectively a "namespace multiplexor" for the
+ * user level XFS code.  It provides all of the necessary stuff
+ * such that we can build some parts of the XFS kernel code in
+ * user space in a controlled fashion, and translates the names
+ * used in the kernel into the names which libxfs is going to
+ * make available to user tools.
+ *
+ * It should only ever be #include'd by XFS "kernel" code being
+ * compiled in user space.
+ * 
+ * Our goals here are to...
+ *      o  "share" large amounts of complex code between user and
+ *         kernel space;
+ *      o  shield the user tools from changes in the bleeding
+ *         edge kernel code, merging source changes when
+ *         convenient and not immediately (no symlinks);
+ *      o  i.e. be able to merge changes to the kernel source back
+ *         into the affected user tools in a controlled fashion;
+ *      o  provide a _minimalist_ life-support system for kernel
+ *         code in user land, not the "everything + the kitchen
+ *         sink" model which libsim had mutated into;
+ *      o  allow the kernel code to be completely free of code
+ *         specifically there to support the user level build.
+ */
+
+#include <libxfs.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <attributes.h>
+
+/*
+ * Map XFS kernel routine names to libxfs.h names
+ */
+
+#define xfs_xlatesb			libxfs_xlate_sb
+#define xfs_xlate_dinode_core		libxfs_xlate_dinode_core
+#define xfs_bmbt_get_all                libxfs_bmbt_get_all
+#define xfs_bmbt_get_blockcount         libxfs_bmbt_get_blockcount
+#define xfs_bmbt_get_startoff           libxfs_bmbt_get_startoff
+#define xfs_da_hashname                 libxfs_da_hashname
+#define xfs_da_log2_roundup             libxfs_da_log2_roundup
+#define xfs_highbit32                   libxfs_highbit32
+#define xfs_highbit64                   libxfs_highbit64
+#define xfs_attr_leaf_newentsize        libxfs_attr_leaf_newentsize
+#define xfs_alloc_compute_maxlevels     libxfs_alloc_compute_maxlevels
+#define xfs_bmap_compute_maxlevels      libxfs_bmap_compute_maxlevels
+#define xfs_ialloc_compute_maxlevels    libxfs_ialloc_compute_maxlevels
+
+#define xfs_dir_init			libxfs_dir_init
+#define xfs_dir2_init			libxfs_dir2_init
+#define xfs_dir_mount                   libxfs_dir_mount
+#define xfs_dir2_mount                  libxfs_dir2_mount
+#define xfs_dir_createname		libxfs_dir_createname
+#define xfs_dir2_createname		libxfs_dir2_createname
+#define xfs_dir_lookup			libxfs_dir_lookup
+#define xfs_dir2_lookup			libxfs_dir2_lookup
+#define xfs_dir_replace			libxfs_dir_replace
+#define xfs_dir2_replace		libxfs_dir2_replace
+#define xfs_dir_removename		libxfs_dir_removename
+#define xfs_dir2_removename		libxfs_dir2_removename
+#define xfs_dir_bogus_removename	libxfs_dir_bogus_removename
+#define xfs_dir2_bogus_removename	libxfs_dir2_bogus_removename
+
+#define xfs_mount_common                libxfs_mount_common
+#define xfs_rtmount_init                libxfs_rtmount_init
+#define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
+#define xfs_iread			libxfs_iread
+#define xfs_ialloc			libxfs_ialloc
+#define xfs_idata_realloc		libxfs_idata_realloc
+#define xfs_itobp			libxfs_itobp
+#define xfs_ichgtime			libxfs_ichgtime
+#define xfs_bmapi			libxfs_bmapi
+#define xfs_bmap_finish			libxfs_bmap_finish
+#define xfs_bmap_del_free		libxfs_bmap_del_free
+#define xfs_bunmapi			libxfs_bunmapi
+#define xfs_free_extent			libxfs_free_extent
+#define xfs_rtfree_extent		libxfs_rtfree_extent
+#define xfs_mod_sb			libxfs_mod_sb
+#define xfs_mod_incore_sb		libxfs_mod_incore_sb
+
+#define xfs_trans_init                  libxfs_trans_init
+#define xfs_trans_dup			libxfs_trans_dup
+#define xfs_trans_iget			libxfs_trans_iget
+#define xfs_trans_ijoin			libxfs_trans_ijoin
+#define xfs_trans_ihold			libxfs_trans_ihold
+#define xfs_trans_bjoin			libxfs_trans_bjoin
+#define xfs_trans_bhold			libxfs_trans_bhold
+#define xfs_trans_alloc			libxfs_trans_alloc
+#define xfs_trans_commit		libxfs_trans_commit
+#define xfs_trans_mod_sb		libxfs_trans_mod_sb
+#define xfs_trans_reserve		libxfs_trans_reserve
+#define xfs_trans_get_buf		libxfs_trans_get_buf
+#define xfs_trans_log_buf		libxfs_trans_log_buf
+#define xfs_trans_read_buf		libxfs_trans_read_buf
+#define xfs_trans_log_inode		libxfs_trans_log_inode
+#define xfs_trans_inode_alloc_buf	libxfs_trans_inode_alloc_buf
+#define xfs_trans_brelse		libxfs_trans_brelse
+#define xfs_trans_binval		libxfs_trans_binval
+
+#define xfs_da_shrink_inode		libxfs_da_shrink_inode
+#define xfs_da_grow_inode		libxfs_da_grow_inode
+#define xfs_da_brelse			libxfs_da_brelse
+#define xfs_da_read_buf			libxfs_da_read_buf		
+#define xfs_da_get_buf			libxfs_da_get_buf
+#define xfs_da_log_buf			libxfs_da_log_buf
+#define xfs_da_do_buf			libxfs_da_do_buf
+#define xfs_dir2_shrink_inode		libxfs_dir2_shrink_inode
+#define xfs_dir2_grow_inode		libxfs_dir2_grow_inode
+#define xfs_dir2_isleaf			libxfs_dir2_isleaf
+#define xfs_dir2_isblock		libxfs_dir2_isblock
+#define xfs_dir2_data_use_free		libxfs_dir2_data_use_free
+#define xfs_dir2_data_make_free		libxfs_dir2_data_make_free
+#define xfs_dir2_data_log_entry		libxfs_dir2_data_log_entry
+#define xfs_dir2_data_log_header	libxfs_dir2_data_log_header
+#define xfs_dir2_data_freescan		libxfs_dir2_data_freescan
+#define xfs_dir2_free_log_bests		libxfs_dir2_free_log_bests
+
+
+/*
+ * Infrastructure to support building kernel XFS code in user space
+ */
+
+/* buffer management */
+#define XFS_BUF_LOCK			0
+#define XFS_BUF_MAPPED			0
+#define XFS_BUF_TRYLOCK			0
+#define XFS_BUF_ISDONE(bp)		0
+#define XFS_BUF_GETERROR(bp)		0
+#define XFS_BUF_DONE(bp)		((void) 0)
+#define XFS_BUF_SET_REF(a,b)		((void) 0)
+#define XFS_BUF_SET_VTYPE(a,b)		((void) 0)
+#define XFS_BUF_SET_VTYPE_REF(a,b,c)	((void) 0)
+#define XFS_BUF_SET_BDSTRAT_FUNC(a,b)	((void) 0)
+#define xfs_baread(a,b,c)		((void) 0)	/* no readahead */
+#define xfs_buftrace(x,y)		((void) 0)	/* debug only */
+#define xfs_buf_item_log_debug(bip,a,b)	((void) 0)	/* debug only */
+#define xfs_validate_extents(e,n,f)	((void) 0)	/* debug only */
+#define xfs_buf_relse(bp)		libxfs_putbuf(bp)
+#define xfs_read_buf(mp,x,blkno,len,f,bpp)	\
+	( *(bpp) = libxfs_readbuf( (mp)->m_dev, (blkno), (len), 1), 0 )
+
+
+/* transaction management */
+#define xfs_trans_set_sync(tp)			((void) 0)
+#define xfs_trans_agblocks_delta(tp, d)		((void) 0)	/* debug only */
+#define xfs_trans_agflist_delta(tp, d)		((void) 0)	/* debug only */
+#define xfs_trans_agbtree_delta(tp, d)		((void) 0)	/* debug only */
+#define xfs_trans_mod_dquot_byino(tp,ip,f,d)	((void) 0)
+#define xfs_trans_get_block_res(tp)		1
+#define xfs_trans_reserve_blkquota(tp,i,n)	0
+#define xfs_trans_unreserve_blkquota(tp,i,n)	((void) 0)
+#define xfs_trans_unreserve_rtblkquota(tp,i,n)	((void) 0)
+
+
+/* memory management */
+#define kmem_zone_init(a, b)	libxfs_zone_init(a, b)
+#define kmem_zone_alloc(z, f)	libxfs_zone_zalloc(z)
+#define kmem_zone_zalloc(z, f)	libxfs_zone_zalloc(z)
+#define kmem_zone_free(z, p)	libxfs_zone_free(z, p)
+#define kmem_realloc(p,sz,u,f)	libxfs_realloc(p,sz)
+#define kmem_alloc(size, f)	libxfs_malloc(size)
+#define kmem_free(p, size)	libxfs_free(p)
+
+/* directory management */
+#define xfs_dir2_trace_args(where, args)		((void) 0)
+#define xfs_dir2_trace_args_b(where, args, bp)		((void) 0)
+#define xfs_dir2_trace_args_bb(where, args, lbp, dbp)	((void) 0)
+#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c) ((void) 0)
+#define xfs_dir2_trace_args_db(where, args, db, bp)	((void) 0)
+#define xfs_dir2_trace_args_i(where, args, i)		((void) 0)
+#define xfs_dir2_trace_args_s(where, args, s)		((void) 0)
+#define xfs_dir2_trace_args_sb(where, args, s, bp)	((void) 0)
+#define xfs_dir_shortform_validate_ondisk(a,b)		((void) 0)
+
+
+/* block management */
+#define xfs_bmap_check_extents(ip,w)			((void) 0)
+#define xfs_bmap_trace_delete(f,d,ip,i,c,w)		((void) 0)
+#define xfs_bmap_trace_exlist(f,ip,i,w)			((void) 0)
+#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w)	((void) 0)
+#define xfs_bmap_trace_post_update(f,d,ip,i,w)		((void) 0)
+#define xfs_bmap_trace_pre_update(f,d,ip,i,w)		((void) 0)
+#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	((void) 0)
+#define xfs_bunmap_trace(ip, bno, len, flags, ra)	((void) 0)
+#define XFS_BMBT_TRACE_ARGBI(c,b,i)			((void) 0)
+#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)			((void) 0)
+#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)		((void) 0)
+#define XFS_BMBT_TRACE_ARGI(c,i)			((void) 0)
+#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k)			((void) 0)
+#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)			((void) 0)
+#define XFS_BMBT_TRACE_ARGIK(c,i,k)			((void) 0)
+#define XFS_BMBT_TRACE_CURSOR(c,s)			((void) 0)
+
+
+/* anything else: stand-in types, error codes, and stub/no-op macros */
+typedef __uint32_t inst_t;	/* an instruction */
+typedef enum { B_FALSE, B_TRUE } boolean_t;
+typedef struct { dev_t dev; } buftarg_t;
+#define STATIC		/* empty: functions marked STATIC get external linkage */
+#define ENOATTR		1009	/* Attribute not found */
+#define EFSCORRUPTED	1010	/* Filesystem is corrupted */
+#define ktrace_t	void
+#define m_ddev_targp	m_dev	/* field alias: x->m_ddev_targp reads x->m_dev */
+#define KERN_WARNING		/* empty, so printk(KERN_WARNING "...") still parses */
+#define XFS_ERROR(e)	(e)
+#define xfs_fs_cmn_err(a,b,msg,args...)	( fprintf(stderr, msg, ## args) )	/* a, b are dropped */
+#define printk(msg,args...)		( fprintf(stderr, msg, ## args) )
+#define XFS_TEST_ERROR(expr,a,b,c)	( expr )	/* only expr is used; a, b, c are dropped */
+#define TRACE_FREE(s,a,b,x,f)		((void) 0)
+#define TRACE_ALLOC(s,a)		((void) 0)
+#define TRACE_MODAGF(a,b,c)		((void) 0)
+#define XFS_FORCED_SHUTDOWN(mp)		0	/* always false here */
+#define XFS_MOUNT_WSYNC			0
+#define XFS_MOUNT_NOALIGN		0
+#define XFS_ILOCK_EXCL			0
+#define mrlock(a,b,c)			((void) 0)	/* mr* lock operations are no-ops */
+#define mraccunlock(a)			((void) 0)
+#define mrunlock(a)			((void) 0)
+#define mraccess(a)			((void) 0)
+#define ismrlocked(a,b)			1	/* always reports "locked" */
+#define ovbcopy(from,to,count)		memmove(to,from,count)	/* note swapped from/to order */
+#define __return_address		__builtin_return_address(0)
+#define xfs_btree_reada_bufl(m,fsb,c)	((void) 0)
+#define xfs_btree_reada_bufs(m,fsb,c,x)	((void) 0)
+#undef  XFS_DIR_SHORTFORM_VALIDATE_ONDISK
+#define XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip) 0
+
+#define do_mod(a, b)	((a) % (b))
+#define do_div(n,base)	({	/* n /= base in place; value is the remainder */ \
+	int __res; \
+	__res = ((unsigned long long) (n)) % (unsigned) (base); \
+	(n) = ((unsigned long long) (n)) / (unsigned) (base); \
+	__res; })
+
+#include <asm/page.h>
+#define NBPP	PAGE_SIZE
+
+static inline int atomicIncWithWrap(int *a, int b)
+{
+	int before = *a;		/* value returned to the caller */
+	int bumped = before + 1;	/* plain increment, no atomic primitive */
+
+	*a = (bumped == b) ? 0 : bumped;	/* wrap to 0 on reaching b */
+	return before;
+}
+
+
+/*
+ * Prototypes needed for a clean build
+ */
+
+/* xfs_alloc.c */
+int  xfs_alloc_get_freelist (xfs_trans_t *, xfs_buf_t *, xfs_agblock_t *);
+void xfs_alloc_log_agf (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_alloc_put_freelist (xfs_trans_t *, xfs_buf_t *, xfs_buf_t *,
+			xfs_agblock_t);
+int  xfs_alloc_read_agf (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+			int, xfs_buf_t **);
+int  xfs_alloc_vextent (xfs_alloc_arg_t *);
+int  xfs_alloc_pagf_init (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t, int);
+int  xfs_alloc_ag_vextent_size (xfs_alloc_arg_t *);
+int  xfs_alloc_ag_vextent_near (xfs_alloc_arg_t *);
+int  xfs_alloc_ag_vextent_exact (xfs_alloc_arg_t *);
+int  xfs_alloc_ag_vextent_small (xfs_alloc_arg_t *, xfs_btree_cur_t *,
+			xfs_agblock_t *, xfs_extlen_t *, int *);
+
+/* xfs_ialloc.c */
+int  xfs_dialloc (xfs_trans_t *, xfs_ino_t, mode_t, int, xfs_buf_t **,
+			boolean_t *, xfs_ino_t *);
+void xfs_ialloc_log_agi (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_ialloc_read_agi (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+			xfs_buf_t **);
+int  xfs_dilocate (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, xfs_fsblock_t *,
+			int *, int *, uint);
+
+/* xfs_rtalloc.c */
+int  xfs_rtfree_extent (xfs_trans_t *, xfs_rtblock_t, xfs_extlen_t);
+int  xfs_rtmodify_range (xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+			xfs_extlen_t, int);
+int  xfs_rtmodify_summary (xfs_mount_t *, xfs_trans_t *, int,
+			xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *);
+
+/* xfs_btree.c */
+extern xfs_zone_t *xfs_btree_cur_zone;
+void xfs_btree_check_key (xfs_btnum_t, void *, void *);
+void xfs_btree_check_rec (xfs_btnum_t, void *, void *);
+int  xfs_btree_check_lblock (xfs_btree_cur_t *, xfs_btree_lblock_t *,
+			int, xfs_buf_t *);
+int  xfs_btree_check_sblock (xfs_btree_cur_t *, xfs_btree_sblock_t *,
+			int, xfs_buf_t *);
+int  xfs_btree_check_sptr (xfs_btree_cur_t *, xfs_agblock_t, int);
+int  xfs_btree_check_lptr (xfs_btree_cur_t *, xfs_dfsbno_t, int);
+void xfs_btree_del_cursor (xfs_btree_cur_t *, int);
+int  xfs_btree_dup_cursor (xfs_btree_cur_t *, xfs_btree_cur_t **);
+int  xfs_btree_firstrec (xfs_btree_cur_t *, int);
+xfs_btree_block_t *xfs_btree_get_block (xfs_btree_cur_t *, int, xfs_buf_t **);
+xfs_buf_t *xfs_btree_get_bufs (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+			xfs_agblock_t, uint);
+xfs_buf_t *xfs_btree_get_bufl (xfs_mount_t *, xfs_trans_t *tp,
+			xfs_fsblock_t, uint);
+xfs_btree_cur_t *xfs_btree_init_cursor (xfs_mount_t *, xfs_trans_t *,
+			xfs_buf_t *, xfs_agnumber_t, xfs_btnum_t,
+			xfs_inode_t *, int);
+int  xfs_btree_islastblock (xfs_btree_cur_t *, int);
+int  xfs_btree_lastrec (xfs_btree_cur_t *, int);
+void xfs_btree_offsets (__int64_t, const short *, int, int *, int *);
+int  xfs_btree_readahead (xfs_btree_cur_t *, int, int);
+void xfs_btree_setbuf (xfs_btree_cur_t *, int, xfs_buf_t *);
+int  xfs_btree_read_bufs (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+			xfs_agblock_t, uint, xfs_buf_t **, int);
+int  xfs_btree_read_bufl (xfs_mount_t *, xfs_trans_t *, xfs_fsblock_t,
+			uint, xfs_buf_t **, int);
+
+/* xfs_inode.c */
+int  xfs_ialloc (xfs_trans_t *, xfs_inode_t *, mode_t, nlink_t, dev_t, cred_t *,
+		xfs_prid_t, int, xfs_buf_t **, boolean_t *, xfs_inode_t **);
+int  xfs_iread_extents (xfs_trans_t *, xfs_inode_t *, int);
+int  xfs_imap (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, xfs_imap_t *, uint);
+int  xfs_iextents_copy (xfs_inode_t *, xfs_bmbt_rec_32_t *, int);
+int  xfs_iflush_int (xfs_inode_t *, xfs_buf_t *);
+int  xfs_iflush_fork (xfs_inode_t *, xfs_dinode_t *, xfs_inode_log_item_t *,
+		int, xfs_buf_t *);
+int  xfs_iformat_local (xfs_inode_t *, xfs_dinode_t *, int, int);
+int  xfs_iformat_extents (xfs_inode_t *, xfs_dinode_t *, int);
+int  xfs_iformat_btree (xfs_inode_t *, xfs_dinode_t *, int);
+void xfs_iroot_realloc (xfs_inode_t *, int, int);
+void xfs_idata_realloc (xfs_inode_t *, int, int);
+void xfs_iext_realloc (xfs_inode_t *, int, int);
+void xfs_idestroy_fork (xfs_inode_t *, int);
+uint xfs_iroundup (uint);
+
+/* xfs_bmap.c */
+xfs_bmbt_rec_t *xfs_bmap_search_extents (xfs_inode_t *ip,
+			xfs_fileoff_t, int, int *, xfs_extnum_t *,
+			xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
+int  xfs_bmap_read_extents (xfs_trans_t *, xfs_inode_t *, int);
+void xfs_bmap_add_free (xfs_fsblock_t, xfs_filblks_t, xfs_bmap_free_t *,
+			xfs_mount_t *);
+int  xfs_bmap_first_unused (xfs_trans_t *, xfs_inode_t *, xfs_extlen_t,
+			xfs_fileoff_t *, int);
+int  xfs_bmap_last_offset (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t *, int);
+int  xfs_bmap_last_before (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t *, int);
+int  xfs_bmap_one_block (xfs_inode_t *, int);
+int  xfs_bmapi_single (xfs_trans_t *, xfs_inode_t *, int, xfs_fsblock_t *,
+			xfs_fileoff_t);
+int  xfs_bmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+			xfs_filblks_t, int, xfs_fsblock_t *, xfs_extlen_t,
+			xfs_bmbt_irec_t *, int *, xfs_bmap_free_t *);
+int  xfs_bunmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+			xfs_filblks_t, int, xfs_extnum_t, xfs_fsblock_t *,
+			xfs_bmap_free_t *, int *);
+int  xfs_bmap_add_extent_hole_delay (xfs_inode_t *ip, xfs_extnum_t,
+			xfs_btree_cur_t *, xfs_bmbt_irec_t *, int *, int);
+int  xfs_bmap_add_extent_hole_real (xfs_inode_t *, xfs_extnum_t,
+			xfs_btree_cur_t *, xfs_bmbt_irec_t *, int *, int);
+int  xfs_bmap_add_extent_unwritten_real (xfs_inode_t *, xfs_extnum_t,
+			xfs_btree_cur_t **, xfs_bmbt_irec_t *, int *);
+int  xfs_bmap_add_extent_delay_real (xfs_inode_t *, xfs_extnum_t,
+			xfs_btree_cur_t **, xfs_bmbt_irec_t *, xfs_filblks_t *,
+			xfs_fsblock_t *, xfs_bmap_free_t *, int *, int);
+int  xfs_bmap_extents_to_btree (xfs_trans_t *, xfs_inode_t *, xfs_fsblock_t *,
+			xfs_bmap_free_t *, xfs_btree_cur_t **, int, int *, int);
+void xfs_bmap_delete_exlist (xfs_inode_t *, xfs_extnum_t, xfs_extnum_t, int);
+xfs_filblks_t xfs_bmap_worst_indlen (xfs_inode_t *, xfs_filblks_t);
+int  xfs_bmap_isaeof (xfs_inode_t *, xfs_fileoff_t, int, int *);
+void xfs_bmap_insert_exlist (xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
+			xfs_bmbt_irec_t *, int);
+
+/* xfs_bmap_btree.c */
+int  xfs_check_nostate_extents (xfs_bmbt_rec_t *, xfs_extnum_t);
+void xfs_bmbt_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_bmbt_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+int  xfs_bmbt_killroot (xfs_btree_cur_t *, int);
+int  xfs_bmbt_updkey (xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
+int  xfs_bmbt_lshift (xfs_btree_cur_t *, int, int *);
+int  xfs_bmbt_rshift (xfs_btree_cur_t *, int, int *);
+int  xfs_bmbt_split (xfs_btree_cur_t *, int, xfs_fsblock_t *,
+			xfs_bmbt_key_t *, xfs_btree_cur_t **, int *);
+
+/* xfs_ialloc_btree.c */
+int  xfs_inobt_newroot (xfs_btree_cur_t *, int *);
+int  xfs_inobt_rshift (xfs_btree_cur_t *, int, int *);
+int  xfs_inobt_lshift (xfs_btree_cur_t *, int, int *);
+int  xfs_inobt_split (xfs_btree_cur_t *, int, xfs_agblock_t *,
+			xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
+void xfs_inobt_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_inobt_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_inobt_log_recs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_inobt_log_block (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_inobt_updkey (xfs_btree_cur_t *, xfs_inobt_key_t *, int);
+
+/* xfs_alloc_btree.c */
+void xfs_alloc_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_alloc_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_alloc_log_recs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_alloc_log_block (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_alloc_updkey (xfs_btree_cur_t *, xfs_alloc_key_t *, int);
+int  xfs_alloc_lshift (xfs_btree_cur_t *, int, int *);
+int  xfs_alloc_rshift (xfs_btree_cur_t *, int, int *);
+int  xfs_alloc_newroot (xfs_btree_cur_t *, int *);
+int  xfs_alloc_split (xfs_btree_cur_t *, int, xfs_agblock_t *,
+			xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
+
+/* xfs_da_btree.c */
+xfs_dabuf_t *xfs_da_buf_make (int, xfs_buf_t **, inst_t *);
+int  xfs_da_root_join (xfs_da_state_t *, xfs_da_state_blk_t *);
+int  xfs_da_root_split (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *);
+void xfs_da_node_add (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *);
+int  xfs_da_node_split (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *, xfs_da_state_blk_t *, int, int *);
+void xfs_da_node_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *);
+void xfs_da_node_remove (xfs_da_state_t *, xfs_da_state_blk_t *);
+void xfs_da_node_unbalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *);
+int  xfs_da_node_order (xfs_dabuf_t *, xfs_dabuf_t *);
+int  xfs_da_node_toosmall (xfs_da_state_t *, int *);
+uint xfs_da_node_lasthash (xfs_dabuf_t *, int *);
+int  xfs_da_do_buf (xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, xfs_daddr_t *,
+			xfs_dabuf_t **, int, int, inst_t *);
+
+/* xfs_dir.c */
+int  xfs_dir_node_addname (xfs_da_args_t *);
+int  xfs_dir_leaf_lookup (xfs_da_args_t *);
+int  xfs_dir_node_lookup (xfs_da_args_t *);
+int  xfs_dir_leaf_replace (xfs_da_args_t *);
+int  xfs_dir_node_replace (xfs_da_args_t *);
+int  xfs_dir_node_removename (xfs_da_args_t *);
+int  xfs_dir_leaf_removename (xfs_da_args_t *, int *, int *);
+
+/* xfs_dir_leaf.c */
+void xfs_dir_leaf_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *);
+void xfs_dir_leaf_add_work (xfs_dabuf_t *, xfs_da_args_t *, int, int);
+int  xfs_dir_leaf_compact (xfs_trans_t *, xfs_dabuf_t *, int, int);
+int  xfs_dir_leaf_figure_balance (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *, int *, int *);
+void xfs_dir_leaf_moveents (xfs_dir_leafblock_t *, int,
+			xfs_dir_leafblock_t *, int, int, xfs_mount_t *);
+
+/* xfs_dir2_leaf.c */
+void xfs_dir2_leaf_check (xfs_inode_t *, xfs_dabuf_t *);
+int  xfs_dir2_leaf_lookup_int (xfs_da_args_t *, xfs_dabuf_t **,
+			int *, xfs_dabuf_t **);
+
+/* xfs_dir2_block.c */
+void xfs_dir2_block_log_tail (xfs_trans_t *, xfs_dabuf_t *);
+void xfs_dir2_block_log_leaf (xfs_trans_t *, xfs_dabuf_t *, int, int);
+int  xfs_dir2_block_lookup_int (xfs_da_args_t *, xfs_dabuf_t **, int *);
+
+/* xfs_dir2_node.c */
+void xfs_dir2_leafn_check (xfs_inode_t *, xfs_dabuf_t *);
+int  xfs_dir2_leafn_remove (xfs_da_args_t *, xfs_dabuf_t *, int,
+			xfs_da_state_blk_t *, int *);
+int  xfs_dir2_node_addname_int (xfs_da_args_t *, xfs_da_state_blk_t *);
+
+/* xfs_dir2_sf.c */
+void xfs_dir2_sf_check (xfs_da_args_t *);
+int  xfs_dir2_sf_addname_pick (xfs_da_args_t *, int,
+			xfs_dir2_sf_entry_t **, xfs_dir2_data_aoff_t *);
+void xfs_dir2_sf_addname_easy (xfs_da_args_t *, xfs_dir2_sf_entry_t *,
+			xfs_dir2_data_aoff_t, int);
+void xfs_dir2_sf_addname_hard (xfs_da_args_t *, int, int);
+void xfs_dir2_sf_toino8 (xfs_da_args_t *);
+void xfs_dir2_sf_toino4 (xfs_da_args_t *);
+
+/* xfs_attr_leaf.c */
+void xfs_attr_leaf_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *);
+int  xfs_attr_leaf_add_work (xfs_dabuf_t *, xfs_da_args_t *, int);
+void xfs_attr_leaf_compact (xfs_trans_t *, xfs_dabuf_t *);
+void xfs_attr_leaf_moveents (xfs_attr_leafblock_t *, int,
+			xfs_attr_leafblock_t *, int, int, xfs_mount_t *);
+int  xfs_attr_leaf_figure_balance (xfs_da_state_t *, xfs_da_state_blk_t *,
+			xfs_da_state_blk_t *, int *, int *);
+
+/* xfs_trans_item.c */
+xfs_log_item_desc_t *xfs_trans_add_item (xfs_trans_t *, xfs_log_item_t *);
+xfs_log_item_desc_t *xfs_trans_find_item (xfs_trans_t *, xfs_log_item_t *);
+void xfs_trans_free_item (xfs_trans_t *, xfs_log_item_desc_t *);
+void xfs_trans_free_items (xfs_trans_t *, int);
+
+/* xfs_trans_buf.c */
+xfs_buf_t *xfs_trans_buf_item_match (xfs_trans_t *, buftarg_t *,
+			xfs_daddr_t, int);
+xfs_buf_t *xfs_trans_buf_item_match_all (xfs_trans_t *, buftarg_t *,
+			xfs_daddr_t, int);
+
+/* xfs_inode_item.c */
+void xfs_inode_item_init (xfs_inode_t *, xfs_mount_t *);
+
+/* xfs_buf_item.c */
+void xfs_buf_item_init (xfs_buf_t *, xfs_mount_t *);
+void xfs_buf_item_log (xfs_buf_log_item_t *, uint, uint);
+
+/* local source files */
+int  xfs_mod_incore_sb (xfs_mount_t *, xfs_sb_field_t, int, int);
+void xfs_trans_mod_sb (xfs_trans_t *, uint, long);
+int  xfs_trans_unlock_chunk (xfs_log_item_chunk_t *, int, int, xfs_lsn_t);
+
+
+#ifndef DEBUG	/* without DEBUG the consistency checkers below become no-ops */
+#define xfs_inobp_check(mp,bp)				((void) 0)
+#define xfs_btree_check_key(a,b,c)			((void) 0)
+#define xfs_btree_check_rec(a,b,c)			((void) 0)
+#define xfs_btree_check_block(a,b,c,d)			((void) 0)
+#define xfs_dir2_sf_check(args)				((void) 0)
+#define xfs_dir2_leaf_check(dp,bp)			((void) 0)
+#define xfs_dir2_leafn_check(dp,bp)			((void) 0)
+#undef xfs_dir2_data_check	/* drop any earlier definition before stubbing it */
+#define xfs_dir2_data_check(dp,bp)			((void) 0)
+#endif	/* !DEBUG */
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
new file mode 100644
index 000000000..9792416be
--- /dev/null
+++ b/libxfs/xfs_alloc.c
@@ -0,0 +1,2355 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+ 
+#include <xfs.h>
+
+#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))	/* |a - b|; each argument is evaluated twice */
+#define	XFSA_FIXUP_BNO_OK	1	/* xfs_alloc_fixup_trees: by-block cursor already on the record */
+#define	XFSA_FIXUP_CNT_OK	2	/* xfs_alloc_fixup_trees: by-size cursor already on the record */
+
+/*
+ * Trim a found free extent so that it begins on an alignment boundary.
+ * Stores the result via resbno and reslen; nonzero return means len >= minlen.
+ */
+STATIC int				/* success (>= minlen) */
+xfs_alloc_compute_aligned(
+	xfs_agblock_t	foundbno,	/* starting block in found extent */
+	xfs_extlen_t	foundlen,	/* length in found extent */
+	xfs_extlen_t	alignment,	/* alignment for allocation */
+	xfs_extlen_t	minlen,		/* minimum length for allocation */
+	xfs_agblock_t	*resbno,	/* result block number */
+	xfs_extlen_t	*reslen)	/* result length */
+{
+	xfs_agblock_t	start = foundbno;	/* default: extent unchanged */
+	xfs_extlen_t	avail = foundlen;
+	xfs_extlen_t	skipped;		/* blocks lost to alignment */
+
+	if (alignment > 1 && foundlen >= minlen) {
+		start = roundup(foundbno, alignment);
+		skipped = start - foundbno;
+		if (skipped >= foundlen)
+			avail = 0;
+		else
+			avail = foundlen - skipped;
+	}
+	*resbno = start;
+	*reslen = avail;
+	return avail >= minlen;
+}
+
+/*
+ * Pick a start block in the free extent for a "near" allocation and return
+ * its distance from wantbno; caller has already checked freelen >= wantlen.
+ */
+STATIC xfs_extlen_t			/* difference value (absolute) */
+xfs_alloc_compute_diff(
+	xfs_agblock_t	wantbno,	/* target starting block */
+	xfs_extlen_t	wantlen,	/* target length */
+	xfs_extlen_t	alignment,	/* target alignment */
+	xfs_agblock_t	freebno,	/* freespace's starting block */
+	xfs_extlen_t	freelen,	/* freespace's length */
+	xfs_agblock_t	*newbnop)	/* result: best start block from free */
+{
+	xfs_agblock_t	freeend;	/* end of freespace extent */
+	xfs_agblock_t	newbno1;	/* return block number */
+	xfs_agblock_t	newbno2;	/* other new block number */
+	xfs_extlen_t	newlen1;	/* length with newbno1 */
+	xfs_extlen_t	newlen2;	/* length with newbno2 */
+	xfs_agblock_t	wantend;	/* end of target extent */
+
+	ASSERT(freelen >= wantlen);
+	freeend = freebno + freelen;
+	wantend = wantbno + wantlen;
+	if (freebno >= wantbno) {	/* free extent starts at or after the target */
+		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
+			newbno1 = NULLAGBLOCK;
+	} else if (freeend >= wantend && alignment > 1) {	/* covers the target, aligned */
+		newbno1 = roundup(wantbno, alignment);	/* candidate at/after wantbno */
+		newbno2 = newbno1 - alignment;		/* candidate one step earlier */
+		if (newbno1 >= freeend)
+			newbno1 = NULLAGBLOCK;
+		else
+			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
+		if (newbno2 < freebno)
+			newbno2 = NULLAGBLOCK;
+		else
+			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
+		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
+			if (newlen1 < newlen2 ||	/* prefer longer, then nearer */
+			    (newlen1 == newlen2 &&
+			     XFS_ABSDIFF(newbno1, wantbno) >
+			     XFS_ABSDIFF(newbno2, wantbno)))
+				newbno1 = newbno2;
+		} else if (newbno2 != NULLAGBLOCK)
+			newbno1 = newbno2;
+	} else if (freeend >= wantend) {	/* covers the target, no alignment */
+		newbno1 = wantbno;
+	} else if (alignment > 1) {	/* free extent ends before the target does */
+		newbno1 = roundup(freeend - wantlen, alignment);
+		if (newbno1 > freeend - wantlen &&
+		    newbno1 - alignment >= freebno)
+			newbno1 -= alignment;
+		else if (newbno1 >= freeend)
+			newbno1 = NULLAGBLOCK;
+	} else
+		newbno1 = freeend - wantlen;
+	*newbnop = newbno1;
+	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);	/* 0 if no start fits */
+}
+
+/*
+ * Fix up the length, based on mod and prod.
+ * len should be k * prod + mod for some k; it is only ever rounded down.
+ * If rounding would drop len below minlen, args->len is left unchanged.
+ * If len hits maxlen it is left alone.
+ */
+STATIC void
+xfs_alloc_fix_len(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_extlen_t	k;
+	xfs_extlen_t	rlen;
+
+	ASSERT(args->mod < args->prod);
+	rlen = args->len;
+	ASSERT(rlen >= args->minlen);
+	ASSERT(rlen <= args->maxlen);
+	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
+	    (args->mod == 0 && rlen < args->prod))
+		return;
+	k = rlen % args->prod;
+	if (k == args->mod)
+		return;
+	/* Round rlen down to the nearest value of the form k * prod + mod. */
+	if (k > args->mod)
+		rlen = rlen - (k - args->mod);
+	else
+		rlen = rlen - args->prod + (args->mod - k);
+	/* The (int) casts catch rlen wrapping below zero. */
+	if ((int)rlen < (int)args->minlen)
+		return;
+	ASSERT(rlen >= args->minlen);
+	ASSERT(rlen <= args->maxlen);
+	args->len = rlen;
+}
+
+/*
+ * Shrink args->len if the a.g. would be left with fewer than minleft blocks.
+ * Return 1 if ok, 0 (with args->agbno = NULLAGBLOCK) if we should give up.
+ */
+STATIC int
+xfs_alloc_fix_minleft(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_agf_t	*agf;		/* a.g. freelist header */
+	int		diff;		/* free space difference */
+
+	if (args->minleft == 0)
+		return 1;
+	agf = XFS_BUF_TO_AGF(args->agbp);
+	diff = INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+		+ INT_GET(agf->agf_flcount, ARCH_CONVERT)
+		- args->len - args->minleft;
+	if (diff >= 0)
+		return 1;
+	args->len += diff;		/* shrink the allocated space */
+	if ((int)args->len >= (int)args->minlen)	/* (int) catches len underflow */
+		return 1;
+	args->agbno = NULLAGBLOCK;
+	return 0;
+}
+
+/*
+ * Update the two btrees, logically removing from freespace the extent
+ * starting at rbno, rlen blocks.  The extent is contained within the
+ * actual (current) free extent fbno for flen blocks.
+ * flags (XFSA_FIXUP_*_OK) say which cursors already point at the free
+ * extent's record; a cursor without its flag is looked up here first.
+ */
+STATIC int				/* error code */
+xfs_alloc_fixup_trees(
+	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
+	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
+	xfs_agblock_t	fbno,		/* starting block of free extent */
+	xfs_extlen_t	flen,		/* length of free extent */
+	xfs_agblock_t	rbno,		/* starting block of returned extent */
+	xfs_extlen_t	rlen,		/* length of returned extent */
+	int		flags)		/* flags, XFSA_FIXUP_... */
+{
+	int		error;		/* error code */
+	int		i;		/* operation results */
+	xfs_agblock_t	nfbno1;		/* first new free startblock */
+	xfs_agblock_t	nfbno2;		/* second new free startblock */
+	xfs_extlen_t	nflen1;		/* first new free length */
+	xfs_extlen_t	nflen2;		/* second new free length */
+
+	/*
+	 * Look up the record in the by-size tree if necessary.
+	 */
+	if (flags & XFSA_FIXUP_CNT_OK) {
+#ifdef DEBUG
+		if (error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(
+			i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+	} else {
+		if (error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	/*
+	 * Look up the record in the by-block tree if necessary.
+	 */
+	if (flags & XFSA_FIXUP_BNO_OK) {
+#ifdef DEBUG
+		if (error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(
+			i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+	} else {
+		if (error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+#ifdef DEBUG
+	{
+		xfs_alloc_block_t	*bnoblock;
+		xfs_alloc_block_t	*cntblock;
+
+		if (bno_cur->bc_nlevels == 1 &&
+		    cnt_cur->bc_nlevels == 1) {
+			bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
+			cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
+			XFS_WANT_CORRUPTED_RETURN(
+				INT_GET(bnoblock->bb_numrecs, ARCH_CONVERT) == INT_GET(cntblock->bb_numrecs, ARCH_CONVERT));
+		}
+	}
+#endif
+	/*
+	 * Deal with all four cases: the allocated record is contained
+	 * within the freespace record, so we can have new freespace
+	 * at either (or both) end, or no freespace remaining.
+	 */
+	if (rbno == fbno && rlen == flen)	/* whole free extent consumed */
+		nfbno1 = nfbno2 = NULLAGBLOCK;
+	else if (rbno == fbno) {		/* free space remains at the tail */
+		nfbno1 = rbno + rlen;
+		nflen1 = flen - rlen;
+		nfbno2 = NULLAGBLOCK;
+	} else if (rbno + rlen == fbno + flen) {	/* free space remains at the head */
+		nfbno1 = fbno;
+		nflen1 = flen - rlen;
+		nfbno2 = NULLAGBLOCK;
+	} else {				/* free space remains on both sides */
+		nfbno1 = fbno;
+		nflen1 = rbno - fbno;
+		nfbno2 = rbno + rlen;
+		nflen2 = (fbno + flen) - nfbno2;
+	}
+	/*
+	 * Delete the entry from the by-size btree.
+	 */
+	if (error = xfs_alloc_delete(cnt_cur, &i))
+		return error;
+	XFS_WANT_CORRUPTED_RETURN(i == 1);
+	/*
+	 * Add new by-size btree entry(s).
+	 */
+	if (nfbno1 != NULLAGBLOCK) {
+		if (error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		if (error = xfs_alloc_insert(cnt_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	if (nfbno2 != NULLAGBLOCK) {
+		if (error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		if (error = xfs_alloc_insert(cnt_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	/*
+	 * Fix up the by-block btree entry(s).
+	 */
+	if (nfbno1 == NULLAGBLOCK) {
+		/*
+		 * No remaining freespace, just delete the by-block tree entry.
+		 */
+		if (error = xfs_alloc_delete(bno_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	} else {
+		/*
+		 * Update the by-block entry to start later|be shorter.
+		 */
+		if (error = xfs_alloc_update(bno_cur, nfbno1, nflen1))
+			return error;
+	}
+	if (nfbno2 != NULLAGBLOCK) {
+		/*
+		 * 2 resulting free entries, need to add one.
+		 */
+		if (error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		if (error = xfs_alloc_insert(bno_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	return 0;
+}
+
+/*
+ * Read the allocation group's free block array and hand back its buffer.
+ */
+STATIC int				/* error */
+xfs_alloc_read_agfl(
+	xfs_mount_t	*mp,		/* mount point structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
+{
+	xfs_buf_t	*buf;		/* buffer produced by the read */
+	xfs_daddr_t	daddr = XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR);
+	int		error;		/* status of the buffer read */
+
+	ASSERT(agno != NULLAGNUMBER);
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, daddr, 1, 0, &buf);
+	if (error)
+		return error;
+	ASSERT(buf);
+	ASSERT(!XFS_BUF_GETERROR(buf));
+	XFS_BUF_SET_VTYPE_REF(buf, B_FS_AGFL, XFS_AGFL_REF);
+	*bpp = buf;
+	return 0;
+}
+
+#if defined(XFS_ALLOC_TRACE)
+/*
+ * Add an allocation trace entry for an alloc call (args fields plus line).
+ */
+STATIC void
+xfs_alloc_trace_alloc(
+	char		*name,		/* function tag string */
+	char		*str,		/* additional string */
+	xfs_alloc_arg_t	*args,		/* allocation argument structure */
+	int		line)		/* source line number */
+{
+	ktrace_enter(xfs_alloc_trace_buf,
+		(void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),	/* entry type tagged with line */
+		(void *)name,
+		(void *)str,
+		(void *)args->mp,
+		(void *)(__psunsigned_t)args->agno,
+		(void *)(__psunsigned_t)args->agbno,
+		(void *)(__psunsigned_t)args->minlen,
+		(void *)(__psunsigned_t)args->maxlen,
+		(void *)(__psunsigned_t)args->mod,
+		(void *)(__psunsigned_t)args->prod,
+		(void *)(__psunsigned_t)args->minleft,
+		(void *)(__psunsigned_t)args->total,
+		(void *)(__psunsigned_t)args->alignment,
+		(void *)(__psunsigned_t)args->len,
+		(void *)((((__psint_t)args->type) << 16) |	/* type above, otype below */
+			 (__psint_t)args->otype),
+		(void *)(__psint_t)((args->wasdel << 3) |	/* four flags in one value */
+				    (args->wasfromfl << 2) |
+				    (args->isfl << 1) |
+				    (args->userdata << 0)));
+}
+
+/*
+ * Add an allocation trace entry for a free call (extent location and length).
+ */
+STATIC void
+xfs_alloc_trace_free(
+	char		*name,		/* function tag string */
+	char		*str,		/* additional string */
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_agblock_t	agbno,		/* a.g. relative block number */
+	xfs_extlen_t	len,		/* length of extent */
+	int		isfl,		/* set if is freelist allocation/free */
+	int		line)		/* source line number */
+{
+	ktrace_enter(xfs_alloc_trace_buf,
+		(void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),	/* entry type tagged with line */
+		(void *)name,
+		(void *)str,
+		(void *)mp,
+		(void *)(__psunsigned_t)agno,
+		(void *)(__psunsigned_t)agbno,
+		(void *)(__psunsigned_t)len,
+		(void *)(__psint_t)isfl,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);	/* unused trailing slots */
+}
+
+/*
+ * Add an allocation trace entry for modifying an agf (records the agf fields).
+ */
+STATIC void
+xfs_alloc_trace_modagf(
+	char		*name,		/* function tag string */
+	char		*str,		/* additional string */
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_agf_t	*agf,		/* new agf value */
+	int		flags,		/* logging flags for agf */
+	int		line)		/* source line number */
+{
+	ktrace_enter(xfs_alloc_trace_buf,
+		(void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),	/* entry type tagged with line */
+		(void *)name,
+		(void *)str,
+		(void *)mp,
+		(void *)(__psint_t)flags,
+		(void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_longest, ARCH_CONVERT));
+}
+#endif	/* XFS_ALLOC_TRACE */
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate a variable extent in the allocation group args->agno.
+ * args->type and args->agbno determine where in the allocation group the
+ * extent will start.  Extent's length (returned in args->len) will be
+ * between minlen and maxlen, and of the form k * prod + mod unless there's
+ * nothing that large.  On return args->agbno is the starting a.g. block,
+ * or NULLAGBLOCK if we can't do it; the function result is an error code.
+ */
+STATIC int			/* error */
+xfs_alloc_ag_vextent(
+	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
+{
+	int		error;
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent";
+#endif
+
+	ASSERT(args->minlen > 0);
+	ASSERT(args->maxlen > 0);
+	ASSERT(args->minlen <= args->maxlen);
+	ASSERT(args->mod < args->prod);
+	ASSERT(args->alignment > 0);
+	/*
+	 * Branch to correct routine based on the type.
+	 */
+	args->wasfromfl = 0;
+	switch (args->type) {
+	case XFS_ALLOCTYPE_THIS_AG:
+		error = xfs_alloc_ag_vextent_size(args);
+		break;
+	case XFS_ALLOCTYPE_NEAR_BNO:
+		error = xfs_alloc_ag_vextent_near(args);
+		break;
+	case XFS_ALLOCTYPE_THIS_BNO:
+		error = xfs_alloc_ag_vextent_exact(args);
+		break;
+	default:
+		ASSERT(0);
+		/* NOTREACHED; NOTE(review): error stays unset if ASSERT() compiles to nothing */
+	}
+	if (error)
+		return error;
+	/*
+	 * If the allocation worked, need to change the agf structure
+	 * (and log it), and the superblock.
+	 */
+	if (args->agbno != NULLAGBLOCK) {
+		xfs_agf_t	*agf;	/* allocation group freelist header */
+#ifdef XFS_ALLOC_TRACE
+		xfs_mount_t	*mp = args->mp;
+#endif
+		long 		slen = (long)args->len;	/* signed copy, negated for the sb delta */
+
+		ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
+		ASSERT(!(args->wasfromfl) || !args->isfl);
+		ASSERT(args->agbno % args->alignment == 0);
+		if (!(args->wasfromfl)) {
+
+			agf = XFS_BUF_TO_AGF(args->agbp);
+			INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -(args->len));
+			xfs_trans_agblocks_delta(args->tp,
+						 -((long)(args->len)));
+			args->pag->pagf_freeblks -= args->len;
+			ASSERT(INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+				<= INT_GET(agf->agf_length, ARCH_CONVERT));
+			TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
+			xfs_alloc_log_agf(args->tp, args->agbp,
+						XFS_AGF_FREEBLKS);
+		}
+		if (!args->isfl)
+			xfs_trans_mod_sb(args->tp,
+				args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
+					XFS_TRANS_SB_FDBLOCKS, -slen);
+		XFS_STATS_INC(xs_allocx);
+		XFS_STATS_ADD(xs_allocb, args->len);
+	}
+	return 0;
+}
+
+/*
+ * Allocate a variable extent at exactly args->agno/args->agbno.
+ * Extent's length (returned in args->len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * args->agbno is left alone on success, or set to NULLAGBLOCK if we can't.
+ */
+STATIC int			/* error */
+xfs_alloc_ag_vextent_exact(
+	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+{
+	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
+	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
+	xfs_agblock_t	end;	/* end of allocated extent */
+	int		error;
+	xfs_agblock_t	fbno;	/* start block of found extent */
+	xfs_agblock_t	fend;	/* end block of found extent */
+	xfs_extlen_t	flen;	/* length of found extent */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent_exact";
+#endif
+	int		i;	/* success/failure of operation */
+	xfs_agblock_t	maxend;	/* end of maximal extent */
+	xfs_agblock_t	minend;	/* end of minimal extent */
+	xfs_extlen_t	rlen;	/* length of returned extent */
+
+	ASSERT(args->alignment == 1);
+	/*
+	 * Allocate/initialize a cursor for the by-number freespace btree.
+	 */
+	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_BNO, 0, 0);
+	/*
+	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
+	 * Look for the closest free block <= bno, it must contain bno
+	 * if any free block does.
+	 */
+	if (error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen,
+			&i))
+		goto error0;
+	if (!i) {
+		/*
+		 * Didn't find it, return null.
+		 */
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * Grab the freespace record.
+	 */
+	if (error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))
+		goto error0;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	ASSERT(fbno <= args->agbno);
+	minend = args->agbno + args->minlen;
+	maxend = args->agbno + args->maxlen;
+	fend = fbno + flen;
+	/*
+	 * Give up if the freespace isn't long enough for the minimum request.
+	 */
+	if (fend < minend) {
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * End of extent will be smaller of the freespace end and the
+	 * maximal requested end.
+	 */
+	end = XFS_AGBLOCK_MIN(fend, maxend);
+	/*
+	 * Fix the length according to mod and prod if given.
+	 */
+	args->len = end - args->agbno;
+	xfs_alloc_fix_len(args);
+	if (!xfs_alloc_fix_minleft(args)) {	/* it set args->agbno = NULLAGBLOCK */
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+		return 0;
+	}
+	rlen = args->len;
+	ASSERT(args->agbno + rlen <= fend);
+	end = args->agbno + rlen;	/* not read again below */
+	/*
+	 * We are allocating agbno for rlen [agbno .. end]
+	 * Allocate/initialize a cursor for the by-size btree.
+	 */
+	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_CNT, 0, 0);
+	ASSERT(args->agbno + args->len <=
+		INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+			ARCH_CONVERT));
+	if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 
+			args->agbno, args->len, XFSA_FIXUP_BNO_OK)) {
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+		goto error0;
+	}
+	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	TRACE_ALLOC("normal", args);
+	args->wasfromfl = 0;
+	return 0;
+
+error0:	/* every goto here still owns bno_cur; cnt_cur was released at its goto */
+	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+	TRACE_ALLOC("error", args);
+	return error;
+}
+
+/*
+ * Allocate a variable extent near bno in the allocation group agno.
+ * Extent's length (returned in args->len) will be between minlen and
+ * maxlen, and of the form k * prod + mod unless there's nothing that large.
+ * The starting a.g. block is returned in args->agbno, or NULLAGBLOCK if
+ * we can't do it; the function value is 0 or an error code.
+ * Every btree cursor opened here is released again before returning.
+ */
+STATIC int				/* error */
+xfs_alloc_ag_vextent_near(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
+	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
+	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent_near";
+#endif
+	xfs_agblock_t	gtbno;		/* start bno of right side entry */
+	xfs_agblock_t	gtbnoa;		/* aligned ... */
+	xfs_extlen_t	gtdiff;		/* difference to right side entry */
+	xfs_extlen_t	gtlen;		/* length of right side entry */
+	xfs_extlen_t	gtlena;		/* aligned ... */
+	xfs_agblock_t	gtnew;		/* useful start bno of right side */
+	int		error;		/* error code */
+	int		i;		/* result code, temporary */
+	int		j;		/* result code, temporary */
+	xfs_agblock_t	ltbno;		/* start bno of left side entry */
+	xfs_agblock_t	ltbnoa;		/* aligned ... */
+	xfs_extlen_t	ltdiff;		/* difference to left side entry */
+	/*REFERENCED*/
+	xfs_agblock_t	ltend;		/* end bno of left side entry */
+	xfs_extlen_t	ltlen;		/* length of left side entry */
+	xfs_extlen_t	ltlena;		/* aligned ... */
+	xfs_agblock_t	ltnew;		/* useful start bno of left side */
+	xfs_extlen_t	rlen;		/* length of returned extent */
+#if defined(DEBUG) && defined(__KERNEL__)
+	/*
+	 * Randomly don't execute the first algorithm.
+	 */
+	static int	seed;		/* randomizing seed value */
+	int		dofirst;	/* set to do first algorithm */
+	timespec_t	now;		/* current time */
+
+	if (!seed) {
+		nanotime(&now);
+		seed = (int)now.tv_sec ^ (int)now.tv_nsec;
+	}
+	dofirst = random() & 1;
+#endif
+	/*
+	 * Get a cursor for the by-size btree.
+	 */
+	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_CNT, 0, 0);
+	ltlen = 0;
+	bno_cur_lt = bno_cur_gt = NULL;
+	/*
+	 * See if there are any free extents as big as maxlen.
+	 */
+	if (error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))
+		goto error0;
+	/*
+	 * If none, then pick up the last entry in the tree unless the
+	 * tree is empty.
+	 */
+	if (!i) {
+		if (error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
+				&ltlen, &i))
+			goto error0;
+		if (i == 0 || ltlen == 0) {
+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			return 0;
+		}
+		ASSERT(i == 1);
+	}
+	args->wasfromfl = 0;
+	/*
+	 * First algorithm.
+	 * If the requested extent is large wrt the freespaces available
+	 * in this a.g., then the cursor will be pointing to a btree entry
+	 * near the right edge of the tree.  If it's in the last btree leaf
+	 * block, then we just examine all the entries in that block
+	 * that are big enough, and pick the best one.
+	 * This is written as a while loop so we can break out of it,
+	 * but we never loop back to the top.
+	 */
+	while (xfs_btree_islastblock(cnt_cur, 0)) {
+		xfs_extlen_t	bdiff;
+		int		besti;
+		xfs_extlen_t	blen;
+		xfs_agblock_t	bnew;
+
+#if defined(DEBUG) && defined(__KERNEL__)
+		if (!dofirst)
+			break;
+#endif
+		/*
+		 * Start from the entry that lookup found, sequence through
+		 * all larger free blocks.  If we're actually pointing at a
+		 * record smaller than maxlen, go to the start of this block,
+		 * and skip all those smaller than minlen.
+		 */
+		if (ltlen || args->alignment > 1) {
+			cnt_cur->bc_ptrs[0] = 1;
+			do {
+				if (error = xfs_alloc_get_rec(cnt_cur, &ltbno,
+						&ltlen, &i))
+					goto error0;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+				if (ltlen >= args->minlen)
+					break;
+				if (error = xfs_alloc_increment(cnt_cur, 0, &i))
+					goto error0;
+			} while (i);
+			ASSERT(ltlen >= args->minlen);
+			if (!i)
+				break;
+		}
+		i = cnt_cur->bc_ptrs[0];
+		for (j = 1, blen = 0, bdiff = 0;
+		     !error && j && (blen < args->maxlen || bdiff > 0);
+		     error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+			/* Is this entry better than the previous best? */
+			if (error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen,
+					&i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (!xfs_alloc_compute_aligned(ltbno, ltlen,
+					args->alignment, args->minlen,
+					&ltbnoa, &ltlena))
+				continue;
+			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+			xfs_alloc_fix_len(args);
+			ASSERT(args->len >= args->minlen);
+			if (args->len < blen)
+				continue;
+			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+				args->alignment, ltbno, ltlen, &ltnew);
+			if (ltnew != NULLAGBLOCK &&
+			    (args->len > blen || ltdiff < bdiff)) {
+				bdiff = ltdiff;
+				bnew = ltnew;
+				blen = args->len;
+				besti = cnt_cur->bc_ptrs[0];
+			}
+		}
+		/* The scan also stops if stepping the cursor failed. */
+		if (error)
+			goto error0;
+		/*
+		 * It didn't work.  We COULD be in a case where
+		 * there's a good record somewhere, so try again.
+		 */
+		if (blen == 0)
+			break;
+		/*
+		 * Point at the best entry, and retrieve it again.
+		 */
+		cnt_cur->bc_ptrs[0] = besti;
+		if (error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		ltend = ltbno + ltlen;
+		ASSERT(ltend <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+				ARCH_CONVERT));
+		args->len = blen;
+		if (!xfs_alloc_fix_minleft(args)) {
+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			TRACE_ALLOC("nominleft", args);
+			return 0;
+		}
+		blen = args->len;
+		/*
+		 * We are allocating starting at bnew for blen blocks.
+		 */
+		args->agbno = bnew;
+		ASSERT(bnew >= ltbno);
+		ASSERT(bnew + blen <= ltend);
+		/* Set up a cursor for the by-bno tree. */
+		bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
+			args->agbp, args->agno, XFS_BTNUM_BNO, 0, 0);
+		/* Fix up the btree entries. */
+		if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
+				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))
+			goto error0;
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+		TRACE_ALLOC("first", args);
+		return 0;
+	}
+	/*
+	 * Second algorithm.
+	 * Search in the by-bno tree to the left and to the right
+	 * simultaneously, until in each case we find a space big enough,
+	 * or run into the edge of the tree.  When we run into the edge,
+	 * we deallocate that cursor.
+	 * If both searches succeed, we compare the two spaces and pick
+	 * the better one.
+	 * With alignment, it's possible for both to fail; the upper
+	 * level algorithm that picks allocation groups for allocations
+	 * is not supposed to do this.
+	 */
+	/*
+	 * Allocate and initialize the cursor for the leftward search.
+	 */
+	bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_BNO, 0, 0);
+	/*
+	 * Lookup <= bno to find the leftward search's starting point.
+	 */
+	if (error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen,
+			&i))
+		goto error0;
+	if (!i) {
+		/*
+		 * Didn't find anything; use this cursor for the rightward
+		 * search.
+		 */
+		bno_cur_gt = bno_cur_lt;
+		bno_cur_lt = NULL;
+	}
+	/*
+	 * Found something.  Duplicate the cursor for the rightward search.
+	 */
+	else if (error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))
+		goto error0;
+	/*
+	 * Increment the cursor, so we will point at the entry just right
+	 * of the leftward entry if any, or to the leftmost entry.
+	 */
+	if (error = xfs_alloc_increment(bno_cur_gt, 0, &i))
+		goto error0;
+	if (!i) {
+		/*
+		 * It failed, there are no rightward entries.
+		 */
+		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
+		bno_cur_gt = NULL;
+	}
+	/*
+	 * Loop going left with the leftward cursor, right with the
+	 * rightward cursor, until either both directions give up or
+	 * we find an entry at least as big as minlen.
+	 */
+	do {
+		if (bno_cur_lt) {
+			if (error = xfs_alloc_get_rec(bno_cur_lt, &ltbno,
+					&ltlen, &i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (xfs_alloc_compute_aligned(ltbno, ltlen,
+					args->alignment, args->minlen,
+					&ltbnoa, &ltlena))
+				break;
+			if (error = xfs_alloc_decrement(bno_cur_lt, 0, &i))
+				goto error0;
+			if (!i) {
+				xfs_btree_del_cursor(bno_cur_lt,
+						     XFS_BTREE_NOERROR);
+				bno_cur_lt = NULL;
+			}
+		}
+		if (bno_cur_gt) {
+			if (error = xfs_alloc_get_rec(bno_cur_gt, &gtbno,
+					&gtlen, &i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (xfs_alloc_compute_aligned(gtbno, gtlen,
+					args->alignment, args->minlen,
+					&gtbnoa, &gtlena))
+				break;
+			if (error = xfs_alloc_increment(bno_cur_gt, 0, &i))
+				goto error0;
+			if (!i) {
+				xfs_btree_del_cursor(bno_cur_gt,
+						     XFS_BTREE_NOERROR);
+				bno_cur_gt = NULL;
+			}
+		}
+	} while (bno_cur_lt || bno_cur_gt);
+	/*
+	 * Got both cursors still active, need to find better entry.
+	 */
+	if (bno_cur_lt && bno_cur_gt) {
+		/*
+		 * Left side is long enough, look for a right side entry.
+		 */
+		if (ltlena >= args->minlen) {
+			/*
+			 * Fix up the length.
+			 */
+			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+			xfs_alloc_fix_len(args);
+			rlen = args->len;
+			ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+				args->alignment, ltbno, ltlen, &ltnew);
+			/*
+			 * Not perfect.
+			 */
+			if (ltdiff) {
+				/*
+				 * Look until we find a better one, run out of
+				 * space, or run off the end.
+				 */
+				while (bno_cur_lt && bno_cur_gt) {
+					if (error = xfs_alloc_get_rec(
+							bno_cur_gt, &gtbno,
+							&gtlen, &i))
+						goto error0;
+					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+					xfs_alloc_compute_aligned(gtbno, gtlen,
+						args->alignment, args->minlen,
+						&gtbnoa, &gtlena);
+					/*
+					 * The left one is clearly better.
+					 */
+					if (gtbnoa >= args->agbno + ltdiff) {
+						xfs_btree_del_cursor(
+							bno_cur_gt,
+							XFS_BTREE_NOERROR);
+						bno_cur_gt = NULL;
+						break;
+					}
+					/*
+					 * If we reach a big enough entry,
+					 * compare the two and pick the best.
+					 */
+					if (gtlena >= args->minlen) {
+						args->len =
+							XFS_EXTLEN_MIN(gtlena,
+								args->maxlen);
+						xfs_alloc_fix_len(args);
+						rlen = args->len;
+						gtdiff = xfs_alloc_compute_diff(
+							args->agbno, rlen,
+							args->alignment,
+							gtbno, gtlen, &gtnew);
+						/*
+						 * Right side is better.
+						 */
+						if (gtdiff < ltdiff) {
+							xfs_btree_del_cursor(
+								bno_cur_lt,
+								XFS_BTREE_NOERROR);
+							bno_cur_lt = NULL;
+						}
+						/*
+						 * Left side is better.
+						 */
+						else {
+							xfs_btree_del_cursor(
+								bno_cur_gt,
+								XFS_BTREE_NOERROR);
+							bno_cur_gt = NULL;
+						}
+						break;
+					}
+					/*
+					 * Fell off the right end.
+					 */
+					if (error = xfs_alloc_increment(
+							bno_cur_gt, 0, &i))
+						goto error0;
+					if (!i) {
+						xfs_btree_del_cursor(
+							bno_cur_gt,
+							XFS_BTREE_NOERROR);
+						bno_cur_gt = NULL;
+						break;
+					}
+				}
+			}
+			/*
+			 * The left side is perfect, trash the right side.
+			 */
+			else {
+				xfs_btree_del_cursor(bno_cur_gt,
+						     XFS_BTREE_NOERROR);
+				bno_cur_gt = NULL;
+			}
+		}
+		/*
+		 * It's the right side that was found first, look left.
+		 */
+		else {
+			/*
+			 * Fix up the length.
+			 */
+			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
+			xfs_alloc_fix_len(args);
+			rlen = args->len;
+			gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+				args->alignment, gtbno, gtlen, &gtnew);
+			/*
+			 * Right side entry isn't perfect.
+			 */
+			if (gtdiff) {
+				/*
+				 * Look until we find a better one, run out of
+				 * space, or run off the end.
+				 */
+				while (bno_cur_lt && bno_cur_gt) {
+					if (error = xfs_alloc_get_rec(
+							bno_cur_lt, &ltbno,
+							&ltlen, &i))
+						goto error0;
+					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+					xfs_alloc_compute_aligned(ltbno, ltlen,
+						args->alignment, args->minlen,
+						&ltbnoa, &ltlena);
+					/*
+					 * The right one is clearly better.
+					 */
+					if (ltbnoa <= args->agbno - gtdiff) {
+						xfs_btree_del_cursor(
+							bno_cur_lt,
+							XFS_BTREE_NOERROR);
+						bno_cur_lt = NULL;
+						break;
+					}
+					/*
+					 * If we reach a big enough entry,
+					 * compare the two and pick the best.
+					 */
+					if (ltlena >= args->minlen) {
+						args->len = XFS_EXTLEN_MIN(
+							ltlena, args->maxlen);
+						xfs_alloc_fix_len(args);
+						rlen = args->len;
+						ltdiff = xfs_alloc_compute_diff(
+							args->agbno, rlen,
+							args->alignment,
+							ltbno, ltlen, &ltnew);
+						/*
+						 * Left side is better.
+						 */
+						if (ltdiff < gtdiff) {
+							xfs_btree_del_cursor(
+								bno_cur_gt,
+								XFS_BTREE_NOERROR);
+							bno_cur_gt = NULL;
+						}
+						/*
+						 * Right side is better.
+						 */
+						else {
+							xfs_btree_del_cursor(
+								bno_cur_lt,
+								XFS_BTREE_NOERROR);
+							bno_cur_lt = NULL;
+						}
+						break;
+					}
+					/*
+					 * Fell off the left end.
+					 */
+					if (error = xfs_alloc_decrement(
+							bno_cur_lt, 0, &i))
+						goto error0;
+					if (!i) {
+						xfs_btree_del_cursor(bno_cur_lt,
+							XFS_BTREE_NOERROR);
+						bno_cur_lt = NULL;
+						break;
+					}
+				}
+			}
+			/*
+			 * The right side is perfect, trash the left side.
+			 */
+			else {
+				xfs_btree_del_cursor(bno_cur_lt,
+					XFS_BTREE_NOERROR);
+				bno_cur_lt = NULL;
+			}
+		}
+	}
+	/*
+	 * If we couldn't get anything, give up.
+	 */
+	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+		/* Both bno cursors are gone; the by-size one is still held. */
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		TRACE_ALLOC("neither", args);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * At this point we have selected a freespace entry, either to the
+	 * left or to the right.  If it's on the right, copy all the
+	 * useful variables to the "left" set so we only have one
+	 * copy of this code.
+	 */
+	if (bno_cur_gt) {
+		bno_cur_lt = bno_cur_gt;
+		bno_cur_gt = NULL;
+		ltbno = gtbno;
+		ltbnoa = gtbnoa;
+		ltlen = gtlen;
+		ltlena = gtlena;
+		j = 1;
+	} else
+		j = 0;
+	/*
+	 * Fix up the length and compute the useful address.
+	 */
+	ltend = ltbno + ltlen;
+	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+	xfs_alloc_fix_len(args);
+	if (!xfs_alloc_fix_minleft(args)) {
+		TRACE_ALLOC("nominleft", args);
+		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		return 0;
+	}
+	rlen = args->len;
+	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
+		ltlen, &ltnew);
+	ASSERT(ltnew >= ltbno);
+	ASSERT(ltnew + rlen <= ltend);
+	ASSERT(ltnew + rlen <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+		ARCH_CONVERT));
+	args->agbno = ltnew;
+	if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
+			ltnew, rlen, XFSA_FIXUP_BNO_OK))
+		goto error0;
+	TRACE_ALLOC(j ? "gt" : "lt", args);
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+	return 0;
+
+ error0:
+	TRACE_ALLOC("error", args);
+	if (cnt_cur != NULL)
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+	if (bno_cur_lt != NULL)
+		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
+	if (bno_cur_gt != NULL)
+		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Allocate a variable extent anywhere in the allocation group agno.
+ * Extent's length (returned in args->len) will be between minlen and
+ * maxlen, and of the form k * prod + mod unless there's nothing that large.
+ * The starting a.g. block is returned in args->agbno, or NULLAGBLOCK if
+ * we can't do it; the function value is 0 or an error code.
+ */
+STATIC int				/* error */
+xfs_alloc_ag_vextent_size(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
+	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
+	int		error;		/* error result */
+	xfs_agblock_t	fbno;		/* start of found freespace */
+	xfs_extlen_t	flen;		/* length of found freespace */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent_size";
+#endif
+	int		i;		/* temp status variable */
+	xfs_agblock_t	rbno;		/* returned block number */
+	xfs_extlen_t	rlen;		/* length of returned extent */
+
+	/*
+	 * Allocate and initialize a cursor for the by-size btree.
+	 */
+	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_CNT, 0, 0);
+	bno_cur = NULL;
+	/*
+	 * Look for an entry >= maxlen+alignment-1 blocks.
+	 */
+	if (error = xfs_alloc_lookup_ge(cnt_cur, 0,
+			args->maxlen + args->alignment - 1, &i))
+		goto error0;
+	/*
+	 * If none, then pick up the last entry in the tree unless the
+	 * tree is empty.
+	 */
+	if (!i) {
+		if (error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
+				&flen, &i))
+			goto error0;
+		if (i == 0 || flen == 0) {
+			/* i == 0: the helper took a freelist block itself. */
+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			TRACE_ALLOC("noentry", args);
+			return 0;
+		}
+		ASSERT(i == 1);
+	}
+	/* There's a freespace as big as maxlen+alignment-1, get it. */
+	else {
+		if (error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	}
+	/*
+	 * In the first case above, we got the last entry in the
+	 * by-size btree.  Now we check to see if the space hits maxlen
+	 * once aligned; if not, we search left for something better.
+	 * This can't happen in the second case above.
+	 */
+	xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
+		&rbno, &rlen);
+	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
+	XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 
+			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
+	if (rlen < args->maxlen) {
+		xfs_agblock_t	bestfbno;
+		xfs_extlen_t	bestflen;
+		xfs_agblock_t	bestrbno;
+		xfs_extlen_t	bestrlen;
+
+		bestrlen = rlen;
+		bestrbno = rbno;
+		bestflen = flen;
+		bestfbno = fbno;
+		for (;;) {
+			if (error = xfs_alloc_decrement(cnt_cur, 0, &i))
+				goto error0;
+			if (i == 0)
+				break;
+			if (error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
+					&i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			/* Too short to beat the best even before aligning. */
+			if (flen < bestrlen)
+				break;
+			xfs_alloc_compute_aligned(fbno, flen, args->alignment,
+				args->minlen, &rbno, &rlen);
+			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
+			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+				(rlen <= flen && rbno + rlen <= fbno + flen),
+				error0);
+			if (rlen > bestrlen) {
+				bestrlen = rlen;
+				bestrbno = rbno;
+				bestflen = flen;
+				bestfbno = fbno;
+				if (rlen == args->maxlen)
+					break;
+			}
+		} 
+		/* Put the cursor back on the record that was chosen. */
+		if (error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
+				&i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		rlen = bestrlen;
+		rbno = bestrbno;
+		flen = bestflen;
+		fbno = bestfbno;
+	}
+	args->wasfromfl = 0;
+	/* Fix up the length. */
+	args->len = rlen;
+	xfs_alloc_fix_len(args);
+	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		TRACE_ALLOC("nominleft", args);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	rlen = args->len;
+	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+	/*
+	 * Allocate and initialize a cursor for the by-block tree.
+	 */
+	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_BNO, 0, 0);
+	if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
+			rbno, rlen, XFSA_FIXUP_CNT_OK))
+		goto error0;
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+	cnt_cur = bno_cur = NULL;
+	args->len = rlen;
+	args->agbno = rbno;
+	XFS_WANT_CORRUPTED_GOTO(
+		args->agbno + args->len <=
+			INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+			ARCH_CONVERT),
+		error0);
+	TRACE_ALLOC("normal", args);
+	return 0;
+
+error0:
+	TRACE_ALLOC("error", args);
+	if (cnt_cur)
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+	if (bno_cur)
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Deal with the case where only small freespaces remain.
+ * Either return the contents of the last freespace record (*stat == 1,
+ * with *flenp forced to 0 if nothing at least minlen long was found),
+ * or, if there is no record and the request qualifies, take one block
+ * from the freelist and finish the allocation in args (*stat == 0).
+ */
+STATIC int			/* error */
+xfs_alloc_ag_vextent_small(
+	xfs_alloc_arg_t	*args,	/* allocation argument structure */
+	xfs_btree_cur_t	*ccur,	/* by-size cursor */
+	xfs_agblock_t	*fbnop,	/* result block number */
+	xfs_extlen_t	*flenp,	/* result length */
+	int		*stat)	/* status: 0-freelist, 1-normal/none */
+{
+	int		error;
+	xfs_agblock_t	fbno;
+	xfs_extlen_t	flen;
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent_small";
+#endif
+	int		i;
+
+	if (error = xfs_alloc_decrement(ccur, 0, &i))
+		goto error0;
+	if (i) {
+		if (error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	}
+	/*
+	 * Nothing in the btree, try the freelist.  Make sure to respect
+	 * minleft even when pulling from the freelist.
+	 */
+	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
+		 (INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_flcount,
+			ARCH_CONVERT) > args->minleft)) {
+		if (error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))
+			goto error0;
+		if (fbno != NULLAGBLOCK) {
+			if (args->userdata) {
+				xfs_buf_t	*bp;
+
+				bp = xfs_btree_get_bufs(args->mp, args->tp,
+					args->agno, fbno, 0);
+				xfs_trans_binval(args->tp, bp);
+				/*
+				 * Since blocks move to the free list without
+				 * the coordination used in xfs_bmap_finish,
+				 * we can't allow the user to write to the
+				 * block until we know that the transaction
+				 * that moved it to the free list is
+				 * permanently on disk.  The only way to
+				 * ensure that is to make this transaction
+				 * synchronous.
+				 */
+				xfs_trans_set_sync(args->tp);
+			}
+			args->len = 1;
+			args->agbno = fbno;
+			XFS_WANT_CORRUPTED_GOTO(
+				args->agbno + args->len <=
+				INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+					ARCH_CONVERT),
+				error0);
+			args->wasfromfl = 1;
+			TRACE_ALLOC("freelist", args);
+			*stat = 0;
+			return 0;
+		}
+		/* Nothing in the freelist; fbno is already NULLAGBLOCK. */
+		else
+			flen = 0;
+	}
+	/*
+	 * Can't allocate from the freelist for some reason.  Give fbno a
+	 * defined value too, since it is copied out through *fbnop below.
+	 */
+	else {
+		fbno = NULLAGBLOCK;
+		flen = 0;
+	}
+	/* Can't do the allocation, give up. */
+	if (flen < args->minlen) {
+		args->agbno = NULLAGBLOCK;
+		TRACE_ALLOC("notenough", args);
+		flen = 0;
+	}
+	*fbnop = fbno;
+	*flenp = flen;
+	*stat = 1;
+	TRACE_ALLOC("normal", args);
+	return 0;
+
+error0:
+	TRACE_ALLOC("error", args);
+	return error;
+}
+
+/*
+ * Free the extent starting at agno/bno for length.
+ * The extent is merged with any contiguous free neighbors on either
+ * side, both freespace btrees are updated to match, and the AGF and
+ * per-ag free block counts are raised by len.  The superblock free
+ * block count is adjusted too unless isfl is set.
+ * Returns 0 or an error code.
+ */
+STATIC int			/* error */
+xfs_free_ag_extent(
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
+	xfs_agnumber_t	agno,	/* allocation group number */
+	xfs_agblock_t	bno,	/* starting block number */
+	xfs_extlen_t	len,	/* length of extent */
+	int		isfl)	/* set if is freelist blocks - no sb acctg */
+{
+	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
+	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
+	int		error;		/* error return value */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_free_ag_extent";
+#endif
+	xfs_agblock_t	gtbno;		/* start of right neighbor block */
+	xfs_extlen_t	gtlen;		/* length of right neighbor block */
+	int		haveleft;	/* have a left neighbor block */
+	int		haveright;	/* have a right neighbor block */
+	int		i;		/* temp, result code */
+	xfs_agblock_t	ltbno;		/* start of left neighbor block */
+	xfs_extlen_t	ltlen;		/* length of left neighbor block */
+	xfs_mount_t	*mp;		/* mount point struct for filesystem */
+	xfs_agblock_t	nbno;		/* new starting block of freespace */
+	xfs_extlen_t	nlen;		/* new length of freespace */
+
+	mp = tp->t_mountp;
+	/*
+	 * Allocate and initialize a cursor for the by-block btree.
+	 */
+	bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, 0,
+		0);
+	cnt_cur = NULL;
+	/*
+	 * Look for a neighboring block on the left (lower block numbers)
+	 * that is contiguous with this space.
+	 */
+	if (error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))
+		goto error0;
+	if (haveleft) {
+		/* There is a block to our left. */
+		if (error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/* It's not contiguous, though. */
+		if (ltbno + ltlen < bno)
+			haveleft = 0;
+		else {
+			/*
+			 * If this failure happens the request to free this
+			 * space was invalid, it's (partly) already free.
+			 * Very bad.
+			 */
+			XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+		}
+	}
+	/*
+	 * Look for a neighboring block on the right (higher block numbers)
+	 * that is contiguous with this space.
+	 */
+	if (error = xfs_alloc_increment(bno_cur, 0, &haveright))
+		goto error0;
+	if (haveright) {
+		/* There is a block to our right. */
+		if (error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/* It's not contiguous, though. */
+		if (bno + len < gtbno)
+			haveright = 0;
+		else {
+			/*
+			 * If this failure happens the request to free this
+			 * space was invalid, it's (partly) already free.
+			 * Very bad.
+			 */
+			XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+		}
+	}
+	/*
+	 * Now allocate and initialize a cursor for the by-size tree.
+	 */
+	cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, 0,
+		0);
+	/*
+	 * Have both left and right contiguous neighbors.
+	 * Merge all three into a single free block.
+	 */
+	if (haveleft && haveright) {
+		/*
+		 * Delete the old by-size entry on the left.
+		 */
+		if (error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_alloc_delete(cnt_cur, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * Delete the old by-size entry on the right.
+		 */
+		if (error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_alloc_delete(cnt_cur, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * Delete the old by-block entry for the right block.
+		 */
+		if (error = xfs_alloc_delete(bno_cur, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * Move the by-block cursor back to the left neighbor.
+		 */
+		if (error = xfs_alloc_decrement(bno_cur, 0, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+#ifdef DEBUG
+		/*
+		 * Check that this is the right record: delete didn't
+		 * mangle the cursor.
+		 */
+		{
+			xfs_agblock_t	xxbno;
+			xfs_extlen_t	xxlen;
+
+			if (error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
+					&i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(
+				i == 1 && xxbno == ltbno && xxlen == ltlen,
+				error0);
+		}
+#endif
+		/*
+		 * Update remaining by-block entry to the new, joined block.
+		 */
+		nbno = ltbno;
+		nlen = len + ltlen + gtlen;
+		if (error = xfs_alloc_update(bno_cur, nbno, nlen))
+			goto error0;
+	}
+	/*
+	 * Have only a left contiguous neighbor.
+	 * Merge it together with the new freespace.
+	 */
+	else if (haveleft) {
+		/*
+		 * Delete the old by-size entry on the left.
+		 */
+		if (error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_alloc_delete(cnt_cur, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * Back up the by-block cursor to the left neighbor, and
+		 * update its length.
+		 */
+		if (error = xfs_alloc_decrement(bno_cur, 0, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		nbno = ltbno;
+		nlen = len + ltlen;
+		if (error = xfs_alloc_update(bno_cur, nbno, nlen))
+			goto error0;
+	}
+	/*
+	 * Have only a right contiguous neighbor.
+	 * Merge it together with the new freespace.
+	 */
+	else if (haveright) {
+		/*
+		 * Delete the old by-size entry on the right.
+		 */
+		if (error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_alloc_delete(cnt_cur, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * Update the starting block and length of the right
+		 * neighbor in the by-block tree.
+		 */
+		nbno = bno;
+		nlen = len + gtlen;
+		if (error = xfs_alloc_update(bno_cur, nbno, nlen))
+			goto error0;
+	}
+	/*
+	 * No contiguous neighbors.
+	 * Insert the new freespace into the by-block tree.
+	 * NOTE(review): bno/len are not passed to the insert; presumably the
+	 * cursor still carries them from the lookup at the top - confirm.
+	 */
+	else {
+		nbno = bno;
+		nlen = len;
+		if (error = xfs_alloc_insert(bno_cur, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	}
+	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+	bno_cur = NULL;
+	/*
+	 * In all cases we need to insert the new freespace in the by-size tree.
+	 */
+	if (error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))
+		goto error0;
+	/* The merged extent must not already be in the by-size tree. */
+	XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+	if (error = xfs_alloc_insert(cnt_cur, &i))
+		goto error0;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	cnt_cur = NULL;
+	/*
+	 * Update the freespace totals in the ag and superblock.
+	 */
+	{
+		xfs_agf_t	*agf;
+		xfs_perag_t	*pag;		/* per allocation group data */
+
+		agf = XFS_BUF_TO_AGF(agbp);
+		pag = &mp->m_perag[agno];
+		INT_MOD(agf->agf_freeblks, ARCH_CONVERT, len);
+		xfs_trans_agblocks_delta(tp, len);
+		pag->pagf_freeblks += len;
+		XFS_WANT_CORRUPTED_GOTO(
+			INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+				<= INT_GET(agf->agf_length, ARCH_CONVERT),
+			error0);
+		TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
+		xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
+		if (!isfl)
+			xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
+		XFS_STATS_INC(xs_freex);
+		XFS_STATS_ADD(xs_freeb, len);
+	}
+	TRACE_FREE(haveleft ?
+			(haveright ? "both" : "left") :
+			(haveright ? "right" : "none"),
+		agno, bno, len, isfl);
+	return 0;
+
+ error0:
+	TRACE_FREE("error", agno, bno, len, isfl);
+	if (bno_cur)
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+	if (cnt_cur)
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/* 
+ * Visible (exported) allocation/free functions.
+ * Some of these are used just by xfs_alloc_btree.c and this file.
+ */
+
+/*
+ * Compute m_ag_maxlevels from sb_agblocks and m_alloc_mnr[], store it in mp.
+ */
+void
+xfs_alloc_compute_maxlevels(
+	xfs_mount_t	*mp)	/* file system mount structure */
+{
+	int		level;		/* number of btree levels counted */
+	uint		maxblocks;	/* blocks needed at current level */
+	uint		maxleafents;	/* bound on leaf entries per a.g. */
+	int		minleafrecs;	/* minimum leaf records, m_alloc_mnr[0] */
+	int		minnoderecs;	/* minimum node records, m_alloc_mnr[1] */
+
+	maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;	/* half the a.g. blocks, rounded up */
+	minleafrecs = mp->m_alloc_mnr[0];
+	minnoderecs = mp->m_alloc_mnr[1];
+	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;	/* leaf blocks, rounded up */
+	for (level = 1; maxblocks > 1; level++)	/* add a level until one block remains */
+		maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+	mp->m_ag_maxlevels = level;
+}
+
+/*
+ * Decide whether to use this allocation group for this allocation.
+ * If so, fix up the btree freelist's size and return the agf in args->agbp;
+ * if not, args->agbp is NULL.  External so mkfs can call it, too.
+ */
+int				/* error */
+xfs_alloc_fix_freelist(
+	xfs_alloc_arg_t	*args,	/* allocation argument structure */
+	int		flags)	/* XFS_ALLOC_FLAG_... */
+{
+	xfs_buf_t	*agbp;	/* agf buffer pointer */
+	xfs_agf_t	*agf;	/* a.g. freespace structure pointer */
+	xfs_buf_t	*agflbp;/* agfl buffer pointer */
+	xfs_agblock_t	bno;	/* freelist block */
+	xfs_extlen_t	delta;	/* new blocks needed in freelist */
+	int		error;	/* error result code */
+	xfs_extlen_t	longest;/* longest extent in allocation group */
+	xfs_mount_t	*mp;	/* file system mount point structure */
+	xfs_extlen_t	need;	/* total blocks needed in freelist */
+	xfs_perag_t	*pag;	/* per-ag information structure */
+	xfs_alloc_arg_t	targs;	/* local allocation arguments */
+	xfs_trans_t	*tp;	/* transaction pointer */
+
+	mp = args->mp;
+
+	pag = args->pag;
+	tp = args->tp;
+	if (!pag->pagf_init) {
+		if (error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
+				&agbp))
+			return error;
+		if (!pag->pagf_init) {	/* still unset: reject this a.g. */
+			args->agbp = NULL;
+			return 0;
+		}
+	} else
+		agbp = NULL;	/* agf is read further down, only if needed */
+	need = XFS_MIN_FREELIST_PAG(pag, mp);
+	delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+	/*
+	 * If the in-core per-ag counts say there isn't a long enough
+	 * extent, or enough total blocks, reject it.
+	 */
+	longest = (pag->pagf_longest > delta) ?
+		(pag->pagf_longest - delta) :
+		(pag->pagf_flcount > 0 || pag->pagf_longest > 0);	/* 0 or 1 */
+	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
+	    (args->minleft &&
+	     (int)(pag->pagf_freeblks + pag->pagf_flcount -
+		   need - args->total) <
+	     (int)args->minleft)) {
+		if (agbp)
+			xfs_trans_brelse(tp, agbp);
+		args->agbp = NULL;
+		return 0;
+	}
+	/*
+	 * Get the a.g. freespace buffer.
+	 * Can fail if we're not blocking on locks, and it's held.
+	 */
+	if (agbp == NULL) {
+		if (error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
+				&agbp))
+			return error;
+		if (agbp == NULL) {
+			args->agbp = NULL;
+			return 0;
+		}
+	}
+	/*
+	 * Figure out how many blocks we should have in the freelist.
+	 */
+	agf = XFS_BUF_TO_AGF(agbp);
+	need = XFS_MIN_FREELIST(agf, mp);
+	delta = need > INT_GET(agf->agf_flcount, ARCH_CONVERT) ?
+		(need - INT_GET(agf->agf_flcount, ARCH_CONVERT)) : 0;
+	/*
+	 * Same test on the agf values: not enough total or single-extent, reject.
+	 */
+	longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
+	longest = (longest > delta) ? (longest - delta) :
+		(INT_GET(agf->agf_flcount, ARCH_CONVERT) > 0 || longest > 0);
+	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
+	     (args->minleft &&
+		(int)(INT_GET(agf->agf_freeblks, ARCH_CONVERT) +
+		   INT_GET(agf->agf_flcount, ARCH_CONVERT) - need - args->total) <
+	     (int)args->minleft)) {
+		xfs_trans_brelse(tp, agbp);
+		args->agbp = NULL;
+		return 0;
+	}
+	/*
+	 * Make the freelist shorter if it's too long.
+	 */
+	while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) {
+		xfs_buf_t	*bp;	/* buffer for the block taken off the freelist */
+
+		if (error = xfs_alloc_get_freelist(tp, agbp, &bno))
+			return error;
+		if (error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))
+			return error;
+		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
+		xfs_trans_binval(tp, bp);
+		/*
+		 * Since blocks move to the free list without
+		 * the coordination used in xfs_bmap_finish,
+		 * we can't allow block to be available for reallocation
+		 * and non-transaction writing (user data)
+		 * until we know that the transaction
+		 * that moved it to the free list is
+		 * permanently on disk.  The only way to
+		 * ensure that is to make this transaction
+		 * synchronous.  The one exception to this
+		 * is in the case of wsync-mounted filesystem
+		 * where we know that any block that made it
+		 * onto the freelist won't be seen again in
+		 * the file from which it came since the transactions
+		 * that free metadata blocks or shrink inodes in
+		 * wsync filesystems are all themselves synchronous.
+		 */
+		if (!(mp->m_flags & XFS_MOUNT_WSYNC))
+			xfs_trans_set_sync(tp);
+	}
+	/*
+	 * Initialize the local args (targs) used to allocate freelist blocks.
+	 */
+	targs.tp = tp;
+	targs.mp = mp;
+	targs.agbp = agbp;
+	targs.agno = args->agno;
+	targs.mod = targs.minleft = targs.wasdel = targs.userdata =
+		targs.minalignslop = 0;
+	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
+	targs.type = XFS_ALLOCTYPE_THIS_AG;
+	targs.pag = pag;
+	if (error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))
+		return error;
+	/*
+	 * Make the freelist longer if it's too short.
+	 */
+	while (INT_GET(agf->agf_flcount, ARCH_CONVERT) < need) {
+		targs.agbno = 0;
+		targs.maxlen = need - INT_GET(agf->agf_flcount, ARCH_CONVERT);
+		/*
+		 * Allocate as many blocks as possible at once.
+		 */
+		if (error = xfs_alloc_ag_vextent(&targs))
+			return error;
+		/*
+		 * Stop if we run out.  Won't happen if callers are obeying
+		 * the restrictions correctly.  Can happen for free calls
+		 * on a completely full ag.
+		 */
+		if (targs.agbno == NULLAGBLOCK)
+			break;
+		/*
+		 * Put each allocated block on the list.
+		 */
+		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
+			if (error = xfs_alloc_put_freelist(tp, agbp, agflbp,
+					bno))
+				return error;
+		}
+	}
+	args->agbp = agbp;	/* accepted: hand the agf buffer back */
+	return 0;
+}
+
+/*
+ * Get a block from the freelist.
+ * Returns its block number in *bnop (NULLAGBLOCK if the freelist is empty).
+ */
+int				/* error */
+xfs_alloc_get_freelist(
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
+	xfs_agblock_t	*bnop)	/* block address retrieved from freelist */
+{
+	xfs_agf_t	*agf;	/* a.g. freespace structure */
+	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
+	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
+	xfs_agblock_t	bno;	/* block number returned */
+	int		error;	/* error return value */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_get_freelist";
+#endif
+	xfs_mount_t	*mp;	/* mount structure */
+	xfs_perag_t	*pag;	/* per allocation group data */
+
+	agf = XFS_BUF_TO_AGF(agbp);
+	/*
+	 * Freelist is empty, give up.
+	 */
+	if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0) {
+		*bnop = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * Read the array of free blocks.
+	 */
+	mp = tp->t_mountp;
+	if (error = xfs_alloc_read_agfl(mp, tp,
+			INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp))
+		return error;
+	agfl = XFS_BUF_TO_AGFL(agflbp);
+	/*
+	 * Take the entry at agf_flfirst, then advance agf_flfirst (wrapping
+	 */
+	bno = INT_GET(agfl->agfl_bno[INT_GET(agf->agf_flfirst, ARCH_CONVERT)], ARCH_CONVERT);
+	INT_MOD(agf->agf_flfirst, ARCH_CONVERT, 1);
+	xfs_trans_brelse(tp, agflbp);	/* agfl itself is not modified here */
+	if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) == XFS_AGFL_SIZE)
+		INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);	/* wrap to slot 0 */
+	pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
+	INT_MOD(agf->agf_flcount, ARCH_CONVERT, -1);
+	xfs_trans_agflist_delta(tp, -1);
+	pag->pagf_flcount--;	/* keep the in-core count in step with the agf */
+	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+	*bnop = bno;
+	return 0;
+}
+
+/*
+ * Log the given fields from the agf structure via xfs_trans_log_buf.
+ */
+void
+xfs_alloc_log_agf(
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
+	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
+{
+	int	first;		/* first byte offset */
+	int	last;		/* last byte offset */
+	static const short	offsets[] = {	/* agf field offsets, then struct size */
+		offsetof(xfs_agf_t, agf_magicnum),
+		offsetof(xfs_agf_t, agf_versionnum),
+		offsetof(xfs_agf_t, agf_seqno),
+		offsetof(xfs_agf_t, agf_length),
+		offsetof(xfs_agf_t, agf_roots[0]),
+		offsetof(xfs_agf_t, agf_levels[0]),
+		offsetof(xfs_agf_t, agf_flfirst),
+		offsetof(xfs_agf_t, agf_fllast),
+		offsetof(xfs_agf_t, agf_flcount),
+		offsetof(xfs_agf_t, agf_freeblks),
+		offsetof(xfs_agf_t, agf_longest),
+		sizeof(xfs_agf_t)
+	};
+
+	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
+	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
+}
+
+/*
+ * Interface for inode allocation to force the pag data to be initialized.
+ */
+int					/* error */
+xfs_alloc_pagf_init(
+	xfs_mount_t		*mp,	/* file system mount structure */
+	xfs_trans_t		*tp,	/* transaction pointer */
+	xfs_agnumber_t		agno,	/* allocation group number */
+	int			flags)	/* XFS_ALLOC_FLAG_... */
+{
+	xfs_buf_t			*bp;	/* agf buffer, released before return */
+	int			error;	/* error return value */
+
+	if (error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))
+		return error;
+	if (bp)	/* read_agf can succeed without handing back a buffer */
+		xfs_trans_brelse(tp, bp);
+	return 0;
+}
+
+/*
+ * Put the block on the freelist for the allocation group.
+ */
+int					/* error */
+xfs_alloc_put_freelist(
+	xfs_trans_t		*tp,	/* transaction pointer */
+	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
+	xfs_buf_t		*agflbp,/* buffer for a.g. free block array, or NULL */
+	xfs_agblock_t		bno)	/* block being freed */
+{
+	xfs_agf_t		*agf;	/* a.g. freespace structure */
+	xfs_agfl_t		*agfl;	/* a.g. free block array */
+	xfs_agblock_t		*blockp;/* pointer to array entry */
+	int			error;	/* error return value */
+#ifdef XFS_ALLOC_TRACE
+	static char		fname[] = "xfs_alloc_put_freelist";
+#endif
+	xfs_mount_t		*mp;	/* mount structure */
+	xfs_perag_t		*pag;	/* per allocation group data */
+
+	agf = XFS_BUF_TO_AGF(agbp);
+	mp = tp->t_mountp;
+
+	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,	/* caller passed no agfl buffer */
+			INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)))
+		return error;
+	agfl = XFS_BUF_TO_AGFL(agflbp);
+        INT_MOD(agf->agf_fllast, ARCH_CONVERT, 1);
+        if (INT_GET(agf->agf_fllast, ARCH_CONVERT) == XFS_AGFL_SIZE)
+		INT_ZERO(agf->agf_fllast, ARCH_CONVERT);	/* wrap to slot 0 */
+	pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
+	INT_MOD(agf->agf_flcount, ARCH_CONVERT, 1);
+	xfs_trans_agflist_delta(tp, 1);
+	pag->pagf_flcount++;	/* keep the in-core count in step with the agf */
+	ASSERT(INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE);
+	blockp = &agfl->agfl_bno[INT_GET(agf->agf_fllast, ARCH_CONVERT)];
+	INT_SET(*blockp, ARCH_CONVERT, bno);
+	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+	xfs_trans_log_buf(tp, agflbp,	/* log just the one array entry written */
+		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
+		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
+			sizeof(xfs_agblock_t) - 1));
+	return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section); *bpp may be NULL.
+ */
+int					/* error */
+xfs_alloc_read_agf(
+	xfs_mount_t	*mp,		/* mount point structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	int		flags,		/* XFS_ALLOC_FLAG_... */
+	xfs_buf_t	**bpp)		/* buffer for the ag freelist header */
+{
+	xfs_agf_t	*agf;		/* ag freelist header */
+	int		agf_ok;		/* set if agf is consistent */
+	xfs_buf_t	*bp;		/* return value */
+	xfs_daddr_t	d;		/* disk block address */
+	int		error;		/* error return value */
+	xfs_perag_t	*pag;		/* per allocation group data */
+
+	ASSERT(agno != NULLAGNUMBER);
+	d = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR);
+	if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 1,
+			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
+			&bp))
+		return error;
+	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+	if (!bp) {	/* no buffer and no error: report that to the caller */
+		*bpp = NULL;
+		return 0;
+	}
+	/*
+	 * Validate the magic number, version and counters of the agf block.
+	 */
+	agf = XFS_BUF_TO_AGF(bp);
+	agf_ok =
+		INT_GET(agf->agf_magicnum, ARCH_CONVERT) == XFS_AGF_MAGIC &&
+		XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT)) &&
+		INT_GET(agf->agf_freeblks, ARCH_CONVERT) <=
+				INT_GET(agf->agf_length, ARCH_CONVERT) &&
+		INT_GET(agf->agf_flfirst, ARCH_CONVERT) < XFS_AGFL_SIZE &&
+		INT_GET(agf->agf_fllast,  ARCH_CONVERT) < XFS_AGFL_SIZE &&
+		INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE;
+	if (XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
+			XFS_RANDOM_ALLOC_READ_AGF)) {
+		xfs_trans_brelse(tp, bp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	pag = &mp->m_perag[agno];
+	if (!pag->pagf_init) {	/* first read: seed the in-core copy */
+		pag->pagf_freeblks = INT_GET(agf->agf_freeblks, ARCH_CONVERT);
+		pag->pagf_flcount = INT_GET(agf->agf_flcount, ARCH_CONVERT);
+		pag->pagf_longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
+		pag->pagf_levels[XFS_BTNUM_BNOi] =
+			INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT);
+		pag->pagf_levels[XFS_BTNUM_CNTi] =
+			INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT);
+		pag->pagf_init = 1;
+	}
+#ifdef DEBUG
+	else if (!XFS_FORCED_SHUTDOWN(mp)) {	/* in-core copy must match the agf */
+		ASSERT(pag->pagf_freeblks == INT_GET(agf->agf_freeblks, ARCH_CONVERT));
+		ASSERT(pag->pagf_flcount == INT_GET(agf->agf_flcount, ARCH_CONVERT));
+		ASSERT(pag->pagf_longest == INT_GET(agf->agf_longest, ARCH_CONVERT));
+		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
+		       INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT));
+		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
+		       INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT));
+	}
+#endif
+	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Allocate an extent (variable-size).
+ * Depending on the allocation type, we either look in a single allocation
+ * group or loop over them.  args->fsbno is NULLFSBLOCK if nothing was found.
+ */
+int				/* error */
+xfs_alloc_vextent(
+	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+{
+	xfs_agblock_t	agsize;	/* allocation group size */
+	int		error;	/* error return value */
+	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_vextent";
+#endif
+	xfs_extlen_t	minleft;/* minimum left value, temp copy */
+	xfs_mount_t	*mp;	/* mount structure pointer */
+	xfs_agnumber_t	sagno;	/* starting allocation group number */
+	xfs_alloctype_t	type;	/* input allocation type */
+
+	mp = args->mp;
+	type = args->otype = args->type;
+	args->agbno = NULLAGBLOCK;
+	/*
+	 * Just fix this up, for the case where the last a.g. is shorter
+	 * (or there's only one a.g.) and the caller couldn't easily figure
+	 * that out (xfs_bmap_alloc).
+	 */
+	agsize = mp->m_sb.sb_agblocks;
+	if (args->maxlen > agsize)
+		args->maxlen = agsize;
+	if (args->alignment == 0)
+		args->alignment = 1;
+	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
+	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
+	ASSERT(args->minlen <= args->maxlen);
+	ASSERT(args->minlen <= agsize);
+	ASSERT(args->mod < args->prod);
+	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
+	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
+	    args->minlen > args->maxlen || args->minlen > agsize ||
+	    args->mod >= args->prod) {
+		args->fsbno = NULLFSBLOCK;	/* bad arguments: no allocation, no error */
+		TRACE_ALLOC("badargs", args);
+		return 0;
+	}
+	switch (type) {
+	case XFS_ALLOCTYPE_THIS_AG:
+	case XFS_ALLOCTYPE_NEAR_BNO:
+	case XFS_ALLOCTYPE_THIS_BNO:
+		/*
+		 * These three force us into a single a.g.
+		 */
+		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+		mrlock(&mp->m_peraglock, MR_ACCESS, PINOD);
+		args->pag = &mp->m_perag[args->agno];
+		minleft = args->minleft;	/* saved across the fix_freelist call */
+		args->minleft = 0;	/* fix_freelist sees minleft == 0 here */
+		error = xfs_alloc_fix_freelist(args, 0);
+		args->minleft = minleft;	/* restored before the allocation */
+		if (error) {
+			TRACE_ALLOC("nofix", args);
+			goto error0;
+		}
+		if (!args->agbp) {	/* a.g. rejected; agbno is still NULLAGBLOCK */
+			mrunlock(&mp->m_peraglock);
+			TRACE_ALLOC("noagbp", args);
+			break;
+		}
+		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+		if (error = xfs_alloc_ag_vextent(args))
+			goto error0;
+		mrunlock(&mp->m_peraglock);
+		break;
+	case XFS_ALLOCTYPE_START_BNO:
+		/*
+		 * Try near allocation first, then anywhere-in-ag after
+		 * the first a.g. fails.
+		 */
+		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+		args->type = XFS_ALLOCTYPE_NEAR_BNO;
+		/* FALLTHROUGH */
+	case XFS_ALLOCTYPE_ANY_AG:
+	case XFS_ALLOCTYPE_START_AG:
+	case XFS_ALLOCTYPE_FIRST_AG:
+		/*
+		 * Rotate through the allocation groups looking for a winner.
+		 */
+		if (type == XFS_ALLOCTYPE_ANY_AG) {
+			/*
+			 * Start with the last place we left off.
+			 */
+			args->agno = sagno = mp->m_agfrotor;
+			args->type = XFS_ALLOCTYPE_THIS_AG;
+			flags = XFS_ALLOC_FLAG_TRYLOCK;
+		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
+			/*
+			 * Start with allocation group given by bno.
+			 */
+			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+			args->type = XFS_ALLOCTYPE_THIS_AG;
+			sagno = 0;	/* loop ends when agno wraps to a.g. 0 */
+			flags = 0;	/* no trylock pass for this type */
+		} else {
+			if (type == XFS_ALLOCTYPE_START_AG)
+				args->type = XFS_ALLOCTYPE_THIS_AG;
+			/*
+			 * Start with the given allocation group.
+			 */
+			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+			flags = XFS_ALLOC_FLAG_TRYLOCK;
+		}
+		/*
+		 * Loop over allocation groups twice; first time with
+		 * trylock set, second time without.
+		 */
+		for (;;) {
+			mrlock(&mp->m_peraglock, MR_ACCESS, PINOD);
+			args->pag = &mp->m_perag[args->agno];
+			if (error = xfs_alloc_fix_freelist(args, flags)) {
+				TRACE_ALLOC("nofix", args);
+				goto error0;
+			}
+			/*
+			 * If we get a buffer back then the allocation will fly.
+			 */
+			if (args->agbp) {
+				if (error = xfs_alloc_ag_vextent(args))
+					goto error0;
+				mrunlock(&mp->m_peraglock);
+				break;
+			}
+			mrunlock(&mp->m_peraglock);
+			TRACE_ALLOC("loopfailed", args);
+			/*
+			 * Didn't work, figure out the next iteration.
+			 */
+			if (args->agno == sagno &&
+			    type == XFS_ALLOCTYPE_START_BNO)
+				args->type = XFS_ALLOCTYPE_THIS_AG;
+			if (++(args->agno) == mp->m_sb.sb_agcount)
+				args->agno = 0;
+			/*
+			 * Reached the starting a.g., must either be done
+			 * or switch to non-trylock mode.
+			 */
+			if (args->agno == sagno) {
+				if (flags == 0) {
+					args->agbno = NULLAGBLOCK;
+					TRACE_ALLOC("allfailed", args);
+					break;
+				}
+				flags = 0;
+				if (type == XFS_ALLOCTYPE_START_BNO) {
+					args->agbno = XFS_FSB_TO_AGBNO(mp,
+						args->fsbno);
+					args->type = XFS_ALLOCTYPE_NEAR_BNO;
+				}
+			}
+		}
+		mp->m_agfrotor = (args->agno + 1) % mp->m_sb.sb_agcount;
+		break;
+	default:
+		ASSERT(0);
+		/* NOTREACHED */
+	}
+	if (args->agbno == NULLAGBLOCK)
+		args->fsbno = NULLFSBLOCK;
+	else {
+		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+#ifdef DEBUG
+		ASSERT(args->len >= args->minlen);
+		ASSERT(args->len <= args->maxlen);
+		ASSERT(args->agbno % args->alignment == 0);
+		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
+			args->len);
+#endif
+	}
+	return 0;
+error0:	/* every jump here is made with m_peraglock held */
+	mrunlock(&mp->m_peraglock);
+	return error;
+}
+
+/*
+ * Free an extent.
+ * Just break up the extent address and hand off to xfs_free_ag_extent
+ * after fixing up the freelist.
+ */
+int				/* error */
+xfs_free_extent(
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_fsblock_t	bno,	/* starting block number of extent */
+	xfs_extlen_t	len)	/* length of extent */
+{
+#ifdef DEBUG
+	xfs_agf_t	*agf;	/* a.g. freespace header */
+#endif
+	xfs_alloc_arg_t	args;	/* allocation argument structure */
+	int		error;	/* error return value */
+
+	ASSERT(len != 0);
+	args.tp = tp;
+	args.mp = tp->t_mountp;
+	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
+	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
+	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+	args.alignment = 1;
+	args.minlen = args.minleft = args.minalignslop = 0;	/* minleft 0: fix_freelist skips args.total */
+	mrlock(&args.mp->m_peraglock, MR_ACCESS, PINOD);
+	args.pag = &args.mp->m_perag[args.agno];
+	if (error = xfs_alloc_fix_freelist(&args, 0))
+		goto error0;
+#ifdef DEBUG
+	ASSERT(args.agbp != NULL);
+	agf = XFS_BUF_TO_AGF(args.agbp);
+	ASSERT(args.agbno + len <= INT_GET(agf->agf_length, ARCH_CONVERT));
+#endif
+	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
+		len, 0);
+error0:	/* success path falls through here too, to drop the lock */
+	mrunlock(&args.mp->m_peraglock);
+	return error;
+}
diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c
new file mode 100644
index 000000000..c6d0e0fdd
--- /dev/null
+++ b/libxfs/xfs_alloc_btree.c
@@ -0,0 +1,2136 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Free space allocation for XFS.
+ */
+
+#include <xfs.h>
+
+/*
+ * Single level of the xfs_alloc_delete record deletion routine.
+ * Delete record pointed to by cur/level.
+ * Remove the record from its block then rebalance the tree.
+ * Sets *stat to 0 for failure, 1 for done, 2 to go on to the next level.
+ */
+STATIC int				/* error */
+xfs_alloc_delrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level removing record from */
+	int			*stat)	/* fail/done/go-on */
+{
+	xfs_agf_t		*agf;	/* allocation group freelist header */
+	xfs_alloc_block_t	*block;	/* btree block record/key lives in */
+	xfs_agblock_t		bno;	/* btree block number */
+	xfs_buf_t		*bp;	/* buffer for block */
+	int			error;	/* error return value */
+	int			i;	/* loop index */
+	xfs_alloc_key_t		key;	/* kp points here if block is level 0 */
+	xfs_agblock_t		lbno;	/* left block's block number */
+	xfs_buf_t		*lbp;	/* left block's buffer pointer */
+	xfs_alloc_block_t	*left;	/* left btree block */
+	xfs_alloc_key_t		*lkp;	/* left block key pointer */
+	xfs_alloc_ptr_t		*lpp;	/* left block address pointer */
+	int			lrecs;	/* number of records in left block */
+	xfs_alloc_rec_t		*lrp;	/* left block record pointer */
+	xfs_mount_t		*mp;	/* mount structure */
+	int			ptr;	/* index in btree block for this rec */
+	xfs_agblock_t		rbno;	/* right block's block number */
+	xfs_buf_t		*rbp;	/* right block's buffer pointer */
+	xfs_alloc_block_t	*right;	/* right btree block */
+	xfs_alloc_key_t		*rkp;	/* right block key pointer */
+	xfs_alloc_ptr_t		*rpp;	/* right block address pointer */
+	int			rrecs;	/* number of records in right block */
+	xfs_alloc_rec_t		*rrp;	/* right block record pointer */
+	xfs_btree_cur_t		*tcur;	/* temporary btree cursor */
+
+	/*
+	 * Get the index of the entry being deleted, check for nothing there.
+	 */
+	ptr = cur->bc_ptrs[level];
+	if (ptr == 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Get the buffer & block containing the record or key/ptr.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+#endif
+	/*
+	 * Fail if we're off the end of the block.
+	 */
+	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 0;
+		return 0;
+	}
+	XFS_STATS_INC(xs_abt_delrec);
+	/*
+	 * It's a nonleaf.  Excise the key and ptr being deleted, by
+	 * sliding the entries past them down one.
+	 * Log the changed areas of the block.
+	 */
+	if (level > 0) {
+		lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+		lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+			ovbcopy(&lkp[ptr], &lkp[ptr - 1],
+				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */
+			ovbcopy(&lpp[ptr], &lpp[ptr - 1],
+				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */
+			xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+			xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+		}
+	}
+	/*
+	 * It's a leaf.  Excise the record being deleted, by sliding the
+	 * entries past it down one.  Log the changed areas of the block.
+	 */
+	else {
+		lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
+		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+			ovbcopy(&lrp[ptr], &lrp[ptr - 1],
+				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp));
+			xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+		}
+		/*
+		 * If it's the first record in the block, we'll need a key
+		 * structure to pass up to the next level (updkey).
+		 */
+		if (ptr == 1) {
+			key.ar_startblock = lrp->ar_startblock; /* INT_: direct copy */
+			key.ar_blockcount = lrp->ar_blockcount; /* INT_: direct copy */
+			lkp = &key;
+		}
+	}
+	/*
+	 * Decrement and log the number of entries in the block.
+	 */
+	INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+	/*
+	 * See if the longest free extent in the allocation group was
+	 * changed by this operation.  True if it's the by-size btree, and
+	 * this is the leaf level, and there is no right sibling block,
+	 * and this was the last record.
+	 */
+	agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+	mp = cur->bc_mp;
+
+	if (level == 0 &&
+	    cur->bc_btnum == XFS_BTNUM_CNT &&
+	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+	    ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		ASSERT(ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT) + 1);
+		/*
+		 * There are still records in the block.  Grab the size
+		 * from the last one.
+		 */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+			rrp = XFS_ALLOC_REC_ADDR(block, INT_GET(block->bb_numrecs, ARCH_CONVERT), cur);
+			INT_COPY(agf->agf_longest, rrp->ar_blockcount, ARCH_CONVERT);
+		}
+		/*
+		 * No free extents left.
+		 */
+		else
+			INT_ZERO(agf->agf_longest, ARCH_CONVERT);
+		mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest =
+			INT_GET(agf->agf_longest, ARCH_CONVERT);
+		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+			XFS_AGF_LONGEST);
+	}
+	/*
+	 * Is this the root level?  If so, we're almost done.
+	 */
+	if (level == cur->bc_nlevels - 1) {
+		/*
+		 * If this is the root level,
+		 * and there's only one entry left,
+		 * and it's NOT the leaf level,
+		 * then we can get rid of this level.
+		 */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) {
+			/*
+			 * lpp is still set to the first pointer in the block.
+			 * Make it the new root of the btree.
+			 */
+			bno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
+			INT_COPY(agf->agf_roots[cur->bc_btnum], *lpp, ARCH_CONVERT);
+			INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, -1);
+			mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_levels[cur->bc_btnum]--;
+			/*
+			 * Put this buffer/block on the ag's freelist.
+			 */
+			if (error = xfs_alloc_put_freelist(cur->bc_tp,
+					cur->bc_private.a.agbp, NULL, bno))
+				return error;
+			xfs_trans_agbtree_delta(cur->bc_tp, -1);
+			xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+				XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+			/*
+			 * Update the cursor so there's one fewer level.
+			 */
+			xfs_btree_setbuf(cur, level, 0);
+			cur->bc_nlevels--;
+		} else if (level > 0 &&
+			   (error = xfs_alloc_decrement(cur, level, &i)))
+			return error;
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * If we deleted the leftmost entry in the block, update the
+	 * key values above us in the tree.
+	 */
+	if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
+		return error;
+	/*
+	 * If the number of records remaining in the block is at least
+	 * the minimum, we're done.
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+		if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
+			return error;
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * Otherwise, we have to move some records around to keep the
+	 * tree balanced.  Look at the left and right sibling blocks to
+	 * see if we can re-balance by moving only one record.
+	 */
+	rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+	lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+	bno = NULLAGBLOCK;
+	ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
+	/*
+	 * Duplicate the cursor so our btree manipulations here won't
+	 * disrupt the next level up.
+	 */
+	if (error = xfs_btree_dup_cursor(cur, &tcur))
+		return error;
+	/*
+	 * If there's a right sibling, see if it's ok to shift an entry
+	 * out of it.
+	 */
+	if (rbno != NULLAGBLOCK) {
+		/*
+		 * Move the temp cursor to the last entry in the next block.
+		 * Actually any entry but the first would suffice.
+		 */
+		i = xfs_btree_lastrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_alloc_increment(tcur, level, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		i = xfs_btree_lastrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * Grab a pointer to the block.
+		 */
+		rbp = tcur->bc_bufs[level];
+		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+			goto error0;
+#endif
+		/*
+		 * Grab the current block number, for future use.
+		 */
+		bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		/*
+		 * If right block is full enough so that removing one entry
+		 * won't make it too empty, and left-shifting an entry out
+		 * of right to us works, we're done.
+		 */
+		if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+		     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+			if (error = xfs_alloc_lshift(tcur, level, &i))
+				goto error0;
+			if (i) {
+				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				       XFS_ALLOC_BLOCK_MINRECS(level, cur));
+				xfs_btree_del_cursor(tcur,
+						     XFS_BTREE_NOERROR);
+				if (level > 0 &&
+				    (error = xfs_alloc_decrement(cur, level,
+					    &i)))
+					return error;
+				*stat = 1;
+				return 0;
+			}
+		}
+		/*
+		 * Otherwise, grab the number of records in right for
+		 * future reference, and fix up the temp cursor to point 
+		 * to our block again (last record).
+		 */
+		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		if (lbno != NULLAGBLOCK) {
+			i = xfs_btree_firstrec(tcur, level);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (error = xfs_alloc_decrement(tcur, level, &i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		}
+	}
+	/*
+	 * If there's a left sibling, see if it's ok to shift an entry
+	 * out of it.
+	 */
+	if (lbno != NULLAGBLOCK) {
+		/*
+		 * Move the temp cursor to the first entry in the
+		 * previous block.
+		 */
+		i = xfs_btree_firstrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_alloc_decrement(tcur, level, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		xfs_btree_firstrec(tcur, level);
+		/*
+		 * Grab a pointer to the block.
+		 */
+		lbp = tcur->bc_bufs[level];
+		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+			goto error0;
+#endif
+		/*
+		 * Grab the current block number, for future use.
+		 */
+		bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		/*
+		 * If left block is full enough so that removing one entry
+		 * won't make it too empty, and right-shifting an entry out
+		 * of left to us works, we're done.
+		 */
+		if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+		     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+			if (error = xfs_alloc_rshift(tcur, level, &i))
+				goto error0;
+			if (i) {
+				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				       XFS_ALLOC_BLOCK_MINRECS(level, cur));
+				xfs_btree_del_cursor(tcur,
+						     XFS_BTREE_NOERROR);
+				if (level == 0)
+					cur->bc_ptrs[0]++;
+				*stat = 1;
+				return 0;
+			}
+		}
+		/*
+		 * Otherwise, grab the number of records in left for
+		 * future reference.
+		 */
+		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * Delete the temp cursor, we're done with it.
+	 */
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	/*
+	 * If here, we need to do a join to keep the tree balanced.
+	 */
+	ASSERT(bno != NULLAGBLOCK);
+	/*
+	 * See if we can join with the left neighbor block.
+	 */
+	if (lbno != NULLAGBLOCK &&
+	    lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		/*
+		 * Set "right" to be the starting block,
+		 * "left" to be the left neighbor.
+		 */
+		rbno = bno;
+		right = block;
+		rbp = bp;
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, lbno, 0, &lbp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+		if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+			return error;
+	}
+	/*
+	 * If that won't work, see if we can join with the right neighbor block.
+	 */
+	else if (rbno != NULLAGBLOCK &&
+		 rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+		  XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		/*
+		 * Set "left" to be the starting block,
+		 * "right" to be the right neighbor.
+		 */
+		lbno = bno;
+		left = block;
+		lbp = bp;
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, rbno, 0, &rbp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+		if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+			return error;
+	}
+	/*
+	 * Otherwise, we can't fix the imbalance.
+	 * Just return.  This is probably a logic error, but it's not fatal.
+	 */
+	else {
+		if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
+			return error;
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * We're now going to join "left" and "right" by moving all the stuff
+	 * in "right" to "left" and deleting "right".
+	 */
+	if (level > 0) {
+		/*
+		 * It's a non-leaf.  Move keys and pointers.
+		 */
+		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
+		bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
+		xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		/*
+		 * It's a leaf.  Move records.
+		 */
+		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+		xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	}
+	/*
+	 * If we joined with the left neighbor, set the buffer in the
+	 * cursor to the left block, and fix up the index.
+	 */
+	if (bp != lbp) {
+		xfs_btree_setbuf(cur, level, lbp);
+		cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * If we joined with the right neighbor and there's a level above
+	 * us, increment the cursor at that level.
+	 */
+	else if (level + 1 < cur->bc_nlevels &&
+		 (error = xfs_alloc_increment(cur, level + 1, &i)))
+		return error;
+	/*
+	 * Fix up the number of records in the surviving block.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	/*
+	 * Fix up the right block pointer in the surviving block, and log it.
+	 */
+	left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	/*
+	 * If there is a right sibling now, make it point to the 
+	 * remaining block.
+	 */
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		xfs_alloc_block_t	*rrblock;
+		xfs_buf_t			*rrbp;
+
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+				&rrbp, XFS_ALLOC_BTREE_REF))
+			return error;
+		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
+		if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+			return error;
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
+		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
+	}
+	/*
+	 * Free the deleting block by putting it on the freelist.
+	 */
+	if (error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+			NULL, rbno))
+		return error;
+	xfs_trans_agbtree_delta(cur->bc_tp, -1);
+	/*
+	 * Adjust the current level's cursor so that we're left referring
+	 * to the right node, after we're done.
+	 * If this leaves the ptr value 0 our caller will fix it up.
+	 */
+	if (level > 0)
+		cur->bc_ptrs[level]--;
+	/* 
+	 * Return value means the next level up has something to do.
+	 */
+	*stat = 2;
+	return 0;
+
+error0:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Insert one record (leaf) or key/ptr pair (non-leaf) at cur->bc_ptrs[level].
+ * *stat: 0 = failed, 1 = done.  After a split, *bnop/*recp/*curp carry the new block number, the key from the split and the new cursor for the next level up.
+ */
+STATIC int				/* error */
+xfs_alloc_insrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to insert record at */
+	xfs_agblock_t		*bnop,	/* i/o: block number inserted */
+	xfs_alloc_rec_t		*recp,	/* i/o: record data inserted */
+	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur */
+	int			*stat)	/* output: success/failure */
+{
+	xfs_agf_t		*agf;	/* a.g. freespace header */
+	xfs_alloc_block_t	*block;	/* btree block record/key lives in */
+	xfs_buf_t		*bp;	/* buffer for block */
+	int			error;	/* error return value */
+	int			i;	/* loop index, also status from helpers */
+	xfs_alloc_key_t		key;	/* key value being inserted */
+	xfs_alloc_key_t		*kp;	/* pointer to btree keys */
+	xfs_agblock_t		nbno;	/* block number of allocated block */
+	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
+	xfs_alloc_key_t		nkey;	/* new key value, from split */
+	xfs_alloc_rec_t		nrec;	/* new record value, for caller */
+	int			optr;	/* old ptr value */
+	xfs_alloc_ptr_t		*pp;	/* pointer to btree addresses */
+	int			ptr;	/* index in btree block for this rec */
+	xfs_alloc_rec_t		*rp;	/* pointer to btree records */
+
+	ASSERT(INT_GET(recp->ar_blockcount, ARCH_CONVERT) > 0);
+	/*
+	 * If we made it to the root level, allocate a new root block
+	 * and we're done.  *stat is whatever xfs_alloc_newroot reported.
+	 */
+	if (level >= cur->bc_nlevels) {
+		XFS_STATS_INC(xs_abt_insrec);
+		if (error = xfs_alloc_newroot(cur, &i))
+			return error;
+		*bnop = NULLAGBLOCK;
+		*stat = i;
+		return 0;
+	}
+	/*
+	 * Make a key out of the record data to be inserted, and save it.
+	 */
+	key.ar_startblock = recp->ar_startblock; /* INT_: direct copy */
+	key.ar_blockcount = recp->ar_blockcount; /* INT_: direct copy */
+	optr = ptr = cur->bc_ptrs[level];
+	/*
+	 * If we're off the left edge, return failure.
+	 */
+	if (ptr == 0) {
+		*stat = 0;
+		return 0;
+	}
+	XFS_STATS_INC(xs_abt_insrec);
+	/*
+	 * Get pointers to the btree buffer and block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+	/*
+	 * Check that the new entry is being inserted in the right place.
+	 */
+	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0) {
+			rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+		} else {
+			kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
+			xfs_btree_check_key(cur->bc_btnum, &key, kp);
+		}
+	}
+#endif
+	nbno = NULLAGBLOCK;
+	ncur = (xfs_btree_cur_t *)0;
+	/*
+	 * If the block is full, we can't insert the new entry until we
+	 * make the block un-full.
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		/*
+		 * First, try shifting an entry to the right neighbor.
+		 */
+		if (error = xfs_alloc_rshift(cur, level, &i))
+			return error;
+		if (i) {
+			/* rshift made room; ptr/optr are left as they were */
+		}
+		/*
+		 * Next, try shifting an entry to the left neighbor.
+		 */
+		else {
+			if (error = xfs_alloc_lshift(cur, level, &i))
+				return error;
+			if (i)
+				optr = ptr = cur->bc_ptrs[level];
+			else {
+				/*
+				 * Next, try splitting the current block in
+				 * half. If this works we have to re-set our
+				 * variables because we could be in a
+				 * different block now.
+				 */
+				if (error = xfs_alloc_split(cur, level, &nbno,
+						&nkey, &ncur, &i))
+					return error;
+				if (i) {
+					bp = cur->bc_bufs[level];
+					block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+					if (error =
+						xfs_btree_check_sblock(cur,
+							block, level, bp))
+						return error;
+#endif
+					ptr = cur->bc_ptrs[level];
+					nrec.ar_startblock = nkey.ar_startblock; /* INT_: direct copy */
+					nrec.ar_blockcount = nkey.ar_blockcount; /* INT_: direct copy */
+				}
+				/*
+				 * Otherwise the insert fails.
+				 */
+				else {
+					*stat = 0;
+					return 0;
+				}
+			}
+		}
+	}
+	/*
+	 * At this point we know there's room for our new entry in the block
+	 * we're pointing at.
+	 */
+	if (level > 0) {
+		/*
+		 * It's a non-leaf entry.  Make a hole for the new data
+		 * in the key and ptr regions of the block.
+		 */
+		kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+		pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(&kp[ptr - 1], &kp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */
+		ovbcopy(&pp[ptr - 1], &pp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, *bnop, level))
+			return error;
+#endif
+		/*
+		 * Now stuff the new data in, bump numrecs and log the new data.
+		 */
+		kp[ptr - 1] = key;
+		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+#ifdef DEBUG
+		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
+				kp + ptr);
+#endif
+	} else {
+		/*
+		 * It's a leaf entry.  Make a hole for the new record.
+		 */
+		rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
+		ovbcopy(&rp[ptr - 1], &rp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+		/*
+		 * Now stuff the new record in, bump numrecs
+		 * and log the new data.
+		 */
+		rp[ptr - 1] = *recp; /* INT_: struct copy */
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+#ifdef DEBUG
+		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
+				rp + ptr);
+#endif
+	}
+	/*
+	 * Log the new number of records in the btree header.
+	 */
+	xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+	/*
+	 * If we inserted at the start of a block, update the parents' keys.
+	 */
+	if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
+		return error;
+	/*
+	 * Look to see if the longest extent in the allocation group
+	 * needs to be updated.
+	 */
+
+	agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+	if (level == 0 &&
+	    cur->bc_btnum == XFS_BTNUM_CNT &&
+	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+	    INT_GET(recp->ar_blockcount, ARCH_CONVERT) > INT_GET(agf->agf_longest, ARCH_CONVERT)) {
+		/*
+		 * If this is a leaf in the by-size btree and there
+		 * is no right sibling block and this block is bigger
+		 * than the previous longest block, update it.
+		 */
+		INT_COPY(agf->agf_longest, recp->ar_blockcount, ARCH_CONVERT);
+		cur->bc_mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest
+			= INT_GET(recp->ar_blockcount, ARCH_CONVERT);
+		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+			XFS_AGF_LONGEST);
+	}
+	/*
+	 * Return the new block number, if any.
+	 * If there is one, give back a record value and a cursor too.
+	 */
+	*bnop = nbno;
+	if (nbno != NULLAGBLOCK) {
+		*recp = nrec; /* INT_: struct copy */
+		*curp = ncur; /* pointer copy: hand the new cursor to the caller */
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Log the header fields of a btree block selected by the XFS_BB_... mask "fields".
+ */
+STATIC void
+xfs_alloc_log_block(
+	xfs_trans_t		*tp,	/* transaction pointer */
+	xfs_buf_t		*bp,	/* buffer containing btree block */
+	int			fields)	/* mask of fields: XFS_BB_... */
+{
+	int			first;	/* first byte offset logged */
+	int			last;	/* last byte offset logged */
+	static const short	offsets[] = {	/* header field offsets, then total size */
+		offsetof(xfs_alloc_block_t, bb_magic),
+		offsetof(xfs_alloc_block_t, bb_level),
+		offsetof(xfs_alloc_block_t, bb_numrecs),
+		offsetof(xfs_alloc_block_t, bb_leftsib),
+		offsetof(xfs_alloc_block_t, bb_rightsib),
+		sizeof(xfs_alloc_block_t)
+	};
+
+	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
+	xfs_trans_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Log keys kfirst..klast (1-based, inclusive) of a nonleaf btree block.
+ */
+STATIC void
+xfs_alloc_log_keys(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_buf_t			*bp,	/* buffer containing btree block */
+	int			kfirst,	/* index of first key to log */
+	int			klast)	/* index of last key to log */
+{
+	xfs_alloc_block_t	*block;	/* btree block to log from */
+	int			first;	/* first byte offset logged */
+	xfs_alloc_key_t		*kp;	/* key pointer in btree block */
+	int			last;	/* last byte offset logged */
+
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+	kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Log block pointers pfirst..plast (1-based, inclusive) of a nonleaf btree block.
+ */
+STATIC void
+xfs_alloc_log_ptrs(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_buf_t			*bp,	/* buffer containing btree block */
+	int			pfirst,	/* index of first pointer to log */
+	int			plast)	/* index of last pointer to log */
+{
+	xfs_alloc_block_t	*block;	/* btree block to log from */
+	int			first;	/* first byte offset logged */
+	int			last;	/* last byte offset logged */
+	xfs_alloc_ptr_t		*pp;	/* block-pointer pointer in btree blk */
+
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+	pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
+	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Log records rfirst..rlast (1-based, inclusive) of a leaf btree block.
+ */
+STATIC void
+xfs_alloc_log_recs(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_buf_t		*bp,	/* buffer containing btree block */
+	int			rfirst,	/* index of first record to log */
+	int			rlast)	/* index of last record to log */
+{
+	xfs_alloc_block_t	*block;	/* btree block to log from */
+	int			first;	/* first byte offset logged */
+	int			last;	/* last byte offset logged */
+	xfs_alloc_rec_t		*rp;	/* record pointer for btree block */
+
+
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+	rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
+#ifdef DEBUG
+	{
+		xfs_agf_t	*agf;	/* a.g. freespace header */
+		xfs_alloc_rec_t	*p;	/* record being range-checked */
+
+		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+		for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
+			ASSERT(INT_GET(p->ar_startblock, ARCH_CONVERT) + INT_GET(p->ar_blockcount, ARCH_CONVERT) <=
+			       INT_GET(agf->agf_length, ARCH_CONVERT));
+	}
+#endif
+	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Look up cur->bc_rec.a in the btree; the cursor is made to point to the entry chosen by dir.
+ * Returns an error code; *stat is 0 if no such record was found, 1 for success.
+ */
+STATIC int				/* error */
+xfs_alloc_lookup(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_lookup_t		dir,	/* <=, ==, or >= */
+	int			*stat)	/* success/failure */
+{
+	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
+	xfs_agnumber_t		agno;	/* allocation group number */
+	xfs_alloc_block_t	*block;	/* current btree block */
+	int			diff;	/* difference for the current key */
+	int			error;	/* error return value */
+	int			keyno;	/* current key number */
+	int			level;	/* level in the btree */
+	xfs_mount_t		*mp;	/* file system mount point */
+
+	XFS_STATS_INC(xs_abt_lookup);
+	/*
+	 * Get the allocation group header, and the root block number.
+	 */
+	mp = cur->bc_mp;
+
+	{
+		xfs_agf_t	*agf;	/* a.g. freespace header */
+
+		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+		agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		agbno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
+	}
+	/*
+	 * Iterate over each level in the btree, starting at the root.
+	 * For each level above the leaves, find the key we need, based
+	 * on the lookup record, then follow the corresponding block
+	 * pointer down to the next level.
+	 */
+	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+		xfs_buf_t	*bp;		/* buffer pointer for btree block */
+		xfs_daddr_t	d;		/* disk address of btree block */
+
+		/*
+		 * Get the disk address we're looking for.
+		 */
+		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+		/*
+		 * If the old buffer at this level is for a different block,
+		 * throw it away, otherwise just use it.
+		 */
+		bp = cur->bc_bufs[level];
+		if (bp && XFS_BUF_ADDR(bp) != d)
+			bp = (xfs_buf_t *)0;
+		if (!bp) {
+			/*
+			 * Need to get a new buffer.  Read it, then
+			 * set it in the cursor, releasing the old one.
+			 */
+			if (error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
+					agbno, 0, &bp, XFS_ALLOC_BTREE_REF))
+				return error;
+			xfs_btree_setbuf(cur, level, bp);
+			/*
+			 * Point to the btree block, now that we have the buffer
+			 */
+			block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+			if (error = xfs_btree_check_sblock(cur, block, level,
+					bp))
+				return error;
+		} else
+			block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+		/*
+		 * If we already had a key match at a higher level, we know
+		 * we need to use the first entry in this block.
+		 */
+		if (diff == 0)
+			keyno = 1;
+		/*
+		 * Otherwise we need to search this block.  Do a binary search.
+		 */
+		else {
+			int		high;	/* high entry number */
+			xfs_alloc_key_t	*kkbase;/* base of keys in block */
+			xfs_alloc_rec_t	*krbase;/* base of records in block */
+			int		low;	/* low entry number */
+
+			/*
+			 * Get a pointer to keys or records.
+			 */
+			if (level > 0)
+				kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+			else
+				krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
+			/*
+			 * Set low and high entry numbers, 1-based.
+			 */
+			low = 1;
+			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+				/*
+				 * If the block is empty, the tree must
+				 * be an empty leaf.
+				 */
+				ASSERT(level == 0 && cur->bc_nlevels == 1);
+				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+				*stat = 0;
+				return 0;
+			}
+			/*
+			 * Binary search the block.
+			 */
+			while (low <= high) {
+				xfs_extlen_t	blockcount;	/* key value */
+				xfs_agblock_t	startblock;	/* key value */
+
+				XFS_STATS_INC(xs_abt_compare);
+				/*
+				 * keyno is average of low and high.
+				 */
+				keyno = (low + high) >> 1;
+				/*
+				 * Get startblock & blockcount.
+				 */
+				if (level > 0) {
+					xfs_alloc_key_t	*kkp;
+
+					kkp = kkbase + keyno - 1;
+					startblock = INT_GET(kkp->ar_startblock, ARCH_CONVERT);
+					blockcount = INT_GET(kkp->ar_blockcount, ARCH_CONVERT);
+				} else {
+					xfs_alloc_rec_t	*krp;
+
+					krp = krbase + keyno - 1;
+					startblock = INT_GET(krp->ar_startblock, ARCH_CONVERT);
+					blockcount = INT_GET(krp->ar_blockcount, ARCH_CONVERT);
+				}
+				/*
+				 * Compute difference to get next direction: by startblock for the BNO tree, otherwise by blockcount and then startblock.
+				 */
+				if (cur->bc_btnum == XFS_BTNUM_BNO)
+					diff = (int)startblock -
+					       (int)cur->bc_rec.a.ar_startblock;
+				else if (!(diff = (int)blockcount -
+					    (int)cur->bc_rec.a.ar_blockcount))
+					diff = (int)startblock -
+					    (int)cur->bc_rec.a.ar_startblock;
+				/*
+				 * Less than, move right.
+				 */
+				if (diff < 0)
+					low = keyno + 1;
+				/*
+				 * Greater than, move left.
+				 */
+				else if (diff > 0)
+					high = keyno - 1;
+				/*
+				 * Equal, we're done.
+				 */
+				else
+					break;
+			}
+		}
+		/*
+		 * If there are more levels, set up for the next level
+		 * by getting the block number and filling in the cursor.
+		 */
+		if (level > 0) {
+			/*
+			 * If we moved left, need the previous key number,
+			 * unless there isn't one.
+			 */
+			if (diff > 0 && --keyno < 1)
+				keyno = 1;
+			agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+#ifdef DEBUG
+			if (error = xfs_btree_check_sptr(cur, agbno, level))
+				return error;
+#endif
+			cur->bc_ptrs[level] = keyno;
+		}
+	}
+	/*
+	 * Done with the search.
+	 * See if we need to adjust the results.
+	 */
+	if (dir != XFS_LOOKUP_LE && diff < 0) {
+		keyno++;
+		/*
+		 * If ge search and we went off the end of the block, but it's
+		 * not the last block, we're in the wrong block.
+		 */
+		if (dir == XFS_LOOKUP_GE &&
+		    keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			int	i;
+
+			cur->bc_ptrs[0] = keyno;
+			if (error = xfs_alloc_increment(cur, 0, &i))
+				return error;
+			XFS_WANT_CORRUPTED_RETURN(i == 1);
+			*stat = 1;
+			return 0;
+		}
+	}
+	else if (dir == XFS_LOOKUP_LE && diff > 0)
+		keyno--;
+	cur->bc_ptrs[0] = keyno;
+	/*
+	 * Return if we succeeded or not.
+	 */
+	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		*stat = 0;
+	else
+		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+	return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible; *stat is 1 if it was moved, else 0.
+ * Update cur to reflect the new path.
+ */
+STATIC int				/* error */
+xfs_alloc_lshift(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to shift record on */
+	int			*stat)	/* success/failure */
+{
+	int			error;	/* error return value */
+#ifdef DEBUG
+	int			i;	/* loop index */
+#endif
+	xfs_alloc_key_t		key;	/* key value for leaf level upward */
+	xfs_buf_t			*lbp;	/* buffer for left neighbor block */
+	xfs_alloc_block_t	*left;	/* left neighbor btree block */
+	int			nrec;	/* new number of left block entries */
+	xfs_buf_t			*rbp;	/* buffer for right (current) block */
+	xfs_alloc_block_t	*right;	/* right (current) btree block */
+	xfs_alloc_key_t		*rkp;	/* key pointer for right block */
+	xfs_alloc_ptr_t		*rpp;	/* address pointer for right block */
+	xfs_alloc_rec_t		*rrp;	/* record pointer for right block */
+
+	/*
+	 * Set up variables for this block as "right".
+	 */
+	rbp = cur->bc_bufs[level];
+	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+#endif
+	/*
+	 * If we've got no left sibling then we can't shift an entry left.
+	 */
+	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved, don't
+	 * do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] <= 1) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the left neighbor as "left".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.a.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
+			XFS_ALLOC_BTREE_REF))
+		return error;
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * If non-leaf, copy a key and a ptr to the left block.
+	 */
+	if (level > 0) {
+		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
+		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
+
+		lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		*lkp = *rkp;
+		xfs_alloc_log_keys(cur, lbp, nrec, nrec);
+		lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*lpp = *rpp; /* INT_: copy */
+		xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
+		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
+	}
+	/*
+	 * If leaf, copy a record to the left block.
+	 */
+	else {
+		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
+
+		lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		*lrp = *rrp;
+		xfs_alloc_log_recs(cur, lbp, nrec, nrec);
+		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
+	}
+	/*
+	 * Bump and log left's numrecs, decrement and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Slide the contents of right down one entry.
+	 */
+	if (level > 0) {
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+					level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		rkp = &key;
+	}
+	/*
+	 * Update the parent key values of right.
+	 */
+	if (error = xfs_alloc_updkey(cur, rkp, level + 1))
+		return error;
+	/*
+	 * Slide the cursor value left one.
+	 */
+	cur->bc_ptrs[level]--;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Allocate a new root block from the freelist and fill it in; *stat is 0 if no block was available, else 1.
+ */
+STATIC int				/* error */
+xfs_alloc_newroot(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			*stat)	/* success/failure */
+{
+	int			error;	/* error return value */
+	xfs_agblock_t		lbno;	/* left block number */
+	xfs_buf_t			*lbp;	/* left btree buffer */
+	xfs_alloc_block_t	*left;	/* left btree block */
+	xfs_mount_t		*mp;	/* mount structure */
+	xfs_agblock_t		nbno;	/* new block number */
+	xfs_buf_t			*nbp;	/* new (root) buffer */
+	xfs_alloc_block_t	*new;	/* new (root) btree block */
+	int			nptr;	/* new value for key index, 1 or 2 */
+	xfs_agblock_t		rbno;	/* right block number */
+	xfs_buf_t			*rbp;	/* right btree buffer */
+	xfs_alloc_block_t	*right;	/* right btree block */
+
+	mp = cur->bc_mp;
+
+	ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
+	/*
+	 * Get a buffer from the freelist blocks, for the new root.
+	 */
+	if (error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+			&nbno))
+		return error;
+	/*
+	 * None available, we fail.
+	 */
+	if (nbno == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	xfs_trans_agbtree_delta(cur->bc_tp, 1);
+	nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
+		0);
+	new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
+	/*
+	 * Set the root data in the a.g. freespace structure.
+	 */
+	{
+		xfs_agf_t	*agf;	/* a.g. freespace header */
+		xfs_agnumber_t	seqno;
+
+		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+		INT_SET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT, nbno);
+		INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, 1);
+		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
+		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+			XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+	}
+	/*
+	 * At the previous root level there are now two blocks: the old
+	 * root, and the new block generated when it was split.
+	 * We don't know which one the cursor is pointing at, so we
+	 * set up variables "left" and "right" for each case.
+	 */
+	lbp = cur->bc_bufs[cur->bc_nlevels - 1];
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))
+		return error;
+#endif
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		/*
+		 * Our block is left, pick up the right block.
+		 */
+		lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
+		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, rbno, 0, &rbp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+		if (error = xfs_btree_check_sblock(cur, right,
+				cur->bc_nlevels - 1, rbp))
+			return error;
+		nptr = 1;
+	} else {
+		/*
+		 * Our block is right, pick up the left block.
+		 */
+		rbp = lbp;
+		right = left;
+		rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
+		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, lbno, 0, &lbp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+		if (error = xfs_btree_check_sblock(cur, left,
+				cur->bc_nlevels - 1, lbp))
+			return error;
+		nptr = 2;
+	}
+	/*
+	 * Fill in the new block's btree header and log it.
+	 */
+	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
+	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
+	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+        INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+	xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
+	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
+	/*
+	 * Fill in the key data in the new root.
+	 */
+	{
+		xfs_alloc_key_t		*kp;	/* btree key pointer */
+
+		kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
+		if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+			kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
+			kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
+		} else {
+			xfs_alloc_rec_t	*rp;	/* btree record pointer */
+
+			rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
+			kp[0].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
+			kp[0].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+			rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+			kp[1].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
+			kp[1].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+		}
+	}
+	xfs_alloc_log_keys(cur, nbp, 1, 2);
+	/*
+	 * Fill in the pointer data in the new root.
+	 */
+	{
+		xfs_alloc_ptr_t		*pp;	/* btree address pointer */
+
+		pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
+		INT_SET(pp[0], ARCH_CONVERT, lbno);
+		INT_SET(pp[1], ARCH_CONVERT, rbno);
+	}
+	xfs_alloc_log_ptrs(cur, nbp, 1, 2);
+	/*
+	 * Fix up the cursor: the new root becomes the top level, pointing at entry nptr.
+	 */
+	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+	cur->bc_ptrs[cur->bc_nlevels] = nptr;
+	cur->bc_nlevels++;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Move 1 entry (the last one in the block at cur/level) to the right sibling if possible.
+ * Sets *stat to 1 if an entry was moved, 0 if not; returns 0 or an error code.
+ */
+STATIC int				/* error */
+xfs_alloc_rshift(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to shift record on */
+	int			*stat)	/* success/failure */
+{
+	int			error;	/* error return value */
+	int			i;	/* loop index */
+	xfs_alloc_key_t		key;	/* key value for leaf level upward */
+	xfs_buf_t			*lbp;	/* buffer for left (current) block */
+	xfs_alloc_block_t	*left;	/* left (current) btree block */
+	xfs_buf_t			*rbp;	/* buffer for right neighbor block */
+	xfs_alloc_block_t	*right;	/* right neighbor btree block */
+	xfs_alloc_key_t		*rkp;	/* key pointer for right block */
+	xfs_btree_cur_t		*tcur;	/* temporary cursor */
+
+	/*
+	 * Set up variables for this block as "left".
+	 */
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * If we've got no right sibling then we can't shift an entry right.
+	 */
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor is at (or past) the last entry of left, that is the
+	 * entry that would be moved; don't do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the right neighbor as "right".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
+			XFS_ALLOC_BTREE_REF))
+		return error;
+	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+	/*
+	 * If right already holds XFS_ALLOC_BLOCK_MAXRECS entries, it can't take another.
+	 */
+	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Make a hole at the start of the right neighbor block (overlapping
+	 * copy up by one slot), then copy the last left block entry to the hole.
+	 */
+	if (level > 0) {
+		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
+		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
+		xfs_alloc_ptr_t	*rpp;	/* address pointer for right block */
+
+		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*rkp = *lkp; /* INT_: copy */
+		*rpp = *lpp; /* INT_: copy */
+		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+	} else {
+		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
+		xfs_alloc_rec_t	*rrp;	/* record pointer for right block */
+
+		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		*rrp = *lrp;
+		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		rkp = &key;	/* leaf level: the key is built from the moved record */
+		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+	}
+	/*
+	 * Decrement and log left's numrecs, bump and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Using a temporary cursor (a duplicate of cur, stepped via xfs_btree_lastrec and
+	 * xfs_alloc_increment), update the parent key values of the block on the right to rkp.
+	 */
+	if (error = xfs_btree_dup_cursor(cur, &tcur))
+		return error;
+	i = xfs_btree_lastrec(tcur, level);
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	if ((error = xfs_alloc_increment(tcur, level, &i)) ||
+	    (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
+		goto error0;
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	*stat = 1;
+	return 0;
+error0:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);	/* temporary cursor is always released */
+	return error;
+}
+
+/*
+ * Split cur/level block in half, moving the upper entries into a new right sibling block.
+ * Return new block number and its first key (to be inserted into parent); *stat is 0 if no block was free.
+ */
+STATIC int				/* error */
+xfs_alloc_split(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to split */
+	xfs_agblock_t		*bnop,	/* output: block number allocated */
+	xfs_alloc_key_t		*keyp,	/* output: first key of new block */
+	xfs_btree_cur_t		**curp,	/* output: new cursor */
+	int			*stat)	/* success/failure */
+{
+	int			error;	/* error return value */
+	int			i;	/* loop index/record number */
+	xfs_agblock_t		lbno;	/* left (current) block number */
+	xfs_buf_t			*lbp;	/* buffer for left block */
+	xfs_alloc_block_t	*left;	/* left (current) btree block */
+	xfs_agblock_t		rbno;	/* right (new) block number */
+	xfs_buf_t			*rbp;	/* buffer for right block */
+	xfs_alloc_block_t	*right;	/* right (new) btree block */
+
+	/*
+	 * Allocate the new block from the freelist.
+	 * If none is available (NULLAGBLOCK), give up: *stat = 0, no error.
+	 */
+	if (error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+			&rbno))
+		return error;
+	if (rbno == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	xfs_trans_agbtree_delta(cur->bc_tp, 1);
+	rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
+		rbno, 0);
+	/*
+	 * Set up the new block as "right".
+	 */
+	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+	/*
+	 * "Left" is the current (according to the cursor) block.
+	 */
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * Fill in the btree header for the new block.
+	 */
+	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	right->bb_level = left->bb_level; /* INT_: direct copy */
+	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	/*
+	 * If there's an odd number of entries now and the cursor points into
+	 * the lower part (at or before right's count + 1), give the extra
+	 * entry to right; presumably this balances the blocks once the pending insert lands in left.
+	 */
+	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * For non-leaf blocks, copy keys and addresses (from entry i on) over to the new block.
+	 */
+	if (level > 0) {
+		xfs_alloc_key_t	*lkp;	/* left btree key pointer */
+		xfs_alloc_ptr_t	*lpp;	/* left btree address pointer */
+		xfs_alloc_key_t	*rkp;	/* right btree key pointer */
+		xfs_alloc_ptr_t	*rpp;	/* right btree address pointer */
+
+		lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
+		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));/* INT_: copy */
+		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		*keyp = *rkp;
+	}
+	/*
+	 * For leaf blocks, copy records (from entry i on) over to the new block.
+	 */
+	else {
+		xfs_alloc_rec_t	*lrp;	/* left btree record pointer */
+		xfs_alloc_rec_t	*rrp;	/* right btree record pointer */
+
+		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+		keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+	}
+	/*
+	 * Find the left block number by looking in the buffer.
+	 * Adjust numrecs, sibling pointers: right is linked in between left and left's old right sibling.
+	 */
+	lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+	INT_SET(left->bb_rightsib, ARCH_CONVERT, rbno);
+	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	/*
+	 * If there's a block to the new block's right, make that block
+	 * point back to right instead of to left.
+	 */
+	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		xfs_alloc_block_t	*rrblock;	/* rr btree block */
+		xfs_buf_t			*rrbp;		/* buffer for rrblock */
+
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, INT_GET(right->bb_rightsib, ARCH_CONVERT), 0,
+				&rrbp, XFS_ALLOC_BTREE_REF))
+			return error;
+		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
+		if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+			return error;
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, rbno);
+		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
+	}
+	/*
+	 * If the cursor is really in the right block, move it there.
+	 * If it's just pointing past the last entry in left, then we'll
+	 * insert there, so don't change anything in that case.
+	 */
+	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+		xfs_btree_setbuf(cur, level, rbp);
+		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * If there are more levels, we'll need another cursor which refers to
+	 * the right block, no matter where this cursor was (returned in *curp).
+	 */
+	if (level + 1 < cur->bc_nlevels) {
+		if (error = xfs_btree_dup_cursor(cur, curp))
+			return error;
+		(*curp)->bc_ptrs[level + 1]++;	/* advance the copy's index in the parent by one */
+	}
+	*bnop = rbno;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Set the key at the cursor's position to *keyp, from 'level' upward while the cursor is on entry 1.
+ */
+STATIC int				/* error */
+xfs_alloc_updkey(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_alloc_key_t		*keyp,	/* new key value to update to */
+	int			level)	/* starting level for update */
+{
+	int			ptr;	/* index of key in block */
+
+	/*
+	 * Go up the tree from this level toward the root.
+	 * At each level, update the key value to the value input and log it.
+	 * Stop after the first level where the cursor isn't pointing
+	 * at the first entry in the block (that level is still updated).
+	 */
+	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+		xfs_alloc_block_t	*block;	/* btree block */
+		xfs_buf_t			*bp;	/* buffer for block */
+#ifdef DEBUG
+		int			error;	/* error return value */
+#endif
+		xfs_alloc_key_t		*kp;	/* ptr to btree block keys */
+
+		bp = cur->bc_bufs[level];
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, block, level, bp))
+			return error;
+#endif
+		ptr = cur->bc_ptrs[level];
+		kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
+		*kp = *keyp;
+		xfs_alloc_log_keys(cur, bp, ptr, ptr);
+	}
+	return 0;
+}
+
+/*
+ * Externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level; *stat is 0 if that runs off the left edge of the tree, else 1.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_alloc_decrement(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat)	/* success/failure */
+{
+	xfs_alloc_block_t	*block;	/* btree block */
+	int			error;	/* error return value */
+	int			lev;	/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/*
+	 * Read-ahead to the left at this level.
+	 */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+	/*
+	 * Decrement the ptr at this level.  If we're still in the block
+	 * (ptr is still at least 1) then we're done.
+	 */
+	if (--cur->bc_ptrs[level] > 0) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level,
+			cur->bc_bufs[level]))
+		return error;
+#endif
+	/*
+	 * If we just went off the left edge of the tree (no left sibling), return failure.
+	 */
+	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree decrementing pointers.
+	 * Stop when we don't go off the left edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		if (--cur->bc_ptrs[lev] > 0)
+			break;
+		/*
+		 * Read-ahead the left block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, fixing up the cursor's buffer
+	 * pointers and key numbers (each lower level points at its block's last entry).
+	 */
+	for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+		xfs_buf_t		*bp;	/* buffer pointer for block */
+
+		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, agbno, 0, &bp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Delete the record pointed to by cur; *stat receives the final xfs_alloc_delrec/decrement result.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int					/* error */
+xfs_alloc_delete(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		i;		/* result code */
+	int		level;		/* btree level */
+
+	/*
+	 * Go up the tree, starting at leaf level, calling xfs_alloc_delrec.
+	 * If 2 is returned then a join was done; go to the next level.
+	 * Otherwise we are done.
+	 */
+	for (level = 0, i = 2; i == 2; level++) {
+		if (error = xfs_alloc_delrec(cur, level, &i))
+			return error;
+	}
+	if (i == 0) {	/* result 0: step the cursor left at the first level >= 1 whose index is 0 */
+		for (level = 1; level < cur->bc_nlevels; level++) {
+			if (cur->bc_ptrs[level] == 0) {
+				if (error = xfs_alloc_decrement(cur, level, &i))
+					return error;
+				break;
+			}
+		}
+	}
+	*stat = i;
+	return 0;
+}
+
+/*
+ * Get the data from the pointed-to leaf record; *stat is 0 (outputs untouched) if the cursor is off the block.
+ */
+int					/* error */
+xfs_alloc_get_rec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_agblock_t		*bno,	/* output: starting block of extent */
+	xfs_extlen_t		*len,	/* output: length of extent */
+	int			*stat)	/* output: success/failure */
+{
+	xfs_alloc_block_t	*block;	/* btree block */
+#ifdef DEBUG
+	int			error;	/* error return value */
+#endif
+	int			ptr;	/* record number */
+
+	ptr = cur->bc_ptrs[0];
+	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))
+		return error;
+#endif
+	/*
+	 * Off the right end (ptr > numrecs) or left end (ptr <= 0), return failure.
+	 */
+	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Point to the record and extract its data.
+	 */
+	{
+		xfs_alloc_rec_t		*rec;	/* record data */
+
+		rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+		*bno = INT_GET(rec->ar_startblock, ARCH_CONVERT);
+		*len = INT_GET(rec->ar_blockcount, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Increment cursor by one record at the level; *stat is 0 if that runs off the right edge of the tree, else 1.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_alloc_increment(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat)	/* success/failure */
+{
+	xfs_alloc_block_t	*block;	/* btree block */
+	xfs_buf_t			*bp;	/* tree block buffer */
+	int			error;	/* error return value */
+	int			lev;	/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/*
+	 * Read-ahead to the right at this level.
+	 */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+#endif
+	/*
+	 * Increment the ptr at this level.  If we're still in the block
+	 * (ptr does not exceed numrecs) then we're done.
+	 */
+	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * If we just went off the right edge of the tree (no right sibling), return failure.
+	 */
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree incrementing pointers.
+	 * Stop when we don't go off the right edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		bp = cur->bc_bufs[lev];
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+#endif
+		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			break;
+		/*
+		 * Read-ahead the right block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, fixing up the cursor's buffer
+	 * pointers and key numbers (each lower level points at its block's first entry).
+	 */
+	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+	     lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+
+		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, agbno, 0, &bp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = 1;
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Insert the current record (cur->bc_rec.a) at the point referenced by cur; *stat gets the last insrec result.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int					/* error */
+xfs_alloc_insert(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		i;		/* result value, 0 for failure */
+	int		level;		/* current level number in btree */
+	xfs_agblock_t	nbno;		/* new block number (split result) */
+	xfs_btree_cur_t	*ncur;		/* new cursor (split result) */
+	xfs_alloc_rec_t	nrec;		/* record being inserted this level */
+	xfs_btree_cur_t	*pcur;		/* previous level's cursor */
+
+	level = 0;
+	nbno = NULLAGBLOCK;
+	INT_SET(nrec.ar_startblock, ARCH_CONVERT, cur->bc_rec.a.ar_startblock);
+	INT_SET(nrec.ar_blockcount, ARCH_CONVERT, cur->bc_rec.a.ar_blockcount);
+	ncur = (xfs_btree_cur_t *)0;
+	pcur = cur;
+	/*
+	 * Loop going up the tree, starting at the leaf level.
+	 * Stop when we don't get a split block (nbno is NULLAGBLOCK), that
+	 * must mean that the insert is finished with this level.
+	 */
+	do {
+		/*
+		 * Insert nrec/nbno into this level of the tree.
+		 * Note if we fail, nbno will be null.
+		 */
+		if (error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
+				&i)) {
+			if (pcur != cur)
+				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+			return error;
+		}
+		/*
+		 * See if the cursor we just used is trash.
+		 * Can't trash the caller's cursor, but otherwise we should
+		 * if ncur is a new cursor or we're about to be done.
+		 */
+		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
+			cur->bc_nlevels = pcur->bc_nlevels;	/* carry the level count back to the caller's cursor */
+			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+		}
+		/*
+		 * If we got a new cursor, switch to it.
+		 */
+		if (ncur) {
+			pcur = ncur;
+			ncur = (xfs_btree_cur_t *)0;
+		}
+	} while (nbno != NULLAGBLOCK);
+	*stat = i;
+	return 0;
+}
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur (stores them in cur->bc_rec.a, XFS_LOOKUP_EQ).
+ */
+int					/* error */
+xfs_alloc_lookup_eq(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	bno,		/* starting block of extent */
+	xfs_extlen_t	len,		/* length of extent */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur (stores them in cur->bc_rec.a, XFS_LOOKUP_GE).
+ */
+int					/* error */
+xfs_alloc_lookup_ge(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	bno,		/* starting block of extent */
+	xfs_extlen_t	len,		/* length of extent */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur (stores them in cur->bc_rec.a, XFS_LOOKUP_LE).
+ */
+int					/* error */
+xfs_alloc_lookup_le(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	bno,		/* starting block of extent */
+	xfs_extlen_t	len,		/* length of extent */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the leaf record referred to by cur, to the value given by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int					/* error */
+xfs_alloc_update(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len)	/* length of extent */
+{
+	xfs_alloc_block_t	*block;	/* btree block to update */
+	int			error;	/* error return value */
+	int			ptr;	/* current record number (updating) */
+
+	ASSERT(len > 0);
+	/*
+	 * Pick up the current leaf block (level 0 of the cursor).
+	 */
+	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))
+		return error;
+#endif
+	/*
+	 * Get the address of the rec to be updated.
+	 */
+	ptr = cur->bc_ptrs[0];
+	{
+		xfs_alloc_rec_t		*rp;	/* pointer to updated record */
+
+		rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+		/*
+		 * Fill in the new contents and log them.
+		 */
+		INT_SET(rp->ar_startblock, ARCH_CONVERT, bno);
+		INT_SET(rp->ar_blockcount, ARCH_CONVERT, len);
+		xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
+	}
+	/*
+	 * If it's the by-size btree and it's the last leaf block and
+	 * it's the last record... then update the size of the longest
+	 * extent in the a.g., which we cache in the a.g. freelist header and in m_perag.
+	 */
+	if (cur->bc_btnum == XFS_BTNUM_CNT &&
+	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+	    ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		xfs_agf_t	*agf;	/* a.g. freespace header */
+		xfs_agnumber_t	seqno;	/* a.g. number read from the header */
+
+		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		cur->bc_mp->m_perag[seqno].pagf_longest = len;
+		INT_SET(agf->agf_longest, ARCH_CONVERT, len);
+		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+			XFS_AGF_LONGEST);
+	}
+	/*
+	 * Updating first record in leaf. Pass new key value up to our parent (starting at level 1).
+	 */
+	if (ptr == 1) {
+		xfs_alloc_key_t	key;	/* key containing [bno, len] */
+
+		INT_SET(key.ar_startblock, ARCH_CONVERT, bno);
+		INT_SET(key.ar_blockcount, ARCH_CONVERT, len);
+		if (error = xfs_alloc_updkey(cur, &key, 1))
+			return error;
+	}
+	return 0;
+}
diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c
new file mode 100644
index 000000000..f3b02e0b8
--- /dev/null
+++ b/libxfs/xfs_attr_leaf.c
@@ -0,0 +1,1169 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_attr_leaf.c
+ *
+ * Routines to implement leaf blocks of attributes as Btrees of hashed names.
+ */
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Set up block "blkno" of the attribute fork as an empty attribute leaf
+ * (zeroed, magic set, one free region) and return its buffer in *bpp.
+ */
+int
+xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+{
+	xfs_attr_leafblock_t *leaf;
+	xfs_attr_leaf_hdr_t *hdr;
+	xfs_inode_t *dp;
+	xfs_dabuf_t *bp;
+	int error;
+
+	dp = args->dp;
+	ASSERT(dp != NULL);
+	error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
+					    XFS_ATTR_FORK);
+	if (error)
+		return(error);
+	ASSERT(bp != NULL);
+	leaf = bp->data;
+	bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+	hdr = &leaf->hdr;
+	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC);
+	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
+	if (INT_GET(hdr->firstused, ARCH_CONVERT) == 0) {	/* truncated? */
+		INT_SET(hdr->firstused, ARCH_CONVERT,
+			XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN);
+	}
+
+	INT_SET(hdr->freemap[0].base, ARCH_CONVERT,
+						sizeof(xfs_attr_leaf_hdr_t));
+	INT_SET(hdr->freemap[0].size, ARCH_CONVERT,
+					  INT_GET(hdr->firstused, ARCH_CONVERT)
+					- INT_GET(hdr->freemap[0].base,
+								ARCH_CONVERT));
+
+	xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
+
+	*bpp = bp;	/* hand the new leaf's buffer back to the caller */
+	return(0);
+}
+
+/*
+ * Allocate a new leaf, rebalance oldblk's entries into it, add the entry.
+ */
+int
+xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+				   xfs_da_state_blk_t *newblk)
+{
+	xfs_dablk_t blkno;	/* block number of the new leaf */
+	int error;
+
+	/*
+	 * Allocate space for a new leaf node.
+	 */
+	ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
+	error = xfs_da_grow_inode(state->args, &blkno);
+	if (error)
+		return(error);
+	error = xfs_attr_leaf_create(state->args, blkno, &newblk->bp);
+	if (error)
+		return(error);
+	newblk->blkno = blkno;
+	newblk->magic = XFS_ATTR_LEAF_MAGIC;
+
+	/*
+	 * Rebalance the entries across the two leaves.
+	 * NOTE: rebalance() currently depends on the 2nd block being empty.
+	 */
+	xfs_attr_leaf_rebalance(state, oldblk, newblk);
+	error = xfs_da_blk_link(state, oldblk, newblk);
+	if (error)
+		return(error);
+
+	/*
+	 * Save info on "old" attribute for "atomic rename" ops, leaf_add()
+	 * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the
+	 * "new" attrs info.  Will need the "old" info to remove it later.
+	 *
+	 * Insert the "new" entry in the block chosen by rebalance (inleaf).
+	 */
+	if (state->inleaf)
+		error = xfs_attr_leaf_add(oldblk->bp, state->args);
+	else
+		error = xfs_attr_leaf_add(newblk->bp, state->args);
+
+	/*
+	 * Refresh each block's last hashval (done even if the add failed).
+	 */
+	oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+	newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+	return(error);	/* result of the leaf_add above */
+}
+
+/*
+ * Add the name in "args" to leaf "bp"; ENOSPC if it cannot be made to fit.
+ */
+int
+xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
+{
+	xfs_attr_leafblock_t *leaf;
+	xfs_attr_leaf_hdr_t *hdr;
+	xfs_attr_leaf_map_t *map;
+	int tablesize, entsize, sum, tmp, i;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT((args->index >= 0)
+		&& (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+	hdr = &leaf->hdr;
+	entsize = xfs_attr_leaf_newentsize(args,
+			   args->trans->t_mountp->m_sb.sb_blocksize, NULL);
+
+	/*
+	 * Search through freemap for first-fit on new name length.
+	 * (may need to figure in size of entry struct too)
+	 */
+	tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1)
+					* sizeof(xfs_attr_leaf_entry_t)
+					+ sizeof(xfs_attr_leaf_hdr_t);
+	map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1];
+	for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
+		if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {
+			sum += INT_GET(map->size, ARCH_CONVERT);
+			continue;	/* grown entry table would pass firstused */
+		}
+		if (INT_GET(map->size, ARCH_CONVERT) == 0)
+			continue;	/* no space in this map */
+		tmp = entsize;
+		if (INT_GET(map->base, ARCH_CONVERT)
+				< INT_GET(hdr->firstused, ARCH_CONVERT))
+			tmp += sizeof(xfs_attr_leaf_entry_t);
+		if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
+			tmp = xfs_attr_leaf_add_work(bp, args, i);
+			return(tmp);
+		}
+		sum += INT_GET(map->size, ARCH_CONVERT);
+	}
+
+	/*
+	 * If there are no holes in the address space of the block,
+	 * and we don't have enough freespace, then compaction will do us
+	 * no good and we should just give up.
+	 */
+	if (!hdr->holes && (sum < entsize))
+		return(XFS_ERROR(ENOSPC));
+
+	/*
+	 * Compact the entries to coalesce free space.
+	 * This may change the hdr->count via dropping INCOMPLETE entries.
+	 */
+	xfs_attr_leaf_compact(args->trans, bp);
+
+	/*
+	 * After compaction, the block is guaranteed to have only one
+	 * free region, in freemap[0].  If it is not big enough, give up.
+	 */
+	if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT)
+				< (entsize + sizeof(xfs_attr_leaf_entry_t)))
+		return(XFS_ERROR(ENOSPC));
+
+	return(xfs_attr_leaf_add_work(bp, args, 0));
+}
+
+/*
+ * Insert the entry at args->index, taking space from freemap[mapindex].
+ */
+STATIC int
+xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
+{
+	xfs_attr_leafblock_t *leaf;
+	xfs_attr_leaf_hdr_t *hdr;
+	xfs_attr_leaf_entry_t *entry;
+	xfs_attr_leaf_name_local_t *name_loc;
+	xfs_attr_leaf_name_remote_t *name_rmt;
+	xfs_attr_leaf_map_t *map;
+	xfs_mount_t *mp;
+	int tmp, i;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	hdr = &leaf->hdr;
+	ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
+	ASSERT((args->index >= 0)
+		&& (args->index <= INT_GET(hdr->count, ARCH_CONVERT)));
+
+	/*
+	 * Force open some space in the entry array and fill it in.
+	 */
+	entry = &leaf->entries[args->index];
+	if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) {
+		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - args->index;
+		tmp *= sizeof(xfs_attr_leaf_entry_t);
+		ovbcopy((char *)entry, (char *)(entry+1), tmp);
+		xfs_da_log_buf(args->trans, bp,
+		    XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
+	}
+	INT_MOD(hdr->count, ARCH_CONVERT, 1);
+
+	/*
+	 * Carve the new name/value out of the tail end of the free region.
+	 */
+	map = &hdr->freemap[mapindex];
+	mp = args->trans->t_mountp;
+	ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	ASSERT((INT_GET(map->base, ARCH_CONVERT) & 0x3) == 0);
+	ASSERT(INT_GET(map->size, ARCH_CONVERT)
+				>= xfs_attr_leaf_newentsize(args,
+					     mp->m_sb.sb_blocksize, NULL));
+	ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	ASSERT((INT_GET(map->size, ARCH_CONVERT) & 0x3) == 0);
+	INT_MOD(map->size, ARCH_CONVERT,
+		-xfs_attr_leaf_newentsize(args, mp->m_sb.sb_blocksize, &tmp));
+	INT_SET(entry->nameidx, ARCH_CONVERT,
+					INT_GET(map->base, ARCH_CONVERT)
+				      + INT_GET(map->size, ARCH_CONVERT));
+	INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
+	entry->flags = tmp ? XFS_ATTR_LOCAL : 0;	/* tmp set by newentsize */
+	entry->flags |= (args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0;
+	if (args->rename) {
+		entry->flags |= XFS_ATTR_INCOMPLETE;
+		if ((args->blkno2 == args->blkno) &&
+		    (args->index2 <= args->index)) {
+			args->index2++;
+		}
+	}
+	xfs_da_log_buf(args->trans, bp,
+			  XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+	ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT)
+						>= INT_GET((entry-1)->hashval,
+							    ARCH_CONVERT)));
+	ASSERT((args->index == INT_GET(hdr->count, ARCH_CONVERT)-1) ||
+	       (INT_GET(entry->hashval, ARCH_CONVERT)
+			    <= (INT_GET((entry+1)->hashval, ARCH_CONVERT))));
+
+	/*
+	 * Copy the attribute name and value into the new space.
+	 *
+	 * For "remote" attribute values, simply note that we need to
+	 * allocate space for the "remote" value.  We can't actually
+	 * allocate the extents in this transaction, and we can't decide
+	 * which blocks they should be as we might allocate more blocks
+	 * as part of this transaction (a split operation for example).
+	 */
+	if (entry->flags & XFS_ATTR_LOCAL) {
+		name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
+		name_loc->namelen = args->namelen;
+		INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen);
+		bcopy(args->name, (char *)name_loc->nameval, args->namelen);
+		bcopy(args->value, (char *)&name_loc->nameval[args->namelen],
+				   INT_GET(name_loc->valuelen, ARCH_CONVERT));
+	} else {
+		name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
+		name_rmt->namelen = args->namelen;
+		bcopy(args->name, (char *)name_rmt->name, args->namelen);
+		entry->flags |= XFS_ATTR_INCOMPLETE;
+		/* just in case */
+		INT_SET(name_rmt->valuelen, ARCH_CONVERT, 0);
+		INT_SET(name_rmt->valueblk, ARCH_CONVERT, 0);
+		args->rmtblkno = 1;
+		args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
+	}
+	xfs_da_log_buf(args->trans, bp,
+	     XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index),
+				   xfs_attr_leaf_entsize(leaf, args->index)));
+
+	/*
+	 * Update the control info for this leaf node
+	 */
+	if (INT_GET(entry->nameidx, ARCH_CONVERT)
+				< INT_GET(hdr->firstused, ARCH_CONVERT)) {
+		INT_SET(hdr->firstused, ARCH_CONVERT,
+					INT_GET(entry->nameidx, ARCH_CONVERT));
+	}
+	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT)
+				>= ((INT_GET(hdr->count, ARCH_CONVERT)
+					* sizeof(*entry))+sizeof(*hdr)));
+	tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1)
+					* sizeof(xfs_attr_leaf_entry_t)
+					+ sizeof(xfs_attr_leaf_hdr_t);
+	map = &hdr->freemap[0];	/* tmp = end of the old entry table */
+	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) {
+		if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
+			INT_MOD(map->base, ARCH_CONVERT,
+					sizeof(xfs_attr_leaf_entry_t));
+			INT_MOD(map->size, ARCH_CONVERT,
+					-sizeof(xfs_attr_leaf_entry_t));
+		}
+	}
+	INT_MOD(hdr->usedbytes, ARCH_CONVERT,
+				xfs_attr_leaf_entsize(leaf, args->index));
+	xfs_da_log_buf(args->trans, bp,
+		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+	return(0);
+}
+
+/*
+ * Garbage collect a leaf block: copy it aside, then rebuild it in place.
+ */
+STATIC void
+xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
+{
+	xfs_attr_leafblock_t *leaf_s, *leaf_d;
+	xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
+	xfs_mount_t *mp;
+	char *tmpbuffer;	/* holds a copy of the original block */
+
+	mp = trans->t_mountp;
+	tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
+	ASSERT(tmpbuffer != NULL);
+	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(mp));
+	bzero(bp->data, XFS_LBSIZE(mp));
+
+	/*
+	 * Copy basic information
+	 */
+	leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
+	leaf_d = bp->data;
+	hdr_s = &leaf_s->hdr;
+	hdr_d = &leaf_d->hdr;
+	hdr_d->info = hdr_s->info;	/* struct copy */
+	INT_SET(hdr_d->firstused, ARCH_CONVERT, XFS_LBSIZE(mp));
+	/* handle truncation gracefully */
+	if (INT_GET(hdr_d->firstused, ARCH_CONVERT) == 0) {
+		INT_SET(hdr_d->firstused, ARCH_CONVERT,
+				XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN);
+	}
+	INT_SET(hdr_d->usedbytes, ARCH_CONVERT, 0);
+	INT_SET(hdr_d->count, ARCH_CONVERT, 0);
+	hdr_d->holes = 0;
+	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT,
+					sizeof(xfs_attr_leaf_hdr_t));
+	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT,
+				INT_GET(hdr_d->firstused, ARCH_CONVERT)
+			      - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+
+	/*
+	 * Copy all entries in the same (sorted) order,
+	 * but allocate name/value pairs packed and in sequence.
+	 */
+	xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0,
+				(int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
+
+	xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+
+	kmem_free(tmpbuffer, XFS_LBSIZE(mp));
+}
+
+/*
+ * Redistribute the attribute list entries between two leaf nodes,
+ * taking into account the size of the new entry.
+ *
+ * NOTE: if new block is empty, then it will get the upper half of the
+ * old block.  At present, all (one) callers pass in an empty second block.
+ *
+ * This code adjusts the args->index/blkno and args->index2/blkno2 fields
+ * to match what it is doing in splitting the attribute leaf block.  Those
+ * values are used in "atomic rename" operations on attributes.  Note that
+ * the "new" and "old" values can end up in different blocks.
+ */
+STATIC void
+xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+				       xfs_da_state_blk_t *blk2)
+{
+	xfs_da_args_t *args;
+	xfs_da_state_blk_t *tmp_blk;
+	xfs_attr_leafblock_t *leaf1, *leaf2;
+	xfs_attr_leaf_hdr_t *hdr1, *hdr2;
+	int count, totallen, max, space, swap;
+
+	/*
+	 * Set up environment.
+	 */
+	ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	args = state->args;
+
+	/*
+	 * Check ordering of blocks, reverse if it makes things simpler.
+	 *
+	 * NOTE: Given that all (current) callers pass in an empty
+	 * second block, this code should never set "swap".
+	 */
+	swap = 0;
+	if (xfs_attr_leaf_order(blk1->bp, blk2->bp)) {
+		tmp_blk = blk1;
+		blk1 = blk2;
+		blk2 = tmp_blk;
+		leaf1 = blk1->bp->data;
+		leaf2 = blk2->bp->data;
+		swap = 1;
+	}
+	hdr1 = &leaf1->hdr;
+	hdr2 = &leaf2->hdr;
+
+	/*
+	 * Examine entries until we reduce the absolute difference in
+	 * byte usage between the two blocks to a minimum.  Then get
+	 * the direction to copy and the number of elements to move.
+	 *
+	 * "inleaf" is true if the new entry should be inserted into blk1.
+	 * If "swap" is also true, then reverse the sense of "inleaf".
+	 */
+	state->inleaf = xfs_attr_leaf_figure_balance(state, blk1, blk2,
+							    &count, &totallen);
+	if (swap)
+		state->inleaf = !state->inleaf;
+
+	/*
+	 * Move any entries required from leaf to leaf:
+	 */
+	if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
+		/*
+		 * Figure the total bytes to be added to the destination leaf.
+		 */
+		/* number entries being moved */
+		count = INT_GET(hdr1->count, ARCH_CONVERT) - count;
+		space  = INT_GET(hdr1->usedbytes, ARCH_CONVERT) - totallen;
+		space += count * sizeof(xfs_attr_leaf_entry_t);
+
+		/*
+		 * leaf2 is the destination, compact it if it looks tight.
+		 */
+		max  = INT_GET(hdr2->firstused, ARCH_CONVERT)
+						- sizeof(xfs_attr_leaf_hdr_t);
+		max -= INT_GET(hdr2->count, ARCH_CONVERT)
+					* sizeof(xfs_attr_leaf_entry_t);
+		if (space > max) {
+			xfs_attr_leaf_compact(args->trans, blk2->bp);
+		}
+
+		/*
+		 * Move high entries from leaf1 to low end of leaf2.
+		 */
+		xfs_attr_leaf_moveents(leaf1,
+				INT_GET(hdr1->count, ARCH_CONVERT)-count,
+				leaf2, 0, count, state->mp);
+
+		xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+		xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+	} else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
+		/*
+		 * I assert that since all callers pass in an empty
+		 * second buffer, this code should never execute.
+		 */
+
+		/*
+		 * Figure the total bytes to be added to the destination leaf.
+		 */
+		/* number entries being moved */
+		count -= INT_GET(hdr1->count, ARCH_CONVERT);
+		space  = totallen - INT_GET(hdr1->usedbytes, ARCH_CONVERT);
+		space += count * sizeof(xfs_attr_leaf_entry_t);
+
+		/*
+		 * leaf1 is the destination, compact it if it looks tight.
+		 */
+		max  = INT_GET(hdr1->firstused, ARCH_CONVERT)
+						- sizeof(xfs_attr_leaf_hdr_t);
+		max -= INT_GET(hdr1->count, ARCH_CONVERT)
+					* sizeof(xfs_attr_leaf_entry_t);
+		if (space > max) {
+			xfs_attr_leaf_compact(args->trans, blk1->bp);
+		}
+
+		/*
+		 * Move low entries from leaf2 to high end of leaf1.
+		 */
+		xfs_attr_leaf_moveents(leaf2, 0, leaf1,
+				(int)INT_GET(hdr1->count, ARCH_CONVERT), count,
+				state->mp);
+
+		xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+		xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+	}
+
+	/*
+	 * Copy out each block's last hashval (entries[count-1]) for B-tree code.
+	 */
+	blk1->hashval =
+	    INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count,
+				    ARCH_CONVERT)-1].hashval, ARCH_CONVERT);
+	blk2->hashval =
+	    INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count,
+				    ARCH_CONVERT)-1].hashval, ARCH_CONVERT);
+
+	/*
+	 * Adjust the expected index for insertion.
+	 * NOTE: this code depends on the (current) situation that the
+	 * second block was originally empty.
+	 *
+	 * If the insertion point moved to the 2nd block, we must adjust
+	 * the index.  We must also track the entry just following the
+	 * new entry for use in an "atomic rename" operation, that entry
+	 * is always the "old" entry and the "new" entry is what we are
+	 * inserting.  The index/blkno fields refer to the "old" entry,
+	 * while the index2/blkno2 fields refer to the "new" entry.
+	 */
+	if (blk1->index > INT_GET(leaf1->hdr.count, ARCH_CONVERT)) {
+		ASSERT(state->inleaf == 0);
+		blk2->index = blk1->index
+				- INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+		args->index = args->index2 = blk2->index;
+		args->blkno = args->blkno2 = blk2->blkno;
+	} else if (blk1->index == INT_GET(leaf1->hdr.count, ARCH_CONVERT)) {
+		if (state->inleaf) {
+			args->index = blk1->index;
+			args->blkno = blk1->blkno;
+			args->index2 = 0;
+			args->blkno2 = blk2->blkno;
+		} else {
+			blk2->index = blk1->index
+				    - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+			args->index = args->index2 = blk2->index;
+			args->blkno = args->blkno2 = blk2->blkno;
+		}
+	} else {
+		ASSERT(state->inleaf == 1);
+		args->index = args->index2 = blk1->index;
+		args->blkno = args->blkno2 = blk1->blkno;
+	}
+}
+
+/*
+ * Pick the split point that minimizes the difference in byte usage between
+ * the two blocks; countarg/usedbytesarg get the lower block's share.
+ * GROT: Is this really necessary?  With other than a 512 byte blocksize,
+ * GROT: there will always be enough room in either block for a new entry.
+ * GROT: Do a double-split for this case?
+ */
+STATIC int
+xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
+				    xfs_da_state_blk_t *blk1,
+				    xfs_da_state_blk_t *blk2,
+				    int *countarg, int *usedbytesarg)
+{
+	xfs_attr_leafblock_t *leaf1, *leaf2;
+	xfs_attr_leaf_hdr_t *hdr1, *hdr2;
+	xfs_attr_leaf_entry_t *entry;
+	int count, max, index, totallen, half;
+	int lastdelta, foundit, tmp;
+
+	/*
+	 * Set up environment.
+	 */
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	hdr1 = &leaf1->hdr;
+	hdr2 = &leaf2->hdr;
+	foundit = 0;
+	totallen = 0;
+
+	/*
+	 * Examine entries until we reduce the absolute difference in
+	 * byte usage between the two blocks to a minimum.
+	 */
+	max = INT_GET(hdr1->count, ARCH_CONVERT)
+			+ INT_GET(hdr2->count, ARCH_CONVERT);
+	half  = (max+1) * sizeof(*entry);
+	half += INT_GET(hdr1->usedbytes, ARCH_CONVERT)
+				+ INT_GET(hdr2->usedbytes, ARCH_CONVERT)
+				+ xfs_attr_leaf_newentsize(state->args,
+						     state->blocksize, NULL);
+	half /= 2;	/* half of all bytes, new entry included */
+	lastdelta = state->blocksize;
+	entry = &leaf1->entries[0];
+	for (count = index = 0; count < max; entry++, index++, count++) {
+
+#define XFS_ATTR_ABS(A)	(((A) < 0) ? -(A) : (A))
+		/*
+		 * The new entry is in the first block, account for it.
+		 */
+		if (count == blk1->index) {
+			tmp = totallen + sizeof(*entry) +
+				xfs_attr_leaf_newentsize(state->args,
+							 state->blocksize,
+							 NULL);
+			if (XFS_ATTR_ABS(half - tmp) > lastdelta)
+				break;
+			lastdelta = XFS_ATTR_ABS(half - tmp);
+			totallen = tmp;
+			foundit = 1;
+		}
+
+		/*
+		 * Wrap around into the second block if necessary.
+		 */
+		if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
+			leaf1 = leaf2;
+			entry = &leaf1->entries[0];
+			index = 0;
+		}
+
+		/*
+		 * Figure out if next leaf entry would be too much.
+		 */
+		tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1,
+									index);
+		if (XFS_ATTR_ABS(half - tmp) > lastdelta)
+			break;
+		lastdelta = XFS_ATTR_ABS(half - tmp);
+		totallen = tmp;
+#undef XFS_ATTR_ABS
+	}
+
+	/*
+	 * Calculate the number of usedbytes that will end up in lower block.
+	 * If new entry not in lower block, fix up the count.
+	 */
+	totallen -= count * sizeof(*entry);
+	if (foundit) {
+		totallen -= sizeof(*entry) + 
+				xfs_attr_leaf_newentsize(state->args,
+							 state->blocksize,
+							 NULL);
+	}
+
+	*countarg = count;
+	*usedbytesarg = totallen;
+	return(foundit);	/* 1 if the new entry was counted above */
+}
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ * If the current block is over 50% full, don't try to join it, *action = 0.
+ * If the block is empty, fill in the state structure and set *action = 2.
+ * If it can be collapsed, fill in the state structure and set *action = 1.
+ * If nothing can be done, set *action = 0.  Returns 0, or an error code.
+ *
+ * GROT: allow for INCOMPLETE entries in calculation.
+ */
+int
+xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
+{
+	xfs_attr_leafblock_t *leaf;
+	xfs_da_state_blk_t *blk;
+	xfs_da_blkinfo_t *info;
+	int count, bytes, forward, error, retval, i;
+	xfs_dablk_t blkno;
+	xfs_dabuf_t *bp;
+
+	/*
+	 * Check for the degenerate case of the block being over 50% full.
+	 * If so, it's not worth even looking to see if we might be able
+	 * to coalesce with a sibling.
+	 */
+	blk = &state->path.blk[ state->path.active-1 ];
+	info = blk->bp->data;
+	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+	leaf = (xfs_attr_leafblock_t *)info;
+	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	bytes = sizeof(xfs_attr_leaf_hdr_t) +
+		count * sizeof(xfs_attr_leaf_entry_t) +
+		INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+	if (bytes > (state->blocksize >> 1)) {
+		*action = 0;	/* blk over 50%, don't try to join */
+		return(0);
+	}
+
+	/*
+	 * Check for the degenerate case of the block being empty.
+	 * If the block is empty, we'll simply delete it, no need to
+	 * coalesce it with a sibling block.  We choose (arbitrarily)
+	 * to merge with the forward block unless it is NULL.
+	 */
+	if (count == 0) {
+		/*
+		 * Make altpath point to the block we want to keep and
+		 * path point to the block we want to drop (this one).
+		 */
+		forward = (INT_GET(info->forw, ARCH_CONVERT) != 0);
+		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		error = xfs_da_path_shift(state, &state->altpath, forward,
+						 0, &retval);
+		if (error)
+			return(error);
+		if (retval) {
+			*action = 0;
+		} else {
+			*action = 2;
+		}
+		return(0);
+	}
+
+	/*
+	 * Examine each sibling block to see if we can coalesce with
+	 * at least 25% free space to spare.  We need to figure out
+	 * whether to merge with the forward or the backward block.
+	 * We prefer coalescing with the lower numbered sibling so as
+	 * to shrink an attribute list over time.
+	 */
+	/* start with smaller blk num */
+	forward = (INT_GET(info->forw, ARCH_CONVERT)
+					< INT_GET(info->back, ARCH_CONVERT));
+	for (i = 0; i < 2; forward = !forward, i++) {
+		if (forward)
+			blkno = INT_GET(info->forw, ARCH_CONVERT);
+		else
+			blkno = INT_GET(info->back, ARCH_CONVERT);
+		if (blkno == 0)
+			continue;	/* no sibling in this direction */
+		error = xfs_da_read_buf(state->args->trans, state->args->dp,
+					blkno, -1, &bp, XFS_ATTR_FORK);
+		if (error)
+			return(error);
+		ASSERT(bp != NULL);
+
+		leaf = (xfs_attr_leafblock_t *)info;	/* this block */
+		count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		bytes  = state->blocksize - (state->blocksize>>2);
+		bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+		leaf = bp->data;	/* the sibling just read */
+		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+		count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+		bytes -= count * sizeof(xfs_attr_leaf_entry_t);
+		bytes -= sizeof(xfs_attr_leaf_hdr_t);
+		xfs_da_brelse(state->args->trans, bp);
+		if (bytes >= 0)
+			break;	/* fits with at least 25% to spare */
+	}
+	if (i >= 2) {	/* neither sibling qualified */
+		*action = 0;
+		return(0);
+	}
+
+	/*
+	 * Make altpath point to the block we want to keep (the lower
+	 * numbered block) and path point to the block we want to drop.
+	 */
+	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	if (blkno < blk->blkno) {
+		error = xfs_da_path_shift(state, &state->altpath, forward,
+						 0, &retval);
+	} else {
+		error = xfs_da_path_shift(state, &state->path, forward,
+						 0, &retval);
+	}
+	if (error)
+		return(error);
+	if (retval) {
+		*action = 0;
+	} else {
+		*action = 1;
+	}
+	return(0);
+}
+
+/*
+ * Move all the attribute list entries from drop_leaf into save_leaf.
+ */
+void
+xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+				       xfs_da_state_blk_t *save_blk)
+{
+	xfs_attr_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
+	xfs_attr_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
+	xfs_mount_t *mp;
+	char *tmpbuffer;	/* scratch leaf, used only if save_leaf has holes */
+
+	/*
+	 * Set up environment.
+	 */
+	mp = state->mp;
+	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
+	drop_leaf = drop_blk->bp->data;
+	save_leaf = save_blk->bp->data;
+	ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	drop_hdr = &drop_leaf->hdr;
+	save_hdr = &save_leaf->hdr;
+
+	/*
+	 * Save last hashval from dying block for later Btree fixup.
+	 */
+	drop_blk->hashval =
+		INT_GET(drop_leaf->entries[INT_GET(drop_leaf->hdr.count,
+						ARCH_CONVERT)-1].hashval,
+								ARCH_CONVERT);
+
+	/*
+	 * Check if we need a temp buffer, or can we do it in place.
+	 * Note that we don't check drop_leaf for holes because we will
+	 * always be dropping it, toosmall() decided that for us already.
+	 */
+	if (save_hdr->holes == 0) {
+		/*
+		 * dest leaf has no holes, so we add there.  May need
+		 * to make some room in the entry array.
+		 */
+		if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) {
+			xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0,
+			     (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+		} else {
+			xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf,
+				  INT_GET(save_hdr->count, ARCH_CONVERT),
+				  (int)INT_GET(drop_hdr->count, ARCH_CONVERT),
+				  mp);
+		}
+	} else {
+		/*
+		 * Destination has holes, so we make a temporary copy
+		 * of the leaf and add them both to that.
+		 */
+		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
+		ASSERT(tmpbuffer != NULL);
+		bzero(tmpbuffer, state->blocksize);
+		tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer;
+		tmp_hdr = &tmp_leaf->hdr;
+		tmp_hdr->info = save_hdr->info;	/* struct copy */
+		INT_SET(tmp_hdr->count, ARCH_CONVERT, 0);
+		INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
+		if (INT_GET(tmp_hdr->firstused, ARCH_CONVERT) == 0) {
+			INT_SET(tmp_hdr->firstused, ARCH_CONVERT,
+				state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN);
+		}
+		INT_SET(tmp_hdr->usedbytes, ARCH_CONVERT, 0);
+		if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) {
+			xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
+				(int)INT_GET(drop_hdr->count, ARCH_CONVERT),
+				mp);
+			xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf,
+				  INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+				 (int)INT_GET(save_hdr->count, ARCH_CONVERT),
+				 mp);
+		} else {
+			xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
+				(int)INT_GET(save_hdr->count, ARCH_CONVERT),
+				mp);
+			xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf,
+				INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+				(int)INT_GET(drop_hdr->count, ARCH_CONVERT),
+				mp);
+		}
+		bcopy((char *)tmp_leaf, (char *)save_leaf, state->blocksize);
+		kmem_free(tmpbuffer, state->blocksize);
+	}
+
+	xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
+					   state->blocksize - 1);
+
+	/*
+	 * Copy out the surviving block's last hashval for B-tree code.
+	 */
+	save_blk->hashval =
+		INT_GET(save_leaf->entries[INT_GET(save_leaf->hdr.count,
+						ARCH_CONVERT)-1].hashval,
+								ARCH_CONVERT);
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Move the indicated entries from one leaf to another.
+ * NOTE: this routine modifies both source and destination leaves.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
+			xfs_attr_leafblock_t *leaf_d, int start_d,
+			int count, xfs_mount_t *mp)
+{
+	xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
+	xfs_attr_leaf_entry_t *entry_s, *entry_d;
+	int desti, tmp, i;
+
+	/*
+	 * Check for nothing to do.
+	 */
+	if (count == 0)
+		return;
+
+	/*
+	 * Set up environment.
+	 */
+	ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	hdr_s = &leaf_s->hdr;
+	hdr_d = &leaf_d->hdr;
+	ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0)
+				&& (INT_GET(hdr_s->count, ARCH_CONVERT)
+						< (XFS_LBSIZE(mp)/8)));
+	ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >= 
+		((INT_GET(hdr_s->count, ARCH_CONVERT)
+					* sizeof(*entry_s))+sizeof(*hdr_s)));
+	ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
+	ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= 
+		((INT_GET(hdr_d->count, ARCH_CONVERT)
+					* sizeof(*entry_d))+sizeof(*hdr_d)));
+
+	ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
+	ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
+	ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
+
+	/*
+	 * Move the entries in the destination leaf up to make a hole?
+	 */
+	if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
+		tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
+		tmp *= sizeof(xfs_attr_leaf_entry_t);
+		entry_s = &leaf_d->entries[start_d];
+		entry_d = &leaf_d->entries[start_d + count];
+		ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+	}
+
+	/*
+	 * Copy all entry's in the same (sorted) order,
+	 * but allocate attribute info packed and in sequence.
+	 */
+	entry_s = &leaf_s->entries[start_s];
+	entry_d = &leaf_d->entries[start_d];
+	desti = start_d;
+	for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
+		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT)
+				>= INT_GET(hdr_s->firstused, ARCH_CONVERT));
+		tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
+#ifdef GROT
+		/*
+		 * Code to drop INCOMPLETE entries.  Difficult to use as we
+		 * may also need to change the insertion index.  Code turned
+		 * off for 6.2, should be revisited later.
+		 */
+		if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
+			bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
+			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+			entry_d--;	/* to compensate for ++ in loop hdr */
+			desti--;
+			if ((start_s + i) < offset)
+				result++;	/* insertion index adjustment */
+		} else {
+#endif /* GROT */
+			INT_MOD(hdr_d->firstused, ARCH_CONVERT, -tmp);
+			INT_SET(entry_d->hashval, ARCH_CONVERT,
+				    INT_GET(entry_s->hashval, ARCH_CONVERT));
+			INT_SET(entry_d->nameidx, ARCH_CONVERT,
+						INT_GET(hdr_d->firstused,
+								ARCH_CONVERT));
+			entry_d->flags = entry_s->flags;
+			ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp
+							<= XFS_LBSIZE(mp));
+			ovbcopy(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i),
+			      XFS_ATTR_LEAF_NAME(leaf_d, desti), tmp);
+			ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp
+							<= XFS_LBSIZE(mp));
+			bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+			INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
+			INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp);
+			INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+			INT_MOD(hdr_d->count, ARCH_CONVERT, 1);
+			tmp = INT_GET(hdr_d->count, ARCH_CONVERT)
+						* sizeof(xfs_attr_leaf_entry_t)
+						+ sizeof(xfs_attr_leaf_hdr_t);
+			ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
+#ifdef GROT
+		}
+#endif /* GROT */
+	}
+
+	/*
+	 * Zero out the entries we just copied.
+	 */
+	if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
+		tmp = count * sizeof(xfs_attr_leaf_entry_t);
+		entry_s = &leaf_s->entries[start_s];
+		ASSERT(((char *)entry_s + tmp) <=
+		       ((char *)leaf_s + XFS_LBSIZE(mp)));
+		bzero((char *)entry_s, tmp);
+	} else {
+		/*
+		 * Move the remaining entries down to fill the hole,
+		 * then zero the entries at the top.
+		 */
+		tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - count;
+		tmp *= sizeof(xfs_attr_leaf_entry_t);
+		entry_s = &leaf_s->entries[start_s + count];
+		entry_d = &leaf_s->entries[start_s];
+		ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+
+		tmp = count * sizeof(xfs_attr_leaf_entry_t);
+		entry_s = &leaf_s->entries[INT_GET(hdr_s->count,
+							ARCH_CONVERT)];
+		ASSERT(((char *)entry_s + tmp) <=
+		       ((char *)leaf_s + XFS_LBSIZE(mp)));
+		bzero((char *)entry_s, tmp);
+	}
+
+	/*
+	 * Fill in the freemap information
+	 */
+	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT,
+					sizeof(xfs_attr_leaf_hdr_t));
+	INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT,
+				INT_GET(hdr_d->count, ARCH_CONVERT)
+					* sizeof(xfs_attr_leaf_entry_t));
+	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT,
+				INT_GET(hdr_d->firstused, ARCH_CONVERT)
+			      - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+	INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, 0);
+	INT_SET(hdr_d->freemap[2].base, ARCH_CONVERT, 0);
+	INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, 0);
+	INT_SET(hdr_d->freemap[2].size, ARCH_CONVERT, 0);
+	hdr_s->holes = 1;	/* leaf may not be compact */
+}
+
+/*
+ * Compare the "order" of two leaf blocks: return 1 when both are non-empty
+ * and leaf2's first or last hashval is below leaf1's counterpart, else 0.
+ */
+int
+xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+{
+	xfs_attr_leafblock_t *leaf1 = leaf1_bp->data;
+	xfs_attr_leafblock_t *leaf2 = leaf2_bp->data;
+	int count1, count2;
+
+	ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC) &&
+	       (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC));
+	count1 = INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+	count2 = INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+	if (count1 <= 0 || count2 <= 0)
+		return(0);	/* nothing to compare in an empty leaf */
+	if (INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) <
+	    INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT))
+		return(1);
+	if (INT_GET(leaf2->entries[count2 - 1].hashval, ARCH_CONVERT) <
+	    INT_GET(leaf1->entries[count1 - 1].hashval, ARCH_CONVERT))
+		return(1);
+	return(0);
+}
+
+/*
+ * Return the hashval of a leaf block's last entry, or 0 if it has none.
+ */
+xfs_dahash_t
+xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+{
+	xfs_attr_leafblock_t *leaf = bp->data;
+	int nentries;
+
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+	nentries = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	if (count)		/* entry count is reported only on request */
+		*count = nentries;
+	if (nentries == 0)
+		return(0);
+	return(INT_GET(leaf->entries[nentries - 1].hashval, ARCH_CONVERT));
+}
+
+/*
+ * Return how many bytes of this leaf block the attribute at "index" uses
+ * (for local and remote entries alike, only bytes in this block count).
+ */
+int
+xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
+{
+	xfs_attr_leaf_name_local_t *lname;
+	xfs_attr_leaf_name_remote_t *rname;
+
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+						== XFS_ATTR_LEAF_MAGIC);
+
+	if (!(leaf->entries[index].flags & XFS_ATTR_LOCAL)) {
+		/* Remote entry: sized from the name length alone. */
+		rname = XFS_ATTR_LEAF_NAME_REMOTE(leaf, index);
+		return(XFS_ATTR_LEAF_ENTSIZE_REMOTE(rname->namelen));
+	}
+
+	/* Local entry: sized from the name length and the value length. */
+	lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index);
+	return(XFS_ATTR_LEAF_ENTSIZE_LOCAL(lname->namelen,
+				INT_GET(lname->valuelen, ARCH_CONVERT)));
+}
+
+/*
+ * Compute how many bytes of a leaf block a new attribute would need
+ * (only the part stored in this block, whether it ends up local or remote).
+ * The attribute is treated as "local" when its local-format size is below
+ * XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize), otherwise as "remote"; the
+ * decision is reported through *local (1 or 0) when "local" is non-NULL.
+ */
+int
+xfs_attr_leaf_newentsize(xfs_da_args_t *args, int blocksize, int *local)
+{
+	int size, fits;
+
+	/* Start from the size the entry would have in local format. */
+	size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(args->namelen, args->valuelen);
+	fits = (size < XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize));
+
+	/* Too big to keep local: fall back to the remote-format size. */
+	if (!fits)
+		size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(args->namelen);
+
+	if (local)
+		*local = fits;
+	return(size);
+}
diff --git a/libxfs/xfs_bit.c b/libxfs/xfs_bit.c
new file mode 100644
index 000000000..52ab69a51
--- /dev/null
+++ b/libxfs/xfs_bit.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS bit manipulation routines, used in non-realtime code.
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_lowbit[b]: number (0..7) of the lowest set bit of byte b, -1 if b == 0.
+ */
+const char xfs_lowbit[256] = {	/* NOTE(review): -1 needs signed char */
+       -1, 0, 1, 0, 2, 0, 1, 0,			/* 00 .. 07 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 08 .. 0f */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* 10 .. 17 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 18 .. 1f */
+	5, 0, 1, 0, 2, 0, 1, 0,			/* 20 .. 27 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 28 .. 2f */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* 30 .. 37 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 38 .. 3f */
+	6, 0, 1, 0, 2, 0, 1, 0,			/* 40 .. 47 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 48 .. 4f */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* 50 .. 57 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 58 .. 5f */
+	5, 0, 1, 0, 2, 0, 1, 0,			/* 60 .. 67 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 68 .. 6f */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* 70 .. 77 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 78 .. 7f */
+	7, 0, 1, 0, 2, 0, 1, 0,			/* 80 .. 87 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 88 .. 8f */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* 90 .. 97 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* 98 .. 9f */
+	5, 0, 1, 0, 2, 0, 1, 0,			/* a0 .. a7 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* a8 .. af */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* b0 .. b7 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* b8 .. bf */
+	6, 0, 1, 0, 2, 0, 1, 0,			/* c0 .. c7 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* c8 .. cf */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* d0 .. d7 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* d8 .. df */
+	5, 0, 1, 0, 2, 0, 1, 0,			/* e0 .. e7 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* e8 .. ef */
+	4, 0, 1, 0, 2, 0, 1, 0,			/* f0 .. f7 */
+	3, 0, 1, 0, 2, 0, 1, 0,			/* f8 .. ff */
+};
+
+/*
+ * xfs_highbit[b]: number (0..7) of the highest set bit of byte b, -1 if b == 0.
+ */
+const char xfs_highbit[256] = {	/* NOTE(review): -1 needs signed char */
+       -1, 0, 1, 1, 2, 2, 2, 2,			/* 00 .. 07 */
+	3, 3, 3, 3, 3, 3, 3, 3,			/* 08 .. 0f */
+	4, 4, 4, 4, 4, 4, 4, 4,			/* 10 .. 17 */
+	4, 4, 4, 4, 4, 4, 4, 4,			/* 18 .. 1f */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 20 .. 27 */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 28 .. 2f */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 30 .. 37 */
+	5, 5, 5, 5, 5, 5, 5, 5,			/* 38 .. 3f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 40 .. 47 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 48 .. 4f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 50 .. 57 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 58 .. 5f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 60 .. 67 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 68 .. 6f */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 70 .. 77 */
+	6, 6, 6, 6, 6, 6, 6, 6,			/* 78 .. 7f */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 80 .. 87 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 88 .. 8f */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 90 .. 97 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* 98 .. 9f */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* a0 .. a7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* a8 .. af */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* b0 .. b7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* b8 .. bf */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* c0 .. c7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* c8 .. cf */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* d0 .. d7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* d8 .. df */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* e0 .. e7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* e8 .. ef */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* f0 .. f7 */
+	7, 7, 7, 7, 7, 7, 7, 7,			/* f8 .. ff */
+};
+
+/*
+ * xfs_countbit[b]: how many bits (0..8) are set in the byte value b.
+ */
+const char xfs_countbit[256] = {
+	0, 1, 1, 2, 1, 2, 2, 3,			/* 00 .. 07 */
+	1, 2, 2, 3, 2, 3, 3, 4,			/* 08 .. 0f */
+	1, 2, 2, 3, 2, 3, 3, 4,			/* 10 .. 17 */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 18 .. 1f */
+	1, 2, 2, 3, 2, 3, 3, 4,			/* 20 .. 27 */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 28 .. 2f */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 30 .. 37 */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* 38 .. 3f */
+	1, 2, 2, 3, 2, 3, 3, 4,			/* 40 .. 47 */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 48 .. 4f */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 50 .. 57 */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* 58 .. 5f */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 60 .. 67 */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* 68 .. 6f */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* 70 .. 77 */
+	4, 5, 5, 6, 5, 6, 6, 7,			/* 78 .. 7f */
+	1, 2, 2, 3, 2, 3, 3, 4,			/* 80 .. 87 */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 88 .. 8f */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* 90 .. 97 */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* 98 .. 9f */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* a0 .. a7 */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* a8 .. af */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* b0 .. b7 */
+	4, 5, 5, 6, 5, 6, 6, 7,			/* b8 .. bf */
+	2, 3, 3, 4, 3, 4, 4, 5,			/* c0 .. c7 */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* c8 .. cf */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* d0 .. d7 */
+	4, 5, 5, 6, 5, 6, 6, 7,			/* d8 .. df */
+	3, 4, 4, 5, 4, 5, 5, 6,			/* e0 .. e7 */
+	4, 5, 5, 6, 5, 6, 6, 7,			/* e8 .. ef */
+	4, 5, 5, 6, 5, 6, 6, 7,			/* f0 .. f7 */
+	5, 6, 6, 7, 6, 7, 7, 8,			/* f8 .. ff */
+};
+
+/*
+ * xfs_highbit32: return the bit number (0..31) of the most significant set
+ * bit of v, or -1 when v is zero.  The byte holding that bit is located by
+ * halving the word, then xfs_highbit[] resolves the bit within the byte.
+ */
+int
+xfs_highbit32(
+	__uint32_t	v)
+{
+	int		shift;	/* bit offset of the highest non-zero byte */
+
+	if (v == 0)
+		return -1;
+
+	if (v & 0xffff0000) {
+		/* Something is set in the upper half-word. */
+		shift = (v & 0xff000000) ? 24 : 16;
+	} else {
+		/* Only the lower half-word has bits set. */
+		shift = (v & 0x0000ff00) ? 8 : 0;
+	}
+	return shift + xfs_highbit[(v >> shift) & 0xff];
+}
+
+/*
+ * xfs_lowbit64: return the bit number (0..63) of the least significant set
+ * bit of v, or -1 when v is zero.
+ *
+ * The lowest non-zero byte is located by successively halving the value,
+ * then xfs_lowbit[] supplies the position of the bit inside that byte.
+ * When XFS_64 is set the tests are done on the 64-bit value directly;
+ * otherwise on the first non-zero 32-bit half, starting with the low one.
+ */
+int
+xfs_lowbit64(
+	__uint64_t	v)
+{
+	int		i;	/* bit offset of the lowest non-zero byte */
+#if XFS_64
+	/* No bits set at all. */
+	if (v == 0)
+		return -1;
+
+	if (v & 0x00000000ffffffff) {
+		/* A bit is set somewhere in bits 0..31. */
+		if (v & 0x000000000000ffff) {
+			i = (v & 0x00000000000000ff) ? 0 : 8;
+		} else {
+			i = (v & 0x0000000000ff0000) ? 16 : 24;
+		}
+	} else {
+		/* Bits 0..31 are clear, so the answer is in bits 32..63. */
+		if (v & 0x0000ffff00000000) {
+			i = (v & 0x000000ff00000000) ? 32 : 40;
+		} else {
+			i = (v & 0x00ff000000000000) ? 48 : 56;
+		}
+	}
+
+	/* Add the bit's position within that byte. */
+	return i + xfs_lowbit[(v >> i) & 0xff];
+#else
+	__uint32_t	vw;	/* the 32-bit half being examined */
+	int		base;	/* bit number of bit 0 of that half */
+
+	/*
+	 * Prefer the low half; fall back to the high half only when
+	 * the low half has no bits set at all.
+	 */
+	vw = (__uint32_t)v;
+	base = 0;
+	if (vw == 0) {
+		vw = (__uint32_t)(v >> 32);
+		base = 32;
+		if (vw == 0)
+			return -1;
+	}
+
+	/* Narrow down to the lowest non-zero byte of the chosen half. */
+	if (vw & 0x0000ffff) {
+		i = (vw & 0x000000ff) ? 0 : 8;
+	} else {
+		i = (vw & 0x00ff0000) ? 16 : 24;
+	}
+
+	/* Add the bit's position within that byte. */
+	return base + i + xfs_lowbit[(vw >> i) & 0xff];
+#endif
+}
+
+/*
+ * xfs_highbit64: return the bit number (0..63) of the most significant set
+ * bit of v, or -1 when v is zero.
+ *
+ * The highest non-zero byte is located by successively halving the value,
+ * then xfs_highbit[] supplies the position of the bit inside that byte.
+ * When XFS_64 is set the tests are done on the 64-bit value directly;
+ * otherwise on the first non-zero 32-bit half, starting with the high one.
+ */
+int
+xfs_highbit64(
+	__uint64_t	v)
+{
+	int		i;	/* bit offset of the highest non-zero byte */
+#if  XFS_64
+	/* No bits set at all. */
+	if (v == 0)
+		return -1;
+
+	if (v & 0xffffffff00000000) {
+		/* A bit is set somewhere in bits 32..63. */
+		if (v & 0xffff000000000000) {
+			i = (v & 0xff00000000000000) ? 56 : 48;
+		} else {
+			i = (v & 0x0000ff0000000000) ? 40 : 32;
+		}
+	} else {
+		/* Bits 32..63 are clear, so the answer is in bits 0..31. */
+		if (v & 0x00000000ffff0000) {
+			i = (v & 0x00000000ff000000) ? 24 : 16;
+		} else {
+			i = (v & 0x000000000000ff00) ? 8 : 0;
+		}
+	}
+
+	/* Add the bit's position within that byte. */
+	return i + xfs_highbit[(v >> i) & 0xff];
+#else
+	__uint32_t	vw;	/* the 32-bit half being examined */
+	int		base;	/* bit number of bit 0 of that half */
+
+	/*
+	 * Prefer the high half; fall back to the low half only when
+	 * the high half has no bits set at all.
+	 */
+	vw = (__uint32_t)(v >> 32);
+	base = 32;
+	if (vw == 0) {
+		vw = (__uint32_t)v;
+		base = 0;
+		if (vw == 0)
+			return -1;
+	}
+
+	/* Narrow down to the highest non-zero byte of the chosen half. */
+	if (vw & 0xffff0000) {
+		i = (vw & 0xff000000) ? 24 : 16;
+	} else {
+		i = (vw & 0x0000ff00) ? 8 : 0;
+	}
+
+	/* Add the bit's position within that byte. */
+	return base + i + xfs_highbit[(vw >> i) & 0xff];
+#endif
+}
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
new file mode 100644
index 000000000..88e597e72
--- /dev/null
+++ b/libxfs/xfs_bmap.c
@@ -0,0 +1,4511 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+xfs_zone_t		*xfs_bmap_free_item_zone;
+
+/*
+ * Called by xfs_bmapi after allocating space (or doing a delayed allocation)
+ * to merge "new" into the fork's extent list and btree; sets *logflagsp.
+ */
+STATIC int				/* error */
+xfs_bmap_add_extent(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
+	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
+	int			*logflagsp, /* inode logging flags */
+	int			whichfork, /* data or attr fork */
+	int			rsvd)	/* OK to use reserved data blocks */
+{
+	xfs_btree_cur_t		*cur;	/* btree cursor or null */
+	xfs_filblks_t		da_new; /* new count del alloc blocks used */
+	xfs_filblks_t		da_old; /* old count del alloc blocks used */
+	int			error;	/* error return value */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent";
+#endif
+	xfs_ifork_t		*ifp;	/* inode fork ptr */
+	int			logflags; /* returned value */
+	xfs_extnum_t		nextents; /* number of extents in file now */
+
+	XFS_STATS_INC(xs_add_exlist);
+	cur = *curp;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(idx <= nextents);
+	da_old = da_new = 0;
+	error = 0;
+	/*
+	 * This is the first extent added to a new/empty file.
+	 * Special case this one, so other routines get to assume there are
+	 * already extents in the list.
+	 */
+	if (nextents == 0) {
+		xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
+			NULL, whichfork);
+		xfs_bmap_insert_exlist(ip, 0, 1, new, whichfork);
+		ASSERT(cur == NULL);
+		ifp->if_lastex = 0;
+		if (!ISNULLSTARTBLOCK(new->br_startblock)) {
+			XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+			logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+		} else
+			logflags = 0;
+	}
+	/*
+	 * Any kind of new delayed allocation (null startblock in "new") goes here.
+	 */
+	else if (ISNULLSTARTBLOCK(new->br_startblock)) {
+		if (cur)
+			ASSERT((cur->bc_private.b.flags &
+				XFS_BTCUR_BPRV_WASDEL) == 0);
+		if (error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new,
+				&logflags, rsvd))
+			goto done;
+	}
+	/*
+	 * Real allocation past the last extent in the list (idx == nextents).
+	 */
+	else if (idx == nextents) {
+		if (cur)
+			ASSERT((cur->bc_private.b.flags &
+				XFS_BTCUR_BPRV_WASDEL) == 0);
+		if (error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
+				&logflags, whichfork))
+			goto done;
+	} else {
+		xfs_bmbt_irec_t	prev;	/* old extent at offset idx */
+
+		/*
+		 * Get the record referred to by idx.
+		 */
+		xfs_bmbt_get_all(&ifp->if_u1.if_extents[idx], &prev);
+		/*
+		 * If it's a real allocation record, and the new allocation ends
+		 * after the start of the referred to record, then we're filling
+		 * in a delayed or unwritten allocation with a real one, or
+		 * converting real back to unwritten.
+		 */
+		if (!ISNULLSTARTBLOCK(new->br_startblock) &&
+		    new->br_startoff + new->br_blockcount > prev.br_startoff) {
+			if (prev.br_state != XFS_EXT_UNWRITTEN && 
+			    ISNULLSTARTBLOCK(prev.br_startblock)) {
+				da_old = STARTBLOCKVAL(prev.br_startblock);
+				if (cur)
+					ASSERT(cur->bc_private.b.flags &
+						XFS_BTCUR_BPRV_WASDEL);
+				if (error = xfs_bmap_add_extent_delay_real(ip,
+					idx, &cur, new, &da_new, first, flist,
+					&logflags, rsvd))
+					goto done;
+			} else if (new->br_state == XFS_EXT_NORM) {
+				ASSERT(new->br_state == XFS_EXT_NORM);
+				if (error = xfs_bmap_add_extent_unwritten_real(
+					ip, idx, &cur, new, &logflags))
+					goto done;
+			} else {	/* new is unwritten; same helper as above */
+				ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
+				if (error = xfs_bmap_add_extent_unwritten_real(
+					ip, idx, &cur, new, &logflags))
+					goto done;
+			}
+			ASSERT(*curp == cur || *curp == NULL);
+		}
+		/*
+		 * Otherwise we're filling in a hole with an allocation.
+		 */
+		else {
+			if (cur)
+				ASSERT((cur->bc_private.b.flags &
+					XFS_BTCUR_BPRV_WASDEL) == 0);
+			if (error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
+					new, &logflags, whichfork))
+				goto done;
+		}
+	}
+
+	ASSERT(*curp == cur || *curp == NULL);
+	/*
+	 * Convert to a btree once the extent count exceeds if_ext_max.
+	 */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+		int	tmp_logflags;	/* partial log flag return val */
+
+		ASSERT(cur == NULL);
+		error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
+			flist, &cur, da_old > 0, &tmp_logflags, whichfork);
+		logflags |= tmp_logflags;
+		if (error)
+			goto done;
+	}
+	/*
+	 * Adjust for changes in reserved delayed indirect blocks: the unused
+	 * part of da_old is passed to xfs_mod_incore_sb.  No disk quota work.
+	 */
+	if (da_old || da_new) {
+		xfs_filblks_t	nblks;
+
+		nblks = da_new;
+		if (cur)
+			nblks += cur->bc_private.b.allocated;
+		ASSERT(nblks <= da_old);
+		if (nblks < da_old)
+			xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+				(int)(da_old - nblks), rsvd);
+	}
+	/*
+	 * Done with the allocated field; clear it and pass cur back via *curp.
+	 */
+	if (cur) {
+		cur->bc_private.b.allocated = 0;
+		*curp = cur;
+	}
+done:	/* error exits skip the *curp update above */
+#ifdef XFSDEBUG
+	if (!error)
+		xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
+#endif
+	*logflagsp = logflags;	/* NOTE(review): set by helpers on error? */
+	return error;
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a delayed
+ * allocation to a real allocation.
+ */
+STATIC int				/* error */
+xfs_bmap_add_extent_delay_real(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	xfs_filblks_t		*dnew,	/* new delayed-alloc indirect blocks */
+	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
+	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
+	int			*logflagsp, /* inode logging flags */
+	int			rsvd)	/* OK to use reserved data block allocation */
+{
+	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
+	xfs_btree_cur_t		*cur;	/* btree cursor */
+	int			diff;	/* temp value */
+	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
+	int			error;	/* error return value */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_delay_real";
+#endif
+	int			i;	/* temp state */
+	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
+	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
+					/* left is 0, right is 1, prev is 2 */
+	int			rval;	/* return value (logging flags) */
+	int			state = 0;/* state bits, accessed thru macros */
+	xfs_filblks_t		temp;	/* value for dnew calculations */
+	xfs_filblks_t		temp2;	/* value for dnew calculations */
+	int			tmp_rval;	/* partial logging flags */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_FILLING,	RIGHT_FILLING,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	LEFT		r[0]
+#define	RIGHT		r[1]
+#define	PREV		r[2]
+#define	MASK(b)		(1 << (b))
+#define	MASK2(a,b)	(MASK(a) | MASK(b))
+#define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
+#define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
+#define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)	(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		\
+	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
+
+	/*
+	 * Set up a bunch of variables to make the tests simpler.
+	 */
+	cur = *curp;
+	base = ip->i_df.if_u1.if_extents;
+	ep = &base[idx];
+	xfs_bmbt_get_all(ep, &PREV);
+	new_endoff = new->br_startoff + new->br_blockcount;
+	ASSERT(PREV.br_startoff <= new->br_startoff);
+	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+	/*
+	 * Set flags determining what part of the previous delayed allocation
+	 * extent is being replaced by a real allocation.
+	 */
+	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
+	STATE_SET(RIGHT_FILLING,
+		PREV.br_startoff + PREV.br_blockcount == new_endoff);
+	/*
+	 * Check and set flags if this segment has a left neighbor.
+	 * Don't set contiguous if the combined extent would be too large.
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &LEFT);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
+	}
+	STATE_SET(LEFT_CONTIG, 
+		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+		LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+		LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+		LEFT.br_state == new->br_state &&
+		LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	/*
+	 * Check and set flags if this segment has a right neighbor.
+	 * Don't set contiguous if the combined extent would be too large.
+	 * Also check for all-three-contiguous being too large.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			idx <
+			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+		xfs_bmbt_get_all(ep + 1, &RIGHT);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
+	}
+	STATE_SET(RIGHT_CONTIG, 
+		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+		new_endoff == RIGHT.br_startoff &&
+		new->br_startblock + new->br_blockcount ==
+		    RIGHT.br_startblock &&
+		new->br_state == RIGHT.br_state &&
+		new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+		((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
+		  MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
+		 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+		     <= MAXEXTLEN));
+	error = 0;
+	/*
+	 * Switch out based on the FILLING and CONTIG state bits.
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * Filling in all of a previously delayed allocation extent.
+		 * The left and right neighbors are both contiguous with new.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + PREV.br_blockcount +
+			RIGHT.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		ip->i_d.di_nextents--;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_delete(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_decrement(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+					LEFT.br_startblock,
+					LEFT.br_blockcount +
+					PREV.br_blockcount +
+					RIGHT.br_blockcount, LEFT.br_state))
+				goto done;
+		}
+		*dnew = 0;
+		break;
+
+	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+		/*
+		 * Filling in all of a previously delayed allocation extent.
+		 * The left neighbor is contiguous, the right is not.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + PREV.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+					LEFT.br_startblock, LEFT.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+					LEFT.br_startblock,
+					LEFT.br_blockcount +
+					PREV.br_blockcount, LEFT.br_state))
+				goto done;
+		}
+		*dnew = 0;
+		break;
+
+	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+		/*
+		 * Filling in all of a previously delayed allocation extent.
+		 * The right neighbor is contiguous, the left is not.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_startblock(ep, new->br_startblock);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount + RIGHT.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, PREV.br_startoff,
+					new->br_startblock,
+					PREV.br_blockcount +
+					RIGHT.br_blockcount, PREV.br_state))
+				goto done;
+		}
+		*dnew = 0;
+		break;
+
+	case MASK2(LEFT_FILLING, RIGHT_FILLING):
+		/*
+		 * Filling in all of a previously delayed allocation extent.
+		 * Neither the left nor right neighbors are contiguous with
+		 * the new one.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_startblock(ep, new->br_startblock);
+		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 0);
+			cur->bc_rec.b.br_state = XFS_EXT_NORM;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		*dnew = 0;
+		break;
+
+	case MASK2(LEFT_FILLING, LEFT_CONTIG):
+		/*
+		 * Filling in the first part of a previous delayed allocation.
+		 * The left neighbor is contiguous.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + new->br_blockcount);
+		xfs_bmbt_set_startoff(ep,
+			PREV.br_startoff + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		temp = PREV.br_blockcount - new->br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep, temp);
+		ip->i_df.if_lastex = idx - 1;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+					LEFT.br_startblock, LEFT.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+					LEFT.br_startblock,
+					LEFT.br_blockcount +
+					new->br_blockcount,
+					LEFT.br_state))
+				goto done;
+		}
+		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+			STARTBLOCKVAL(PREV.br_startblock));
+		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+			XFS_DATA_FORK);
+		*dnew = temp;
+		break;
+
+	case MASK(LEFT_FILLING):
+		/*
+		 * Filling in the first part of a previous delayed allocation.
+		 * The left neighbor is not contiguous.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_startoff(ep, new_endoff);
+		temp = PREV.br_blockcount - new->br_blockcount;
+		xfs_bmbt_set_blockcount(ep, temp);
+		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 0);
+			cur->bc_rec.b.br_state = XFS_EXT_NORM;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
+			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+					first, flist, &cur, 1, &tmp_rval,
+					XFS_DATA_FORK);
+			rval |= tmp_rval;
+			if (error)
+				goto done;
+		}
+		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+			STARTBLOCKVAL(PREV.br_startblock) -
+			(cur ? cur->bc_private.b.allocated : 0));
+		base = ip->i_df.if_u1.if_extents;
+		ep = &base[idx + 1];
+		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+		xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
+			XFS_DATA_FORK);
+		*dnew = temp;
+		break;
+
+	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+		/*
+		 * Filling in the last part of a previous delayed allocation.
+		 * The right neighbor is contiguous with the new allocation.
+		 */
+		temp = PREV.br_blockcount - new->br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep, temp);
+		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+			new->br_blockcount + RIGHT.br_blockcount, 
+			RIGHT.br_state);
+		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, new->br_startoff,
+					new->br_startblock,
+					new->br_blockcount +
+					RIGHT.br_blockcount,
+					RIGHT.br_state))
+				goto done;
+		}
+		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+			STARTBLOCKVAL(PREV.br_startblock));
+		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		*dnew = temp;
+		break;
+
+	case MASK(RIGHT_FILLING):
+		/*
+		 * Filling in the last part of a previous delayed allocation.
+		 * The right neighbor is not contiguous.
+		 */
+		temp = PREV.br_blockcount - new->br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep, temp);
+		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+			new, NULL, XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 0);
+			cur->bc_rec.b.br_state = XFS_EXT_NORM;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
+			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+				first, flist, &cur, 1, &tmp_rval,
+				XFS_DATA_FORK);
+			rval |= tmp_rval;
+			if (error)
+				goto done;
+		}
+		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+			STARTBLOCKVAL(PREV.br_startblock) -
+			(cur ? cur->bc_private.b.allocated : 0));
+		base = ip->i_df.if_u1.if_extents;
+		ep = &base[idx];
+		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		*dnew = temp;
+		break;
+
+	case 0:
+		/*
+		 * Filling in the middle part of a previous delayed allocation.
+		 * Contiguity is impossible here.
+		 * This case is avoided almost all the time.
+		 */
+		temp = new->br_startoff - PREV.br_startoff;
+		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep, temp);
+		r[0] = *new;
+		r[1].br_startoff = new_endoff;
+		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
+		r[1].br_blockcount = temp2;
+		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 0);
+			cur->bc_rec.b.br_state = XFS_EXT_NORM;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
+			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+					first, flist, &cur, 1, &tmp_rval,
+					XFS_DATA_FORK);
+			rval |= tmp_rval;
+			if (error)
+				goto done;
+		}
+		temp = xfs_bmap_worst_indlen(ip, temp);
+		temp2 = xfs_bmap_worst_indlen(ip, temp2);
+		diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) -
+			(cur ? cur->bc_private.b.allocated : 0));
+		if (diff > 0 &&
+		    xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -diff, rsvd)) {
+			/*
+			 * Ick gross gag me with a spoon.
+			 */
+			ASSERT(0);	/* want to see if this ever happens! */
+			while (diff > 0) {
+				if (temp) {
+					temp--;
+					diff--;
+					if (!diff ||
+					    !xfs_mod_incore_sb(ip->i_mount,
+						    XFS_SBS_FDBLOCKS, -diff, rsvd))
+						break;
+				}
+				if (temp2) {
+					temp2--;
+					diff--;
+					if (!diff ||
+					    !xfs_mod_incore_sb(ip->i_mount,
+						    XFS_SBS_FDBLOCKS, -diff, rsvd))
+						break;
+				}
+			}
+		}
+		base = ip->i_df.if_u1.if_extents;
+		ep = &base[idx];
+		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_startblock(ep + 2, NULLSTARTBLOCK((int)temp2));
+		xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
+			XFS_DATA_FORK);
+		*dnew = temp + temp2;
+		break;
+
+	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
+	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK(LEFT_CONTIG):
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * These cases are all impossible.
+		 */
+		ASSERT(0);
+	}
+	*curp = cur;
+done:
+	*logflagsp = rval;
+	return error;
+#undef	LEFT
+#undef	RIGHT
+#undef	PREV
+#undef	MASK
+#undef	MASK2
+#undef	MASK3
+#undef	MASK4
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to flip all or part of the extent at idx
+ * between unwritten and real; new->br_state is the state being switched to.
+ */
+STATIC int				/* 0, or error from a bmbt call */
+xfs_bmap_add_extent_unwritten_real(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	int			*logflagsp) /* out: inode logging flags */
+{
+	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
+	xfs_btree_cur_t		*cur;	/* btree cursor */
+	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
+	int			error;	/* error return value */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_unwritten_real";
+#endif
+	int			i;	/* temp state */
+	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
+	xfs_exntst_t		newext;	/* new extent state */
+	xfs_exntst_t		oldext;	/* old extent state (opposite of new) */
+	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
+					/* left is 0, right is 1, prev is 2 */
+	int			rval = 0; /* logging flags; 0 if no case ran */
+	int			state = 0;/* state bits, accessed thru macros */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_FILLING,	RIGHT_FILLING,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	LEFT		r[0]
+#define	RIGHT		r[1]
+#define	PREV		r[2]
+#define	MASK(b)		(1 << (b))
+#define	MASK2(a,b)	(MASK(a) | MASK(b))
+#define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
+#define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
+#define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)	(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		\
+	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
+
+	/*
+	 * Set up a bunch of variables to make the tests simpler.
+	 */
+	error = 0;
+	cur = *curp;
+	base = ip->i_df.if_u1.if_extents;
+	ep = &base[idx];
+	xfs_bmbt_get_all(ep, &PREV);
+	newext = new->br_state;
+	oldext = (newext == XFS_EXT_UNWRITTEN) ?
+		XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+	ASSERT(PREV.br_state == oldext);
+	new_endoff = new->br_startoff + new->br_blockcount;
+	ASSERT(PREV.br_startoff <= new->br_startoff);
+	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+	/*
+	 * Set flags determining what part of the previous oldext allocation
+	 * extent is being replaced by a newext allocation.
+	 */
+	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
+	STATE_SET(RIGHT_FILLING,
+		PREV.br_startoff + PREV.br_blockcount == new_endoff);
+	/*
+	 * Check and set flags if this segment has a left neighbor.
+	 * Don't set contiguous if the combined extent would be too large.
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &LEFT);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
+	}
+	STATE_SET(LEFT_CONTIG,
+		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+		LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+		LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+		LEFT.br_state == newext &&
+		LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	/*
+	 * Check and set flags if this segment has a right neighbor.
+	 * Don't set contiguous if the combined extent would be too large.
+	 * Also check for all-three-contiguous being too large.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			idx <
+			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+		xfs_bmbt_get_all(ep + 1, &RIGHT);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
+	}
+	STATE_SET(RIGHT_CONTIG,
+		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+		new_endoff == RIGHT.br_startoff &&
+		new->br_startblock + new->br_blockcount ==
+		    RIGHT.br_startblock &&
+		newext == RIGHT.br_state &&
+		new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+		((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
+		  MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
+		 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+		     <= MAXEXTLEN));
+	/*
+	 * Switch out based on the FILLING and CONTIG state bits.
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The left and right neighbors are both contiguous with new.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + PREV.br_blockcount +
+			RIGHT.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		ip->i_d.di_nextents -= 2;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_delete(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_decrement(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_delete(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_decrement(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+				LEFT.br_startblock,
+				LEFT.br_blockcount + PREV.br_blockcount +
+				RIGHT.br_blockcount, LEFT.br_state))
+				goto done;
+		}
+		break;
+
+	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The left neighbor is contiguous, the right is not.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + PREV.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		ip->i_d.di_nextents--;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_delete(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_decrement(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+				LEFT.br_startblock,
+				LEFT.br_blockcount + PREV.br_blockcount,
+				LEFT.br_state))
+				goto done;
+		}
+		break;
+
+	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The right neighbor is contiguous, the left is not.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount + RIGHT.br_blockcount);
+		xfs_bmbt_set_state(ep, newext);
+		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+		ip->i_d.di_nextents--;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_delete(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_decrement(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock,
+				new->br_blockcount + RIGHT.br_blockcount,
+				newext))
+				goto done;
+		}
+		break;
+
+	case MASK2(LEFT_FILLING, RIGHT_FILLING):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * Neither the left nor right neighbors are contiguous with
+		 * the new one.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_state(ep, newext);
+		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock, new->br_blockcount,
+				newext))
+				goto done;
+		}
+		break;
+
+	case MASK2(LEFT_FILLING, LEFT_CONTIG):
+		/*
+		 * Setting the first part of a previous oldext extent to newext.
+		 * The left neighbor is contiguous.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + new->br_blockcount);
+		xfs_bmbt_set_startoff(ep,
+			PREV.br_startoff + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_startblock(ep,
+			new->br_startblock + new->br_blockcount);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur,
+				PREV.br_startoff + new->br_blockcount,
+				PREV.br_startblock + new->br_blockcount,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext))
+				goto done;
+			if (error = xfs_bmbt_decrement(cur, 0, &i))
+				goto done;
+			if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+				LEFT.br_startblock,
+				LEFT.br_blockcount + new->br_blockcount,
+				LEFT.br_state))
+				goto done;	/* failure must reach caller */
+		}
+		break;
+
+	case MASK(LEFT_FILLING):
+		/*
+		 * Setting the first part of a previous oldext extent to newext.
+		 * The left neighbor is not contiguous.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
+		xfs_bmbt_set_startoff(ep, new_endoff);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmbt_set_startblock(ep,
+			new->br_startblock + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur,
+				PREV.br_startoff + new->br_blockcount,
+				PREV.br_startblock + new->br_blockcount,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext))
+				goto done;
+			cur->bc_rec.b = *new;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		break;
+
+	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+		/*
+		 * Setting the last part of a previous oldext extent to newext.
+		 * The right neighbor is contiguous with the new allocation.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+			new->br_blockcount + RIGHT.br_blockcount, newext);
+		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock,
+					PREV.br_blockcount, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, PREV.br_startoff,
+				PREV.br_startblock,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext))
+				goto done;
+			if (error = xfs_bmbt_increment(cur, 0, &i))
+				goto done;
+			if (error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock,
+				new->br_blockcount + RIGHT.br_blockcount,
+				newext))
+				goto done;
+		}
+		break;
+
+	case MASK(RIGHT_FILLING):
+		/*
+		 * Setting the last part of a previous oldext extent to newext.
+		 * The right neighbor is not contiguous.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+			new, NULL, XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_update(cur, PREV.br_startoff,
+				PREV.br_startblock,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext))
+				goto done;
+			if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 0);
+			cur->bc_rec.b.br_state = newext; /* as in incore list */
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		break;
+
+	case 0:
+		/*
+		 * Setting the middle part of a previous oldext extent to
+		 * newext.  Contiguity is impossible here.
+		 * One extent becomes three extents.
+		 */
+		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			new->br_startoff - PREV.br_startoff);
+		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		r[0] = *new;
+		r[1].br_startoff = new_endoff;
+		r[1].br_blockcount =
+			PREV.br_startoff + PREV.br_blockcount - new_endoff;
+		r[1].br_startblock = new->br_startblock + new->br_blockcount;
+		r[1].br_state = oldext;
+		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		ip->i_d.di_nextents += 2;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+			/* new right extent - oldext */
+			if (error = xfs_bmbt_update(cur, r[1].br_startoff,
+				r[1].br_startblock, r[1].br_blockcount,
+				r[1].br_state))
+				goto done;
+			/* new left extent - oldext */
+			PREV.br_blockcount =
+				new->br_startoff - PREV.br_startoff;
+			cur->bc_rec.b = PREV;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+			if (error = xfs_bmbt_increment(cur, 0, &i))
+				goto done;
+			ASSERT(i == 1);
+			/* new middle extent - newext */
+			cur->bc_rec.b = *new;
+			if (error = xfs_bmbt_insert(cur, &i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		break;
+
+	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
+	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK(LEFT_CONTIG):
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * These cases are all impossible.
+		 */
+		ASSERT(0);
+	}
+	*curp = cur;
+done:
+	*logflagsp = rval;	/* stored on the error paths as well */
+	return error;
+#undef	LEFT
+#undef	RIGHT
+#undef	PREV
+#undef	MASK
+#undef	MASK2
+#undef	MASK3
+#undef	MASK4
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to turn a hole into a delayed allocation,
+ * merging with delayed neighbors; always returns 0 and sets *logflagsp to 0.
+ */
+/*ARGSUSED*/
+STATIC int				/* error (always 0 here) */
+xfs_bmap_add_extent_hole_delay(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		*cur,	/* if null, not a btree (unused here) */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	int			*logflagsp, /* out: inode logging flags */
+	int			rsvd)		/* OK to allocate reserved blocks */
+{
+	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
+	xfs_bmbt_rec_t		*ep;	/* extent list entry for idx */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_hole_delay";
+#endif
+	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
+	xfs_filblks_t		newlen;	/* new indirect size */
+	xfs_filblks_t		oldlen;	/* old indirect size */
+	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
+	int			state;  /* state bits, accessed thru macros */
+	xfs_filblks_t		temp;	/* temp for indirect calculations */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	MASK(b)			(1 << (b))
+#define	MASK2(a,b)		(MASK(a) | MASK(b))
+#define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)		(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
+
+	base = ip->i_df.if_u1.if_extents;
+	ep = &base[idx];	/* only read below when RIGHT_VALID is set */
+	state = 0;
+	ASSERT(ISNULLSTARTBLOCK(new->br_startblock));
+	/*
+	 * Check and set flags if this segment has a left neighbor.
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &left);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
+	}
+	/*
+	 * Check and set flags if the current (right) segment exists.
+	 * If it doesn't exist, we're converting the hole at end-of-file.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			   idx <
+			   ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+		xfs_bmbt_get_all(ep, &right);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
+	}
+	/*
+	 * Set contiguity flags on the left and right neighbors.
+	 * Don't let extents get too large, even if the pieces are contiguous.
+	 */
+	STATE_SET(LEFT_CONTIG, 
+		STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) &&
+		left.br_startoff + left.br_blockcount == new->br_startoff &&
+		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	STATE_SET(RIGHT_CONTIG,
+		STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) &&
+		new->br_startoff + new->br_blockcount == right.br_startoff &&
+		new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+		(!STATE_TEST(LEFT_CONTIG) ||
+		 (left.br_blockcount + new->br_blockcount +
+		     right.br_blockcount <= MAXEXTLEN)));
+	/*
+	 * Switch out based on the contiguity flags (all four values handled).
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with delayed allocations
+		 * on the left and on the right.
+		 * Merge all three into the left entry and delete the right one.
+		 */
+		temp = left.br_blockcount + new->br_blockcount +
+			right.br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1, temp);
+		oldlen = STARTBLOCKVAL(left.br_startblock) +
+			STARTBLOCKVAL(new->br_startblock) +
+			STARTBLOCKVAL(right.br_startblock);
+		newlen = xfs_bmap_worst_indlen(ip, temp);
+		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		break;
+
+	case MASK(LEFT_CONTIG):
+		/*
+		 * New allocation is contiguous with a delayed allocation
+		 * on the left.
+		 * Merge the new allocation with the left neighbor.
+		 */
+		temp = left.br_blockcount + new->br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1, temp);
+		oldlen = STARTBLOCKVAL(left.br_startblock) +
+			STARTBLOCKVAL(new->br_startblock);
+		newlen = xfs_bmap_worst_indlen(ip, temp);
+		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		break;
+
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with a delayed allocation
+		 * on the right.
+		 * Merge the new allocation into the right neighbor's entry.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		temp = new->br_blockcount + right.br_blockcount;
+		oldlen = STARTBLOCKVAL(new->br_startblock) +
+			STARTBLOCKVAL(right.br_startblock);
+		newlen = xfs_bmap_worst_indlen(ip, temp);
+		xfs_bmbt_set_allf(ep, new->br_startoff,
+			NULLSTARTBLOCK((int)newlen), temp, right.br_state); 
+		xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		break;
+
+	case 0:
+		/*
+		 * New allocation is not contiguous with another
+		 * delayed allocation.
+		 * Insert a new entry; oldlen == newlen skips the fixup below.
+		 */
+		oldlen = newlen = 0;
+		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		break;
+	}
+	if (oldlen != newlen) {
+		ASSERT(oldlen > newlen);
+		xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+			(int)(oldlen - newlen), rsvd);
+		/*
+		 * Nothing to do for disk quota accounting here.
+		 */
+	}
+	*logflagsp = 0;
+	return 0;
+#undef	MASK
+#undef	MASK2
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a hole
+ * to a real allocation.
+ */
+STATIC int				/* error */
+xfs_bmap_add_extent_hole_real(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		*cur,	/* if null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	int			*logflagsp, /* inode logging flags */
+	int			whichfork) /* data or attr fork */
+{
+	xfs_bmbt_rec_t		*ep;	/* pointer to extent entry ins. point */
+	int			error;	/* error return value */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_hole_real";
+#endif
+	int			i;	/* temp state */
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
+	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
+	int			state;	/* state bits, accessed thru macros */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	MASK(b)			(1 << (b))
+#define	MASK2(a,b)		(MASK(a) | MASK(b))
+#define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)		(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
+	ep = &ifp->if_u1.if_extents[idx];
+	state = 0;
+	/*
+	 * Check and set flags if this segment has a left neighbor.
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &left);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
+	}
+	/*
+	 * Check and set flags if this segment has a current value.
+	 * Not true if we're inserting into the "hole" at eof.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			   idx <
+			   ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+		xfs_bmbt_get_all(ep, &right);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
+	}
+	/*
+	 * We're inserting a real allocation between "left" and "right".
+	 * Set the contiguity flags.  Don't let extents get too large.
+	 */
+	STATE_SET(LEFT_CONTIG, 
+		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+		left.br_startoff + left.br_blockcount == new->br_startoff &&
+		left.br_startblock + left.br_blockcount == new->br_startblock &&
+		left.br_state == new->br_state &&
+		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	STATE_SET(RIGHT_CONTIG,
+		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+		new->br_startoff + new->br_blockcount == right.br_startoff &&
+		new->br_startblock + new->br_blockcount ==
+		    right.br_startblock &&
+		new->br_state == right.br_state &&
+		new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+		(!STATE_TEST(LEFT_CONTIG) ||
+		 left.br_blockcount + new->br_blockcount +
+		     right.br_blockcount <= MAXEXTLEN));
+
+	/*
+	 * Select which case we're in here, and implement it.
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with real allocations on the
+		 * left and on the right.
+		 * Merge all three into a single extent list entry.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+			whichfork);
+		xfs_bmbt_set_blockcount(ep - 1,
+			left.br_blockcount + new->br_blockcount +
+			right.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+			whichfork);
+		xfs_bmap_trace_delete(fname, "LC|RC", ip,
+			idx, 1, whichfork);
+		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+		ifp->if_lastex = idx - 1;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = XFS_ILOG_CORE;
+		if (error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
+				right.br_startblock, right.br_blockcount, &i))
+			return error;
+		ASSERT(i == 1);
+		if (error = xfs_bmbt_delete(cur, 0, &i))
+			return error;
+		ASSERT(i == 1);
+		if (error = xfs_bmbt_decrement(cur, 0, &i))
+			return error;
+		ASSERT(i == 1);
+		error = xfs_bmbt_update(cur, left.br_startoff,
+				left.br_startblock,
+				left.br_blockcount + new->br_blockcount +
+				right.br_blockcount, left.br_state);
+		return error;
+
+	case MASK(LEFT_CONTIG):
+		/*
+		 * New allocation is contiguous with a real allocation
+		 * on the left.
+		 * Merge the new allocation with the left neighbor.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
+		xfs_bmbt_set_blockcount(ep - 1,
+			left.br_blockcount + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
+		ifp->if_lastex = idx - 1;
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = 0;
+		if (error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
+				left.br_startblock, left.br_blockcount, &i))
+			return error;
+		ASSERT(i == 1);
+		error = xfs_bmbt_update(cur, left.br_startoff,
+				left.br_startblock,
+				left.br_blockcount + new->br_blockcount,
+				left.br_state);
+		return error;
+
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with a real allocation
+		 * on the right.
+		 * Merge the new allocation with the right neighbor.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
+		xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
+			new->br_blockcount + right.br_blockcount,
+			right.br_state);
+		xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
+		ifp->if_lastex = idx;
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = 0;
+		if (error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
+				right.br_startblock, right.br_blockcount, &i))
+			return error;
+		ASSERT(i == 1);
+		error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock,
+				new->br_blockcount + right.br_blockcount,
+				right.br_state);
+		return error;
+
+	case 0:
+		/*
+		 * New allocation is not contiguous with another
+		 * real allocation.
+		 * Insert a new entry.
+		 */
+		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+			whichfork);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, whichfork);
+		ifp->if_lastex = idx;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = XFS_ILOG_CORE;
+		if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+				new->br_startblock, new->br_blockcount, &i))
+			return error;
+		ASSERT(i == 0);
+		cur->bc_rec.b.br_state = new->br_state;
+		if (error = xfs_bmbt_insert(cur, &i))
+			return error;
+		ASSERT(i == 1);
+		return 0;
+	}
+#undef	MASK
+#undef	MASK2
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+	/* NOTREACHED */
+	ASSERT(0);
+	return 0; /* keep gcc quite */
+}
+
+#define XFS_ALLOC_GAP_UNITS	4	/* gap limit, in multiples of ap->alen */
+
+/*
+ * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
+ * It figures out where to ask the underlying allocator to put the new extent,
+ * then makes the allocation.  Returns 0 or an error; on 0, ap->rval and
+ * ap->alen describe the extent obtained (ap->alen is 0 if none was found).
+ */
+STATIC int				/* error */
+xfs_bmap_alloc(
+	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+{
+	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
+	xfs_alloctype_t	atype;		/* type for allocation routines */
+	int		error;		/* error return value */
+	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
+	xfs_mount_t	*mp;		/* mount point structure */
+	int		nullfb;		/* true if ap->firstblock isn't set */
+	int		rt;		/* true if inode is realtime */
+#ifdef __KERNEL__
+	xfs_extlen_t	prod;		/* product factor for allocators */
+	xfs_extlen_t	ralen;		/* realtime allocation length */
+#endif	/* __KERNEL__ */
+	/* rt: x is below sb_rblocks; else x is a valid block in y's ag */
+#define	ISLEGAL(x,y)	\
+	(rt ? \
+		(x) < mp->m_sb.sb_rblocks : \
+		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
+		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
+		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
+
+	/* Set up variables.  rt: realtime inode and user data. */
+	mp = ap->ip->i_mount;
+	nullfb = ap->firstblock == NULLFSBLOCK;
+	rt = (ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata;
+	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+#ifdef __KERNEL__
+	if (rt) {
+		xfs_extlen_t	extsz;		/* file extent size for rt */
+		xfs_fileoff_t	nexto;		/* next file offset */
+		xfs_extlen_t	orig_alen;	/* original ap->alen */
+		xfs_fileoff_t	orig_end;	/* original off+len */
+		xfs_fileoff_t	orig_off;	/* original ap->off */
+		xfs_extlen_t	mod_off;	/* modulus calculations */
+		xfs_fileoff_t	prevo;		/* previous file offset */
+		xfs_rtblock_t	rtx;		/* realtime extent number */
+		xfs_extlen_t	temp;		/* temp for rt calculations */
+
+		/*
+		 * Set prod to match the realtime extent size.
+		 */
+		if (!(extsz = ap->ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+		prod = extsz / mp->m_sb.sb_rextsize;
+		orig_off = ap->off;
+		orig_alen = ap->alen;
+		orig_end = orig_off + orig_alen;
+		/*
+		 * If the file offset is unaligned vs. the extent size
+		 * we need to align it.  This will be possible unless
+		 * the file was previously written with a kernel that didn't
+		 * perform this alignment.
+		 */
+		mod_off = do_mod(orig_off, extsz);
+		if (mod_off) {
+			ap->alen += mod_off;
+			ap->off -= mod_off;
+		}
+		/*
+		 * Same adjustment for the end of the requested area.
+		 */
+		if (temp = (ap->alen % extsz))
+			ap->alen += extsz - temp;
+		/*
+		 * If the previous block overlaps with this proposed allocation
+		 * then move the start forward without adjusting the length.
+		 */
+		prevo =
+			ap->prevp->br_startoff == NULLFILEOFF ?
+				0 :
+				(ap->prevp->br_startoff +
+				 ap->prevp->br_blockcount);
+		if (ap->off != orig_off && ap->off < prevo)
+			ap->off = prevo;
+		/*
+		 * If the next block overlaps with this proposed allocation
+		 * then move the start back without adjusting the length,
+		 * but not before offset 0.
+		 * This may of course make the start overlap previous block,
+		 * and if we hit the offset 0 limit then the next block
+		 * can still overlap too.
+		 */
+		nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ? 
+			NULLFILEOFF : ap->gotp->br_startoff;
+		if (!ap->eof &&
+		    ap->off + ap->alen != orig_end &&
+		    ap->off + ap->alen > nexto)
+			ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
+		/*
+		 * If we're now overlapping the next or previous extent that
+		 * means we can't fit an extsz piece in this hole.  Just move
+		 * the start forward to the first legal spot and set
+		 * the length so we hit the end.
+		 */
+		if ((ap->off != orig_off && ap->off < prevo) ||
+		    (ap->off + ap->alen != orig_end &&
+		     ap->off + ap->alen > nexto)) {
+			ap->off = prevo;
+			ap->alen = nexto - prevo;
+		}
+		/*
+		 * If the result isn't a multiple of rtextents we need to
+		 * remove blocks until it is.
+		 */
+		if (temp = (ap->alen % mp->m_sb.sb_rextsize)) {
+			/*
+			 * We're not covering the original request, or
+			 * we won't be able to once we fix the length.
+			 */
+			if (orig_off < ap->off ||
+			    orig_end > ap->off + ap->alen ||
+			    ap->alen - temp < orig_alen)
+				return XFS_ERROR(EINVAL);
+			/*
+			 * Try to fix it by moving the start up.
+			 */
+			if (ap->off + temp <= orig_off) {
+				ap->alen -= temp;
+				ap->off += temp;
+			}
+			/*
+			 * Try to fix it by moving the end in.
+			 */
+			else if (ap->off + ap->alen - temp >= orig_end)
+				ap->alen -= temp;
+			/*
+			 * Set the start to the minimum then trim the length.
+			 */
+			else {
+				ap->alen -= orig_off - ap->off;
+				ap->off = orig_off;
+				ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
+			}
+			/*
+			 * Result doesn't cover the request, fail it.
+			 */
+			if (orig_off < ap->off || orig_end > ap->off + ap->alen)
+				return XFS_ERROR(EINVAL);
+		}
+		ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+		/*
+		 * If the offset & length are not perfectly aligned
+		 * then kill prod, it will just get us in trouble.
+		 */
+		if (do_mod(ap->off, extsz) || ap->alen % extsz)
+			prod = 1;
+		/*
+		 * Set ralen to be the actual requested length in rtextents.
+		 */
+		ralen = ap->alen / mp->m_sb.sb_rextsize;
+		/*
+		 * If the old value was close enough to MAXEXTLEN that
+		 * we rounded up to it, cut it back so it's legal again.
+		 * Note that if it's a really large request (bigger than
+		 * MAXEXTLEN), we don't hear about that number, and can't
+		 * adjust the starting point to match it.
+		 */
+		if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
+			ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
+		/*
+		 * If it's an allocation to an empty file at offset 0,
+		 * pick an extent that will space things out in the rt area.
+		 */
+		if (ap->eof && ap->off == 0) {
+			error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
+			if (error)
+				return error;
+			ap->rval = rtx * mp->m_sb.sb_rextsize;
+		} else
+			ap->rval = 0;
+	}
+#else
+	if (rt)
+		ap->rval = 0;
+#endif	/* __KERNEL__ */
+	else if (nullfb)
+		ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+	else
+		ap->rval = ap->firstblock;
+	/*
+	 * If allocating at eof, and there's a previous real block,
+	 * try to use the block after its last block as our starting point.
+	 */
+	if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
+	    !ISNULLSTARTBLOCK(ap->prevp->br_startblock) &&
+	    ISLEGAL(ap->prevp->br_startblock + ap->prevp->br_blockcount,
+		    ap->prevp->br_startblock)) {
+		ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount;
+		/*
+		 * Adjust for the gap between prevp and us.
+		 */
+		adjust = ap->off -
+			(ap->prevp->br_startoff + ap->prevp->br_blockcount);
+		if (adjust &&
+		    ISLEGAL(ap->rval + adjust, ap->prevp->br_startblock))
+			ap->rval += adjust;
+	}
+	/*
+	 * If not at eof, then compare the two neighbor blocks.
+	 * Figure out whether either one gives us a good starting point,
+	 * and pick the better one.
+	 */
+	else if (!ap->eof) {
+		xfs_fsblock_t	gotbno;		/* right side block number */
+		xfs_fsblock_t	gotdiff;	/* right side difference */
+		xfs_fsblock_t	prevbno;	/* left side block number */
+		xfs_fsblock_t	prevdiff;	/* left side difference */
+
+		/*
+		 * If there's a previous (left) block, select a requested
+		 * start block based on it.
+		 */
+		if (ap->prevp->br_startoff != NULLFILEOFF &&
+		    !ISNULLSTARTBLOCK(ap->prevp->br_startblock) &&
+		    (prevbno = ap->prevp->br_startblock +
+			       ap->prevp->br_blockcount) &&
+		    ISLEGAL(prevbno, ap->prevp->br_startblock)) {
+			/*
+			 * Calculate gap to end of previous block.
+			 */
+			adjust = prevdiff = ap->off -
+				(ap->prevp->br_startoff +
+				 ap->prevp->br_blockcount);
+			/*
+			 * Figure the startblock based on the previous block's
+			 * end and the gap size.
+			 * Heuristic!
+			 * If the gap is large relative to the piece we're
+			 * allocating, or using it gives us an illegal block
+			 * number, then just use the end of the previous block.
+			 */
+			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+			    ISLEGAL(prevbno + prevdiff,
+				    ap->prevp->br_startblock))
+				prevbno += adjust;
+			else
+				prevdiff += adjust;
+			/*
+			 * If the firstblock forbids it, can't use it,
+			 * must use default.
+			 */
+			if (!rt && !nullfb &&
+			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
+				prevbno = NULLFSBLOCK;
+		}
+		/*
+		 * No previous block or can't follow it, just default.
+		 */
+		else
+			prevbno = NULLFSBLOCK;
+		/*
+		 * If there's a following (right) block, select a requested
+		 * start block based on it.
+		 */
+		if (!ISNULLSTARTBLOCK(ap->gotp->br_startblock)) {
+			/*
+			 * Calculate gap to start of next block.
+			 */
+			adjust = gotdiff = ap->gotp->br_startoff - ap->off;
+			/*
+			 * Figure the startblock based on the next block's
+			 * start and the gap size.
+			 */
+			gotbno = ap->gotp->br_startblock;
+			/*
+			 * Heuristic!
+			 * If the gap is large relative to the piece we're
+			 * allocating, or using it gives us an illegal block
+			 * number, then just use the start of the next block
+			 * offset by our length.
+			 */
+			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+			    ISLEGAL(gotbno - gotdiff, gotbno))
+				gotbno -= adjust;
+			else if (ISLEGAL(gotbno - ap->alen, gotbno)) {
+				gotbno -= ap->alen;
+				gotdiff += adjust - ap->alen;
+			} else
+				gotdiff += adjust;
+			/*
+			 * If the firstblock forbids it, can't use it,
+			 * must use default.
+			 */
+			if (!rt && !nullfb &&
+			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
+				gotbno = NULLFSBLOCK;
+		}
+		/*
+		 * No next block, just default.
+		 */
+		else
+			gotbno = NULLFSBLOCK;
+		/*
+		 * If both valid, pick the better one, else the only good
+		 * one, else ap->rval is already set (to 0 or the inode block).
+		 */
+		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
+			ap->rval = prevdiff <= gotdiff ? prevbno : gotbno;
+		else if (prevbno != NULLFSBLOCK)
+			ap->rval = prevbno;
+		else if (gotbno != NULLFSBLOCK)
+			ap->rval = gotbno;
+	}
+	/*
+	 * If allowed, use ap->rval; otherwise must use firstblock since
+	 * it's in the right allocation group.
+	 */
+	if (nullfb || rt || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
+		;
+	else
+		ap->rval = ap->firstblock;
+	/*
+	 * Realtime allocation, done through xfs_rtallocate_extent.
+	 */
+	if (rt) {
+#ifndef __KERNEL__
+		ASSERT(0);
+#else
+		xfs_rtblock_t	rtb;		/* allocated rt extent number */
+
+		atype = ap->rval == 0 ?
+			XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
+		do_div(ap->rval, mp->m_sb.sb_rextsize);	/* blocks to rt extents */
+		rtb = ap->rval;
+		ap->alen = ralen;
+		if (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
+				&ralen, atype, ap->wasdel, prod, &rtb))
+			return error;
+		if (rtb == NULLFSBLOCK && prod > 1 &&
+		    (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
+						   ap->alen, &ralen, atype,
+						   ap->wasdel, 1, &rtb)))
+			return error;
+		ap->rval = rtb;
+		if (ap->rval != NULLFSBLOCK) {
+			ap->rval *= mp->m_sb.sb_rextsize;
+			ralen *= mp->m_sb.sb_rextsize;
+			ap->alen = ralen;
+			ap->ip->i_d.di_nblocks += ralen;
+			xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+			if (ap->wasdel)
+				ap->ip->i_delayed_blks -= ralen;
+			/*
+			 * Adjust the disk quota also. This was reserved
+			 * earlier.
+			 */
+			if (XFS_IS_QUOTA_ON(mp) &&
+			    ap->ip->i_ino != mp->m_sb.sb_uquotino &&
+			    ap->ip->i_ino != mp->m_sb.sb_pquotino)
+				xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+					ap->wasdel ?
+						XFS_TRANS_DQ_DELRTBCOUNT :
+						XFS_TRANS_DQ_RTBCOUNT,
+					(long)ralen);
+		} else
+			ap->alen = 0;
+#endif	/* __KERNEL__ */
+	}
+	/*
+	 * Normal allocation, done through xfs_alloc_vextent.
+	 */
+	else {
+		xfs_agnumber_t	ag;		/* ag being examined */
+		xfs_alloc_arg_t	args;		/* xfs_alloc_vextent arguments */
+		xfs_extlen_t	blen;		/* best usable length seen */
+		xfs_extlen_t	delta;		/* freelist shortfall in ag */
+		int		isaligned;	/* alignment was requested */
+		xfs_extlen_t	longest;	/* usable longest free in ag */
+		xfs_extlen_t	need;		/* min freelist length for ag */
+		xfs_extlen_t	nextminlen;	/* minlen for aligned retry */
+		int		notinit;	/* saw an ag with no pagf data */
+		xfs_perag_t	*pag;		/* per-ag data for ag */
+		xfs_agnumber_t	startag;	/* ag the scan started in */
+		int		tryagain;	/* retry if exact bno fails */
+
+		tryagain = isaligned = 0;
+		args.tp = ap->tp;
+		args.mp = mp;
+		args.fsbno = ap->rval;
+		args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+		blen = 0;
+		if (nullfb) {
+			args.type = XFS_ALLOCTYPE_START_BNO;
+			args.total = ap->total;
+			/*
+			 * Find the longest available space.
+			 * We're going to try for the whole allocation at once.
+			 */
+			startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
+			notinit = 0;
+			mrlock(&mp->m_peraglock, MR_ACCESS, PINOD);
+			while (blen < ap->alen) {
+				pag = &mp->m_perag[ag];
+				if (!pag->pagf_init &&
+				    (error = xfs_alloc_pagf_init(mp, args.tp,
+					    ag, XFS_ALLOC_FLAG_TRYLOCK))) {
+					mrunlock(&mp->m_peraglock);
+					return error;
+				}
+				/*
+				 * See xfs_alloc_fix_freelist...
+				 */
+				if (pag->pagf_init) {
+					need = XFS_MIN_FREELIST_PAG(pag, mp);
+					delta = need > pag->pagf_flcount ?
+						need - pag->pagf_flcount : 0;
+					longest = (pag->pagf_longest > delta) ?
+						(pag->pagf_longest - delta) :
+						(pag->pagf_flcount > 0 ||
+						 pag->pagf_longest > 0);
+					if (blen < longest)
+						blen = longest;
+				} else
+					notinit = 1;
+				if (++ag == mp->m_sb.sb_agcount) 
+					ag = 0;
+				if (ag == startag)
+					break;
+			}
+			mrunlock(&mp->m_peraglock);
+			/*
+			 * The loop above used a trylock, so an ag may have
+			 * been left unexamined (notinit); there may be space.
+			 */ 
+			if (notinit || blen < ap->minlen)
+				args.minlen = ap->minlen;	
+			/*
+			 * If the best seen length is less than the request
+			 * length, use the best as the minimum.
+			 */
+			else if (blen < ap->alen)
+				args.minlen = blen;
+			/*
+			 * Otherwise we've seen an extent as big as alen,
+			 * use that as the minimum.
+			 */
+			else 
+				args.minlen = ap->alen;
+		} else if (ap->low) {
+			args.type = XFS_ALLOCTYPE_FIRST_AG;
+			args.total = args.minlen = ap->minlen;
+		} else {
+			args.type = XFS_ALLOCTYPE_NEAR_BNO;
+			args.total = ap->total;
+			args.minlen = ap->minlen;
+		}
+		if (ap->ip->i_d.di_extsize) {
+			args.prod = ap->ip->i_d.di_extsize;
+			if (args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))
+				args.mod = (xfs_extlen_t)(args.prod - args.mod);
+		} else if (mp->m_sb.sb_blocksize >= NBPP) {
+			args.prod = 1;
+			args.mod = 0;
+		} else {
+			args.prod = NBPP >> mp->m_sb.sb_blocklog;
+			if (args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))
+				args.mod = (xfs_extlen_t)(args.prod - args.mod);
+		}
+		/*
+		 * If we are not low on available data blocks, and the
+		 * underlying logical volume manager is a stripe, and
+		 * the file offset is zero then try to allocate data
+		 * blocks on stripe unit boundary.
+		 * NOTE: ap->aeof is only set if the allocation length
+		 * is >= the stripe unit and the allocation offset is
+		 * at the end of file.
+		 */ 
+		if (!ap->low && ap->aeof) {
+			if (!ap->off) {
+				args.alignment = mp->m_dalign;
+				atype = args.type;
+				isaligned = 1;
+				/*
+				 * Adjust for alignment
+				 */
+				if (blen > args.alignment && blen <= ap->alen) 
+					args.minlen = blen - args.alignment;
+				args.minalignslop = 0;
+			} else {
+				/*
+				 * First try an exact bno allocation.
+				 * If it fails then do a near or start bno
+				 * allocation with alignment turned on.
+				 */
+				atype = args.type;
+				tryagain = 1;
+				args.type = XFS_ALLOCTYPE_THIS_BNO;
+				args.alignment = 1;
+				/*
+				 * Compute the minlen+alignment for the
+				 * next case.  Set slop so that the value
+				 * of minlen+alignment+slop doesn't go up
+				 * between the calls.
+				 */
+				if (blen > mp->m_dalign && blen <= ap->alen) 
+					nextminlen = blen - mp->m_dalign;
+				else
+					nextminlen = args.minlen;
+				if (nextminlen + mp->m_dalign > args.minlen + 1)
+					args.minalignslop =
+						nextminlen + mp->m_dalign -
+						args.minlen - 1;
+				else
+					args.minalignslop = 0;
+			}
+		} else {
+			args.alignment = 1;
+			args.minalignslop = 0;
+		}
+		args.minleft = ap->minleft;
+		args.wasdel = ap->wasdel;
+		args.isfl = 0;
+		args.userdata = ap->userdata;
+		if (error = xfs_alloc_vextent(&args))
+			return error;
+		if (tryagain && args.fsbno == NULLFSBLOCK) {
+			/*
+			 * Exact allocation failed. Now try with alignment
+			 * turned on.
+			 */
+                        args.type = atype;
+                        args.fsbno = ap->rval;
+                        args.alignment = mp->m_dalign;
+			args.minlen = nextminlen;
+			args.minalignslop = 0;
+			isaligned = 1;
+                        if (error = xfs_alloc_vextent(&args))
+                                return error;
+                }
+		if (isaligned && args.fsbno == NULLFSBLOCK) {
+			/*
+			 * Allocation failed, so turn off alignment and
+			 * try again.
+			 */
+			args.type = atype;
+			args.fsbno = ap->rval;
+			args.alignment = 0;
+			if (error = xfs_alloc_vextent(&args))
+				return error;
+		}
+		if (args.fsbno == NULLFSBLOCK && nullfb &&
+		    args.minlen > ap->minlen) {
+			args.minlen = ap->minlen;
+			args.type = XFS_ALLOCTYPE_START_BNO;
+			args.fsbno = ap->rval;
+			if (error = xfs_alloc_vextent(&args))
+				return error;
+		}
+		if (args.fsbno == NULLFSBLOCK && nullfb) {
+			args.fsbno = 0;
+			args.type = XFS_ALLOCTYPE_FIRST_AG;
+			args.total = ap->minlen;
+			args.minleft = 0;
+			if (error = xfs_alloc_vextent(&args))
+				return error;
+			ap->low = 1;	/* now in low-space mode */
+		}
+		if (args.fsbno != NULLFSBLOCK) {
+			ap->firstblock = ap->rval = args.fsbno;
+			ASSERT(nullfb || fb_agno == args.agno ||
+			       (ap->low && fb_agno < args.agno));
+			ap->alen = args.len;
+			ap->ip->i_d.di_nblocks += args.len;
+			xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+			if (ap->wasdel)
+				ap->ip->i_delayed_blks -= args.len;
+			/*
+			 * Adjust the disk quota also. This was reserved
+			 * earlier.
+			 */
+			if (XFS_IS_QUOTA_ON(mp) &&
+			    ap->ip->i_ino != mp->m_sb.sb_uquotino &&
+			    ap->ip->i_ino != mp->m_sb.sb_pquotino)
+				xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+					ap->wasdel ?
+						XFS_TRANS_DQ_DELBCOUNT :
+						XFS_TRANS_DQ_BCOUNT,
+					(long)args.len);
+		} else {
+			ap->rval = NULLFSBLOCK;
+			ap->alen = 0;
+		}
+	}
+	return 0;
+#undef	ISLEGAL
+}
+
+/*
+ * Convert a btree format fork made up of a root and a single leaf block,
+ * whose extent list fits in the inode, to extents format.  The extent list
+ * is already in-core, so we only release the root and free the leaf block.
+ */
+STATIC int				/* error */
+xfs_bmap_btree_to_extents(
+	xfs_trans_t		*tp,	/* transaction pointer */
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			*logflagsp, /* inode logging flags */
+	int			whichfork,  /* data or attr fork */
+	int			async)      /* xaction can be async */
+{
+	/* REFERENCED */
+	xfs_bmbt_block_t	*leaf;	/* the only leaf block */
+	xfs_fsblock_t		bno;	/* leaf's block number */
+	xfs_buf_t		*bp;	/* buffer holding the leaf */
+	int			error;	/* error return value */
+	xfs_ifork_t		*ifp;	/* inode fork data */
+	xfs_mount_t		*mp;	/* mount point structure */
+	xfs_bmbt_ptr_t		*ptr;	/* root's pointer to the leaf */
+	xfs_bmbt_block_t	*root;	/* incore root btree block */
+
+	mp = ip->i_mount;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	root = ifp->if_broot;
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+	ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) == 1);
+	ASSERT(INT_GET(root->bb_numrecs, ARCH_CONVERT) == 1);
+	ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
+	ptr = XFS_BMAP_BROOT_PTR_ADDR(root, 1, ifp->if_broot_bytes);
+	*logflagsp = 0;
+#ifdef DEBUG
+	error = xfs_btree_check_lptr(cur, INT_GET(*ptr, ARCH_CONVERT), 1);
+	if (error)
+		return error;
+#endif
+	bno = INT_GET(*ptr, ARCH_CONVERT);
+	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF);
+	if (error)
+		return error;
+	leaf = XFS_BUF_TO_BMBT_BLOCK(bp);
+	error = xfs_btree_check_lblock(cur, leaf, 0, bp);
+	if (error)
+		return error;
+	xfs_bmap_add_free(bno, 1, cur->bc_private.b.flist, mp);
+	if (!async)
+		xfs_trans_set_sync(tp);
+	ip->i_d.di_nblocks--;		/* the leaf no longer counts */
+	if (XFS_IS_QUOTA_ON(mp) && ip->i_ino != mp->m_sb.sb_uquotino &&
+	    ip->i_ino != mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+	xfs_trans_binval(tp, bp);
+	if (cur->bc_bufs[0] == bp)	/* cursor must not keep the buffer */
+		cur->bc_bufs[0] = NULL;
+	xfs_iroot_realloc(ip, -1, whichfork);
+	ASSERT(ifp->if_broot == NULL);
+	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+	return 0;
+}
+
+/*
+ * Called by xfs_bmapi to update extent list structure and the btree
+ * after removing space (or undoing a delayed allocation).
+ */
+STATIC int				/* error */
+xfs_bmap_del_extent(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_trans_t		*tp,	/* current transaction pointer */
+	xfs_extnum_t		idx,	/* extent number to update/delete */
+	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
+	xfs_btree_cur_t		*cur,	/* if null, not a btree */
+	xfs_bmbt_irec_t		*del,	/* data to remove from extent list */
+	int			iflags,	/* input flags */	    
+	int			*logflagsp, /* inode logging flags */
+	int			whichfork, /* data or attr fork */
+	int			rsvd)	/* OK to allocate reserved blocks */
+{
+	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
+	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
+	xfs_fsblock_t		del_endblock;	/* first block past del */
+	xfs_fileoff_t		del_endoff;	/* first offset past del */
+	int			delay;	/* current block is delayed allocated */
+	int			do_fx;	/* free extent at end of routine */
+	xfs_bmbt_rec_t		*ep;	/* current extent entry pointer */
+	int			error;	/* error return value */
+	int			flags;	/* inode logging flags */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_del_extent";
+#endif
+	xfs_bmbt_irec_t		got;	/* current extent entry */
+	xfs_fileoff_t		got_endoff;	/* first offset past got */
+	int			i;	/* temp state */
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	xfs_mount_t		*mp;	/* mount structure */
+	xfs_filblks_t		nblks;	/* quota/sb block count */
+	xfs_bmbt_irec_t		new;	/* new record to be inserted */
+	/* REFERENCED */
+	xfs_extnum_t		nextents;	/* number of extents in list */
+	uint			qfield;	/* quota field to update */
+	xfs_filblks_t		temp;	/* for indirect length calculations */
+	xfs_filblks_t		temp2;	/* for indirect length calculations */
+	
+	XFS_STATS_INC(xs_del_exlist);
+	mp = ip->i_mount;	
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(idx >= 0 && idx < nextents);
+	ASSERT(del->br_blockcount > 0);
+	ep = &ifp->if_u1.if_extents[idx];
+	xfs_bmbt_get_all(ep, &got);
+	ASSERT(got.br_startoff <= del->br_startoff);
+	del_endoff = del->br_startoff + del->br_blockcount;
+	got_endoff = got.br_startoff + got.br_blockcount;
+	ASSERT(got_endoff >= del_endoff);
+	delay = ISNULLSTARTBLOCK(got.br_startblock);
+	ASSERT(ISNULLSTARTBLOCK(del->br_startblock) == delay);
+	flags = 0;
+	qfield = 0;
+	error = 0;
+	/*
+	 * If deleting a real allocation, must free up the disk space.
+	 */
+	if (!delay) {
+		flags = XFS_ILOG_CORE;
+		/*
+		 * Realtime allocation.  Free it and record di_nblocks update.
+		 */
+		if (whichfork == XFS_DATA_FORK &&
+		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+			xfs_fsblock_t	bno;
+			xfs_filblks_t	len;
+
+			ASSERT(do_mod(del->br_blockcount,
+				      mp->m_sb.sb_rextsize) == 0);
+			ASSERT(do_mod(del->br_startblock,
+				      mp->m_sb.sb_rextsize) == 0);
+			bno = del->br_startblock;
+			do_div(bno, mp->m_sb.sb_rextsize);
+			len = del->br_blockcount;
+			do_div(len, mp->m_sb.sb_rextsize);
+			if (error = xfs_rtfree_extent(ip->i_transp, bno,
+					(xfs_extlen_t)len))
+				goto done;
+			do_fx = 0;
+			nblks = len * mp->m_sb.sb_rextsize;
+			if (XFS_IS_QUOTA_ON(mp) &&
+			    ip->i_ino != mp->m_sb.sb_uquotino &&
+			    ip->i_ino != mp->m_sb.sb_pquotino)
+				qfield = XFS_TRANS_DQ_RTBCOUNT;
+		}
+		/*
+		 * Ordinary allocation.
+		 */
+		else {
+			do_fx = 1;
+			nblks = del->br_blockcount;
+			if (XFS_IS_QUOTA_ON(mp) &&
+			    ip->i_ino != mp->m_sb.sb_uquotino &&
+			    ip->i_ino != mp->m_sb.sb_pquotino)
+				qfield = XFS_TRANS_DQ_BCOUNT;
+			/*
+			 * If we're freeing meta-data, then the transaction
+			 * that frees the blocks must be synchronous.  This
+			 * ensures that noone can reuse the blocks before
+			 * they are permanently free.  For regular data
+			 * it is the callers responsibility to make the
+			 * data permanently inaccessible before calling
+			 * here to free it.
+			 */
+			if (iflags & XFS_BMAPI_METADATA)
+				xfs_trans_set_sync(tp);
+		}
+		/*
+		 * Set up del_endblock and cur for later.
+		 */
+		del_endblock = del->br_startblock + del->br_blockcount;
+		if (cur) {
+			if (error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+					got.br_startblock, got.br_blockcount,
+					&i))
+				goto done;
+			ASSERT(i == 1);
+		}
+		da_old = da_new = 0;
+	} else {
+		da_old = STARTBLOCKVAL(got.br_startblock);
+		da_new = 0;
+		nblks = 0;
+		do_fx = 0;
+	}
+	/*
+	 * Set flag value to use in switch statement.
+	 * Left-contig is 2, right-contig is 1.
+	 */
+	switch (((got.br_startoff == del->br_startoff) << 1) |
+		(got_endoff == del_endoff)) {
+	case 3:
+		/*
+		 * Matches the whole extent.  Delete the entry.
+		 */
+		xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
+		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+		ifp->if_lastex = idx;
+		if (delay)
+			break;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+		flags |= XFS_ILOG_CORE;
+		if (!cur) {
+			flags |= XFS_ILOG_FEXT(whichfork);
+			break;
+		}
+		if (error = xfs_bmbt_delete(cur, iflags & XFS_BMAPI_ASYNC, &i))
+			goto done;
+		ASSERT(i == 1);
+		break;
+
+	case 2:
+		/*
+		 * Deleting the first part of the extent.
+		 */
+		xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork);
+		xfs_bmbt_set_startoff(ep, del_endoff);
+		temp = got.br_blockcount - del->br_blockcount;
+		xfs_bmbt_set_blockcount(ep, temp);
+		ifp->if_lastex = idx;
+		if (delay) {
+			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+				da_old);
+			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+			xfs_bmap_trace_post_update(fname, "2", ip, idx,
+				whichfork);
+			da_new = temp;
+			break;
+		}
+		xfs_bmbt_set_startblock(ep, del_endblock);
+		xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork);
+		if (!cur) {
+			flags |= XFS_ILOG_FEXT(whichfork);
+			break;
+		}
+		if (error = xfs_bmbt_update(cur, del_endoff, del_endblock,
+				got.br_blockcount - del->br_blockcount,
+				got.br_state))
+			goto done;
+		break;
+
+	case 1:
+		/*
+		 * Deleting the last part of the extent.
+		 */
+		temp = got.br_blockcount - del->br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork);
+		xfs_bmbt_set_blockcount(ep, temp);
+		ifp->if_lastex = idx;
+		if (delay) {
+			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+				da_old);
+			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+			xfs_bmap_trace_post_update(fname, "1", ip, idx,
+				whichfork);
+			da_new = temp;
+			break;
+		}
+		xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork);
+		if (!cur) {
+			flags |= XFS_ILOG_FEXT(whichfork);
+			break;
+		}
+		if (error = xfs_bmbt_update(cur, got.br_startoff,
+				got.br_startblock,
+				got.br_blockcount - del->br_blockcount,
+				got.br_state))
+			goto done;
+		break;
+	
+	case 0:
+		/*
+		 * Deleting the middle of the extent.
+		 */
+		temp = del->br_startoff - got.br_startoff;
+		xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork);
+		xfs_bmbt_set_blockcount(ep, temp);
+		new.br_startoff = del_endoff;
+		temp2 = got_endoff - del_endoff;
+		new.br_blockcount = temp2;
+		new.br_state = got.br_state;
+		if (!delay) {
+			new.br_startblock = del_endblock;
+			flags |= XFS_ILOG_CORE;
+			if (cur) {
+				if (error = xfs_bmbt_update(cur,
+						got.br_startoff,
+						got.br_startblock, temp,
+						got.br_state))
+					goto done;
+				if (error = xfs_bmbt_increment(cur, 0, &i))
+					goto done;
+				cur->bc_rec.b = new;
+				error = xfs_bmbt_insert(cur, &i);
+				if (error && error != ENOSPC)
+					goto done;
+				/*
+				 * If get no-space back from btree insert,
+				 * it tried a split, and we have a zero
+				 * block reservation.
+				 * Fix up our state and return the error.
+				 */
+				if (error == ENOSPC) {
+					/* 
+					 * Reset the cursor, don't trust
+					 * it after any insert operation.
+					 */
+					if (error = xfs_bmbt_lookup_eq(cur,
+							got.br_startoff,
+							got.br_startblock,
+							temp, &i))
+						goto done;
+					ASSERT(i == 1);
+					/*
+					 * Update the btree record back
+					 * to the original value.
+					 */
+					if (error = xfs_bmbt_update(cur,
+							got.br_startoff,
+							got.br_startblock,
+							got.br_blockcount,
+							got.br_state))
+						goto done;
+					/*
+					 * Reset the extent record back
+					 * to the original value.
+					 */
+					xfs_bmbt_set_blockcount(ep,
+						got.br_blockcount);
+					flags = 0;
+					error = XFS_ERROR(ENOSPC);
+					goto done;
+				}
+				ASSERT(i == 1);
+			} else
+				flags |= XFS_ILOG_FEXT(whichfork);
+			XFS_IFORK_NEXT_SET(ip, whichfork,
+				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+		} else {
+			ASSERT(whichfork == XFS_DATA_FORK);
+			temp = xfs_bmap_worst_indlen(ip, temp);
+			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+			temp2 = xfs_bmap_worst_indlen(ip, temp2);
+			new.br_startblock = NULLSTARTBLOCK((int)temp2);
+			da_new = temp + temp2;
+			while (da_new > da_old) {
+				if (temp) {
+					temp--;
+					da_new--;
+					xfs_bmbt_set_startblock(ep,
+						NULLSTARTBLOCK((int)temp));
+				}
+				if (da_new == da_old)
+					break;
+				if (temp2) {
+					temp2--;
+					da_new--;
+					new.br_startblock = 
+						NULLSTARTBLOCK((int)temp2);
+				}
+			}
+		}
+		xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
+		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
+			whichfork);
+		xfs_bmap_insert_exlist(ip, idx + 1, 1, &new, whichfork);
+		ifp->if_lastex = idx + 1;
+		break;
+	}
+	/*
+	 * If we need to, add to list of extents to delete.
+	 */
+	if (do_fx)
+		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
+			mp);
+	/*
+	 * Adjust inode # blocks in the file.
+	 */
+	if (nblks)
+		ip->i_d.di_nblocks -= nblks;
+	/*
+	 * Adjust quota data.
+	 */
+	if (qfield)
+		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
+	/*
+	 * Account for change in delayed indirect blocks.
+	 * Nothing to do for disk quota accounting here.
+	 */
+	ASSERT(da_old >= da_new);
+	if (da_old > da_new) 
+		xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new),
+			rsvd);
+done:
+	*logflagsp = flags;
+	return error;
+}
+
+/*
+ * Unlink "free" from the free item list and hand it back to its zone.
+ * "prev" is the item ahead of it, or NULL when "free" is the list head.
+ */
+STATIC void
+xfs_bmap_del_free(
+	xfs_bmap_free_t		*flist,	/* free item list header */
+	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
+	xfs_bmap_free_item_t	*free)	/* list item to be freed */
+{
+	xfs_bmap_free_item_t	**linkp;	/* link pointing at "free" */
+
+	linkp = prev ? &prev->xbfi_next : &flist->xbf_first;
+	*linkp = free->xbfi_next;
+	flist->xbf_count--;
+	kmem_zone_free(xfs_bmap_free_item_zone, free);
+}
+
+/*
+ * Drop "count" records from the extents array of inode "ip", starting
+ * at index "idx".  The records above the gap are slid down over it and
+ * xfs_iext_realloc() is then asked to shrink the array by "count".
+ */
+STATIC void
+xfs_bmap_delete_exlist(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_extnum_t	idx,		/* starting delete index */
+	xfs_extnum_t	count,		/* count of items to delete */
+	int		whichfork)	/* data or attr fork */
+{
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	xfs_extnum_t	nleft;		/* list size once "count" are gone */
+	xfs_bmbt_rec_t	*gap;		/* first record being removed */
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	nleft = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count;
+	gap = &ifp->if_u1.if_extents[idx];
+	/* Copy the surviving tail down onto the removed records. */
+	ovbcopy(gap + count, gap, (nleft - idx) * sizeof(*gap));
+	xfs_iext_realloc(ip, -count, whichfork);
+}
+
+/*
+ * Convert an extents-format file into a btree-format file.
+ * The new file will have a root block (in the inode) and a single child block.
+ * If allocating the child block fails, the incore root is given back and the
+ * fork format is restored to extents before the error is returned.
+ */
+STATIC int					/* error */
+xfs_bmap_extents_to_btree(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_inode_t		*ip,		/* incore inode pointer */
+	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
+	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
+	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
+	int			wasdel,		/* converting a delayed alloc */
+	int			*logflagsp,	/* inode logging flags */
+	int			whichfork)	/* data or attr fork */
+{
+	xfs_bmbt_block_t	*ablock;	/* allocated (child) bt block */
+	xfs_buf_t		*abp;		/* buffer for ablock */
+	xfs_alloc_arg_t		args;		/* allocation arguments */
+	xfs_bmbt_rec_t		*arp;		/* child record pointer */
+	xfs_bmbt_block_t	*block;		/* btree root block */
+	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
+	xfs_bmbt_rec_t		*ep;		/* extent list pointer */
+	int			error;		/* error return value */
+	xfs_extnum_t		i;		/* extent list index */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	xfs_bmbt_key_t		*kp;		/* root block key pointer */
+	xfs_mount_t		*mp;		/* mount structure */
+	xfs_extnum_t		nextents;	/* extent list size */
+	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	/* Make space in the inode incore. */
+	xfs_iroot_realloc(ip, 1, whichfork);
+	ifp->if_flags |= XFS_IFBROOT;
+	/* Fill in the root. */
+	block = ifp->if_broot;
+	INT_SET(block->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	INT_SET(block->bb_level, ARCH_CONVERT, 1);
+	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+	INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+	INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	/*
+	 * Need a cursor.  Can't allocate until bb_level is filled in.
+	 */
+	mp = ip->i_mount;
+	cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
+		whichfork);
+	cur->bc_private.b.firstblock = *firstblock;
+	cur->bc_private.b.flist = flist;
+	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+	/*
+	 * Convert to a btree with two levels, one record in root.
+	 */
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+	args.tp = tp;
+	args.mp = mp;
+	if (*firstblock == NULLFSBLOCK) {
+		args.type = XFS_ALLOCTYPE_START_BNO;
+		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
+	} else if (flist->xbf_low) {
+		args.type = XFS_ALLOCTYPE_START_BNO;
+		args.fsbno = *firstblock;
+	} else {
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		args.fsbno = *firstblock;
+	}
+	args.minlen = args.maxlen = args.prod = 1;
+	args.total = args.minleft = args.alignment = args.mod = args.isfl =
+		args.minalignslop = 0;
+	args.wasdel = wasdel;
+	*logflagsp = 0;
+	if (error = xfs_alloc_vextent(&args)) {
+		/* Undo the root and format changes made above. */
+		xfs_iroot_realloc(ip, -1, whichfork);
+		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+		return error;
+	}
+	/*
+	 * Allocation can't fail, the space was reserved.
+	 */
+	ASSERT(args.fsbno != NULLFSBLOCK);
+	ASSERT(*firstblock == NULLFSBLOCK ||
+	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
+	       (flist->xbf_low &&
+	        args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_private.b.allocated++;
+	ip->i_d.di_nblocks++;
+	if (XFS_IS_QUOTA_ON(mp) &&
+	    ip->i_ino != mp->m_sb.sb_uquotino &&
+	    ip->i_ino != mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+	/*
+	 * Fill in the child block.
+	 */
+	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
+	INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	INT_ZERO(ablock->bb_level, ARCH_CONVERT);
+	INT_ZERO(ablock->bb_numrecs, ARCH_CONVERT);
+	INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+	INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	for (ep = ifp->if_u1.if_extents, i = 0; i < nextents; i++, ep++) {
+		if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
+			*arp++ = *ep;
+			INT_MOD(ablock->bb_numrecs, ARCH_CONVERT, +1);
+		}
+	}
+	ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
+	/*
+	 * Fill in the root key and pointer.
+	 */
+	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(arp));
+	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	INT_SET(*pp, ARCH_CONVERT, args.fsbno);
+	/*
+	 * Do all this logging at the end so that
+	 * the root is at the right level.
+	 */
+	xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
+	xfs_bmbt_log_recs(cur, abp, 1, INT_GET(ablock->bb_numrecs, ARCH_CONVERT));
+	ASSERT(*curp == NULL);
+	*curp = cur;
+	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
+	return 0;
+}
+
+/*
+ * Open a gap of "count" records at index "idx" in the extent list of
+ * inode "ip" and fill it from the "count" entries starting at "new".
+ */
+STATIC void
+xfs_bmap_insert_exlist(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_extnum_t	idx,		/* starting index of new items */
+	xfs_extnum_t	count,		/* number of inserted items */
+	xfs_bmbt_irec_t	*new,		/* items to insert */
+	int		whichfork)	/* data or attr fork */
+{
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	xfs_bmbt_rec_t	*gap;		/* first slot of the opened gap */
+	xfs_extnum_t	nrecs;		/* extent list size after growing */
+	xfs_extnum_t	n;		/* index into the new items */
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	xfs_iext_realloc(ip, count, whichfork);
+	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	gap = &ifp->if_u1.if_extents[idx];
+	/* Slide the old tail up, then encode the new records in place. */
+	ovbcopy(gap, gap + count, (nrecs - (idx + count)) * sizeof(*gap));
+	for (n = 0; n < count; n++)
+		xfs_bmbt_set_all(gap + n, new + n);
+}
+
+/*
+ * Convert a local (inline) fork to extents format.  Inline bytes, if any,
+ * are copied to one newly allocated block and the copied range is logged.
+ * This code is out of bounds for data forks of regular files,
+ * since the file data needs to get logged so things will stay consistent.
+ * (The bmap-level manipulations are ok, though).
+ */
+STATIC int				/* error */
+xfs_bmap_local_to_extents(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
+	xfs_extlen_t	total,		/* total blocks needed by transaction */
+	int		*logflagsp,	/* inode logging flags */
+	int		whichfork)	/* data or attr fork */
+{
+	int		error;		/* error return value */
+	int		flags;		/* logging flags returned */
+#ifdef XFS_BMAP_TRACE
+	static char	fname[] = "xfs_bmap_local_to_extents";
+#endif
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+
+	/*
+	 * We don't want to deal with the case of keeping inode data inline yet.
+	 * So sending the data fork of a regular inode is illegal.
+	 */
+	ASSERT(!((ip->i_d.di_mode & IFMT) == IFREG &&
+		 whichfork == XFS_DATA_FORK));
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+	flags = 0;
+	error = 0;
+	if (ifp->if_bytes) {
+		xfs_alloc_arg_t	args;	/* allocation arguments */
+		xfs_buf_t		*bp;	/* buffer for extent list block */
+		xfs_bmbt_rec_t	*ep;	/* extent list pointer */
+
+		args.tp = tp;
+		args.mp = ip->i_mount;
+		ASSERT(ifp->if_flags & XFS_IFINLINE);
+		/*
+		 * Allocate a block.  We know we need only one, since the
+		 * file currently fits in an inode.
+		 */
+		if (*firstblock == NULLFSBLOCK) {
+			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
+			args.type = XFS_ALLOCTYPE_START_BNO;
+		} else {
+			args.fsbno = *firstblock;
+			args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		}
+		args.total = total;
+		args.mod = args.minleft = args.alignment = args.wasdel =
+			args.isfl = args.minalignslop = 0;
+		args.minlen = args.maxlen = args.prod = 1;
+		if (error = xfs_alloc_vextent(&args))
+			goto done;	/* *logflagsp is returned as 0 */
+		/* Can't fail, the space was reserved. */
+		ASSERT(args.fsbno != NULLFSBLOCK);
+		ASSERT(args.len == 1);
+		*firstblock = args.fsbno;
+		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+		bcopy(ifp->if_u1.if_data, (char *)XFS_BUF_PTR(bp),
+			ifp->if_bytes);
+		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+		xfs_iext_realloc(ip, 1, whichfork);
+		ep = ifp->if_u1.if_extents;
+		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
+		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
+		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+		ip->i_d.di_nblocks = 1;	/* assigned outright, not incremented */
+		if (XFS_IS_QUOTA_ON(args.mp) &&
+		    ip->i_ino != args.mp->m_sb.sb_uquotino &&
+		    ip->i_ino != args.mp->m_sb.sb_pquotino)
+			xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
+				1L);
+		flags |= XFS_ILOG_FEXT(whichfork);
+	} else
+		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
+	/* With or without inline bytes, the fork is now in extents format. */
+	ifp->if_flags &= ~XFS_IFINLINE;
+	ifp->if_flags |= XFS_IFEXTENTS;
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+	flags |= XFS_ILOG_CORE;
+done:
+	*logflagsp = flags;
+	return error;
+}
+
+xfs_bmbt_rec_t *			/* pointer to found extent entry */
+xfs_bmap_do_search_extents(
+	xfs_bmbt_rec_t	*base,		/* base of extent list */
+	xfs_extnum_t	lastx,		/* last extent index used */
+	xfs_extnum_t	nextents,	/* extent list size */
+	xfs_fileoff_t	bno,		/* block number searched for */
+	int		*eofp,		/* out: end of file found */
+	xfs_extnum_t	*lastxp,	/* out: last extent index */
+	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
+	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
+{
+	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
+	xfs_bmbt_irec_t	got;		/* extent list entry, decoded */
+	int		high;		/* high index of binary search */
+	int		low;		/* low index of binary search */
+	/* Find bno in the list: cheap checks first, then a binary search. */
+	if (lastx != NULLEXTNUM && lastx < nextents)
+		ep = base + lastx;	/* start from the cached index */
+	else
+		ep = NULL;		/* no usable cached index */
+	prevp->br_startoff = NULLFILEOFF;	/* stays so unless set below */
+	if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
+	    bno < got.br_startoff +
+		  (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
+		*eofp = 0;		/* bno is inside the cached extent */
+	else if (ep && lastx < nextents - 1 &&
+		 bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
+		 bno < got.br_startoff +
+		       (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
+		lastx++;		/* bno is inside the extent after it */
+		ep++;
+		*eofp = 0;
+	} else if (nextents == 0)
+		*eofp = 1;		/* empty list: everything is past eof */
+	else if (bno == 0 &&
+		 (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
+		ep = base;		/* block 0 is in the first extent */
+		lastx = 0;
+		got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+		*eofp = 0;
+	} else {
+		/* binary search the extents array */
+		low = 0;
+		high = nextents - 1;
+		while (low <= high) {
+			XFS_STATS_INC(xs_cmp_exlist);
+			lastx = (low + high) >> 1;
+			ep = base + lastx;
+			got.br_startoff = xfs_bmbt_get_startoff(ep);
+			got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+			if (bno < got.br_startoff)
+				high = lastx - 1;
+			else if (bno >= got.br_startoff + got.br_blockcount)
+				low = lastx + 1;
+			else {
+				got.br_startblock = xfs_bmbt_get_startblock(ep);
+				got.br_state = xfs_bmbt_get_state(ep);
+				*eofp = 0;
+				*lastxp = lastx;
+				*gotp = got;
+				return ep;	/* exact hit; *prevp not filled */
+			}
+		}
+		if (bno >= got.br_startoff + got.br_blockcount) {
+			lastx++;	/* bno lies beyond the last probe */
+			if (lastx == nextents) {
+				*eofp = 1;	/* ... and beyond every extent */
+				got.br_startblock = xfs_bmbt_get_startblock(ep);
+				got.br_state = xfs_bmbt_get_state(ep);
+				*prevp = got;
+				ep = NULL;
+			} else {
+				*eofp = 0;	/* hole: return the next extent */
+				xfs_bmbt_get_all(ep, prevp);
+				ep++;
+				got.br_startoff = xfs_bmbt_get_startoff(ep);
+				got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+			}
+		} else {
+			*eofp = 0;	/* hole just before the last probe */
+			if (ep > base)
+				xfs_bmbt_get_all(ep - 1, prevp);
+		}
+	}
+	if (ep) {
+		got.br_startblock = xfs_bmbt_get_startblock(ep);
+		got.br_state = xfs_bmbt_get_state(ep);
+	}
+	*lastxp = lastx;
+	*gotp = got;	/* NOTE(review): got looks unset if nextents == 0 */
+	return ep;
+}
+
+/*
+ * Search the incore extent list of the given fork for the extent that
+ * contains file block "bno".  The lookup itself is carried out by
+ * xfs_bmap_do_search_extents(), seeded with the fork's cached if_lastex.
+ * If bno lies in a hole, point to the next entry.  If bno lies past eof,
+ * *eofp will be set, and *prevp will contain the last entry (null if none).
+ * Else, *lastxp will be set to the index of the found entry; *gotp will
+ * contain the entry.
+ */
+STATIC xfs_bmbt_rec_t *			/* pointer to found extent entry */
+xfs_bmap_search_extents(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_fileoff_t	bno,		/* block number searched for */
+	int		whichfork,	/* data or attr fork */
+	int		*eofp,		/* out: end of file found */
+	xfs_extnum_t	*lastxp,	/* out: last extent index */
+	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
+	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
+{
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	xfs_bmbt_rec_t	*recs;		/* base of extent list */
+	xfs_extnum_t	nrecs;		/* extent list size */
+
+	XFS_STATS_INC(xs_look_exlist);
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	recs = ifp->if_u1.if_extents;
+	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+	return xfs_bmap_do_search_extents(recs, ifp->if_lastex, nrecs, bno,
+					  eofp, lastxp, gotp, prevp);
+}
+
+/*
+ * Work out the worst-case count of indirect (bmap btree) blocks that
+ * ip's delayed extent of length "len" could need.
+ */
+STATIC xfs_filblks_t
+xfs_bmap_worst_indlen(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_filblks_t	len)		/* delayed extent length */
+{
+	int		level;		/* btree level number */
+	xfs_mount_t	*mp;		/* mount structure */
+	int		nrecs;		/* maximum record count at this level */
+	xfs_filblks_t	total;		/* running sum, the return value */
+
+	mp = ip->i_mount;
+	nrecs = mp->m_bmap_dmxr[0];
+	total = 0;
+	level = 0;
+	while (level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) {
+		len += nrecs - 1;
+		do_div(len, nrecs);
+		total += len;
+		if (len == 1)
+			return total + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+				level - 1;
+		if (level++ == 0)
+			nrecs = mp->m_bmap_dmxr[1];
+	}
+	return total;
+}
+
+/*
+ * Queue the extent on the list of extents to be freed at transaction end.
+ * It goes in ahead of the first item not starting below bno (sorted list).
+ */
+/* ARGSUSED */
+void
+xfs_bmap_add_free(
+	xfs_fsblock_t		bno,		/* fs block number of extent */
+	xfs_filblks_t		len,		/* length of extent */
+	xfs_bmap_free_t		*flist,		/* list of extents */
+	xfs_mount_t		*mp)		/* mount point structure */
+{
+	xfs_bmap_free_item_t	*cur;		/* current (next) element */
+	xfs_bmap_free_item_t	*new;		/* new element */
+	xfs_bmap_free_item_t	*prev;		/* previous element */
+#ifdef DEBUG
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+
+	ASSERT(bno != NULLFSBLOCK);
+	ASSERT(len > 0);
+	ASSERT(len <= MAXEXTLEN);
+	ASSERT(!ISNULLSTARTBLOCK(bno));
+	agno = XFS_FSB_TO_AGNO(mp, bno);
+	agbno = XFS_FSB_TO_AGBNO(mp, bno);
+	ASSERT(agno < mp->m_sb.sb_agcount);
+	ASSERT(agbno < mp->m_sb.sb_agblocks);
+	ASSERT(len < mp->m_sb.sb_agblocks);
+	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+#endif
+	ASSERT(xfs_bmap_free_item_zone != NULL);
+	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+	new->xbfi_startblock = bno;
+	new->xbfi_blockcount = (xfs_extlen_t)len;
+	prev = NULL;
+	cur = flist->xbf_first;
+	while (cur != NULL && cur->xbfi_startblock < bno) {
+		prev = cur;
+		cur = cur->xbfi_next;
+	}
+	new->xbfi_next = cur;
+	if (prev)
+		prev->xbfi_next = new;
+	else
+		flist->xbf_first = new;
+	flist->xbf_count++;
+}
+
+/*
+ * Work out the maximum depth a bmap btree can reach in this filesystem
+ * for the given fork and record it in the mount structure.
+ */
+void
+xfs_bmap_compute_maxlevels(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	int		whichfork)	/* data or attr fork */
+{
+	int		level;		/* btree level */
+	uint		maxblocks;	/* max blocks at this level */
+	uint		maxleafents;	/* max leaf entries possible */
+	int		maxrootrecs;	/* max records in root block */
+	int		minleafrecs;	/* min records in leaf block */
+	int		minnoderecs;	/* min records in node block */
+	int		sz;		/* root block size */
+
+	/*
+	 * The maximum number of extents in a file, hence the maximum
+	 * number of leaf entries, is controlled by the type of di_nextents
+	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
+	 * (a signed 16-bit number, xfs_aextnum_t).
+	 */
+	if (whichfork == XFS_DATA_FORK) {
+		maxleafents = MAXEXTNUM;
+		sz = mp->m_attroffset;
+	} else {
+		maxleafents = MAXAEXTNUM;
+		sz = mp->m_sb.sb_inodesize - mp->m_attroffset;
+	}
+	minleafrecs = mp->m_bmap_dmnr[0];
+	minnoderecs = mp->m_bmap_dmnr[1];
+	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+	for (level = 1; maxblocks > 1; level++)
+		maxblocks = (maxblocks <= maxrootrecs) ? 1 :
+			(maxblocks + minnoderecs - 1) / minnoderecs;
+	mp->m_bm_maxlevels[whichfork] = level;
+}
+
+/*
+ * Returns the file-relative block number of the first unused block(s)
+ * in the file with at least "len" logically contiguous blocks free.
+ * This is the lowest-address hole if the file has holes, else the first block
+ * past the end of file.  On entry *first_unused is the lowest offset the
+ * caller will accept; a non-local result is never below it.
+ * Return 0 if the file is currently local (in-inode).
+ */
+int						/* error */
+xfs_bmap_first_unused(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_extlen_t	len,			/* size of hole to find */
+	xfs_fileoff_t	*first_unused,		/* unused block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_bmbt_rec_t	*base;			/* base of extent array */
+	xfs_bmbt_rec_t	*ep;			/* pointer to an extent entry */
+	int		error;			/* error return value */
+	xfs_extnum_t	i;			/* index of current extent */
+	xfs_ifork_t	*ifp;			/* inode fork pointer */
+	xfs_fileoff_t	lastaddr;		/* last block number seen */
+	xfs_fileoff_t	lowest;			/* lowest useful block */
+	xfs_fileoff_t	max;			/* starting useful block */
+	xfs_extnum_t	nextents;		/* number of extent entries */
+	xfs_fileoff_t	off;			/* offset for this block */
+
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
+	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
+	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+		*first_unused = 0;
+		return 0;
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	lowest = *first_unused;
+	max = lowest;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	base = &ifp->if_u1.if_extents[0];
+	for (i = 0; i < nextents; i++) {
+		ep = &base[i];
+		off = xfs_bmbt_get_startoff(ep);
+		/*
+		 * Stop as soon as the hole before this extent will work.
+		 */
+		if (off >= lowest + len && off - max >= len)
+			break;
+		lastaddr = off + xfs_bmbt_get_blockcount(ep);
+		max = XFS_FILEOFF_MAX(lastaddr, lowest);
+	}
+	*first_unused = max;
+	return 0;
+}
+
+/*
+ * Trim *last_block (input value) back to the end of the mapped space that
+ * precedes it: if block *last_block - 1 is not mapped, *last_block becomes
+ * the block after the previous extent, or 0 when there is none.
+ * This is based on the extent list, not i_size; local files yield 0.
+ * Returns EIO for a fork format other than btree, extents or local.
+ */
+int						/* error */
+xfs_bmap_last_before(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_fileoff_t	*last_block,		/* last block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_fileoff_t	bno;			/* input file offset */
+	int		eof;			/* hit end of file */
+	xfs_bmbt_rec_t	*ep;			/* pointer to last extent */
+	int		error;			/* error return value */
+	xfs_bmbt_irec_t	got;			/* current extent value */
+	xfs_ifork_t	*ifp;			/* inode fork pointer */
+	xfs_extnum_t	lastx;			/* last extent used */
+	xfs_bmbt_irec_t	prev;			/* previous extent value */
+
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		*last_block = 0;
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+	case XFS_DINODE_FMT_EXTENTS:
+		break;
+	default:
+		return XFS_ERROR(EIO);
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	bno = *last_block - 1;
+	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+		&prev);
+	/*
+	 * If bno is mapped, *last_block is already the right answer.
+	 */
+	if (!eof && xfs_bmbt_get_startoff(ep) <= bno)
+		return 0;
+	*last_block = (prev.br_startoff == NULLFILEOFF) ? 0 :
+		prev.br_startoff + prev.br_blockcount;
+	return 0;
+}
+
+/*
+ * Sets *last_block to the file-relative block number just past the last
+ * extent in the extent list; i_size is not consulted.  An empty list or
+ * a local fork yields 0, and an unrecognised fork format returns EIO.
+ */
+int						/* error */
+xfs_bmap_last_offset(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_fileoff_t	*last_block,		/* last block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_bmbt_rec_t	*ep;			/* pointer to last extent */
+	int		error;			/* error return value */
+	xfs_ifork_t	*ifp;			/* inode fork pointer */
+	xfs_extnum_t	nextents;		/* number of extent entries */
+
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		*last_block = 0;
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+	case XFS_DINODE_FMT_EXTENTS:
+		break;
+	default:
+		return XFS_ERROR(EIO);
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	if (nextents) {
+		ASSERT(ifp->if_u1.if_extents != NULL);
+		ep = &ifp->if_u1.if_extents[nextents - 1];
+		*last_block = xfs_bmbt_get_startoff(ep) +
+			      xfs_bmbt_get_blockcount(ep);
+	} else
+		*last_block = 0;
+	return 0;
+}
+
+/*
+ * Returns whether the selected fork of the inode has exactly one
+ * block or not, i.e. a single extent at file offset 0 of length 1.
+ * For the data fork of a non-DEBUG build the answer is taken from
+ * di_size alone (di_size == filesystem block size); DEBUG builds look
+ * at the extent and, when it qualifies, assert that di_size agrees.
+ */
+int					/* 1=>1 block, 0=>otherwise */
+xfs_bmap_one_block(
+	xfs_inode_t	*ip,		/* incore inode */
+	int		whichfork)	/* data or attr fork */
+{
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	int		rval;		/* return value */
+	xfs_bmbt_irec_t	s;		/* internal version of extent */
+
+#ifndef DEBUG
+	if (whichfork == XFS_DATA_FORK)
+		return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize;
+#endif	/* !DEBUG */
+	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1 ||
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		return 0;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	xfs_bmbt_get_all(ifp->if_u1.if_extents, &s);
+	rval = s.br_startoff == 0 && s.br_blockcount == 1;
+	/* Cross-check the extent-based answer against di_size. */
+	if (rval && whichfork == XFS_DATA_FORK)
+		ASSERT(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
+	return rval;
+}
+
+/*
+ * Read in the extents to if_extents by walking down the leftmost edge of
+ * the bmap btree and then along the leaf level via the right-sibling links.
+ * All inode fields are set up by caller; we just copy the records in.
+ * When the extent format is XFS_EXTFMT_NOSTATE, each batch of records is
+ * checked by xfs_check_nostate_extents().  EFSCORRUPTED on a bad block.
+ */
+int					/* error */
+xfs_bmap_read_extents(
+	xfs_trans_t		*tp,	/* transaction pointer */
+	xfs_inode_t		*ip,	/* incore inode */
+	int			whichfork) /* data or attr fork */
+{
+	xfs_bmbt_block_t	*block;	/* current btree block */
+	xfs_fsblock_t		bno;	/* block # of "block" */
+	xfs_buf_t			*bp;	/* buffer for "block" */
+	int			error;	/* error return value */
+	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_read_extents";
+#endif
+	xfs_extnum_t		i;	/* index into the extents list */
+	xfs_ifork_t		*ifp;	/* fork structure */
+	int			level;	/* btree level, for checking */
+	xfs_mount_t		*mp;	/* file system mount structure */
+	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
+	/* REFERENCED */
+	xfs_extnum_t		room;	/* number of entries there's room for */
+	xfs_bmbt_rec_t		*trp;	/* target record pointer */
+
+	bno = NULLFSBLOCK;
+	mp = ip->i_mount;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
+					XFS_EXTFMT_INODE(ip);
+	block = ifp->if_broot;
+	/*
+	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+	 */
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+	level = INT_GET(block->bb_level, ARCH_CONVERT);
+	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
+	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
+	ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
+	bno = INT_GET(*pp, ARCH_CONVERT);
+	/*
+	 * Go down the tree until leaf level is reached, following the first
+	 * pointer (leftmost) at each level.
+	 */
+	while (level-- > 0) {
+		if (error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+				XFS_BMAP_BTREE_REF))
+			return error;
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		XFS_WANT_CORRUPTED_GOTO(
+			XFS_BMAP_SANITY_CHECK(mp, block, level),
+			error0);
+		if (level == 0)
+			break;		/* leaf reached; bp stays held */
+		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
+			1, mp->m_bmap_dmxr[1]);
+		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0);
+		bno = INT_GET(*pp, ARCH_CONVERT);
+		xfs_trans_brelse(tp, bp);
+	}
+	/*
+	 * Here with bp and block set to the leftmost leaf node in the tree.
+	 */
+	room = ifp->if_bytes / (uint)sizeof(*trp);
+	trp = ifp->if_u1.if_extents;
+	i = 0;
+	/*
+	 * Loop over all leaf nodes.  Copy information to the extent list.
+	 */
+	for (;;) {
+		xfs_bmbt_rec_t	*frp;
+		xfs_fsblock_t	nextbno;
+		xfs_extnum_t	num_recs;
+
+		num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		if (i + num_recs > room) {
+			/* More records than if_extents has room for. */
+			ASSERT(i + num_recs <= room);
+			xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+	"corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.",
+			ip->i_ino);
+			goto error0;
+		}
+		XFS_WANT_CORRUPTED_GOTO(
+			XFS_BMAP_SANITY_CHECK(mp, block, 0),
+			error0);
+		/*
+		 * Read-ahead the next leaf block, if any.
+		 */
+		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+		if (nextbno != NULLFSBLOCK)
+			xfs_btree_reada_bufl(mp, nextbno, 1);
+		/*
+		 * Copy records into the extent list.
+		 */
+		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+			block, 1, mp->m_bmap_dmxr[0]);
+		bcopy(frp, trp, num_recs * sizeof(*frp));
+		if (exntf == XFS_EXTFMT_NOSTATE) {
+			/*
+			 * Check all attribute bmap btree records and
+			 * any "older" data bmap btree records for a
+			 * set bit in the "extent flag" position.
+			 */
+			if (xfs_check_nostate_extents(trp, num_recs)) {
+				goto error0;
+			}
+		}
+		trp += num_recs;
+		i += num_recs;
+		xfs_trans_brelse(tp, bp);
+		bno = nextbno;
+		/*
+		 * If we've reached the end, stop.
+		 */
+		if (bno == NULLFSBLOCK)
+			break;
+		if (error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+				XFS_BMAP_BTREE_REF))
+			return error;
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+	}
+	ASSERT(i == ifp->if_bytes / (uint)sizeof(*trp));
+	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+	xfs_bmap_trace_exlist(fname, ip, i, whichfork);
+	return 0;
+error0:					/* every jump here still holds bp */
+	xfs_trans_brelse(tp, bp);
+	return XFS_ERROR(EFSCORRUPTED);
+}
+
+/*
+ * Map the file range given by the bno/len pair to filesystem blocks.
+ * Up to *nmap mappings are stored in mval; *nmap is set to the count stored.
+ * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
+ * into a hole or past eof.
+ * Only allocates blocks from a single allocation group,
+ * to avoid locking problems.
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
+ */
+int					/* error */
+xfs_bmapi(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*ip,		/* incore inode */
+	xfs_fileoff_t	bno,		/* starting file offs. mapped */
+	xfs_filblks_t	len,		/* length to map in file */
+	int		flags,		/* XFS_BMAPI_... */
+	xfs_fsblock_t	*firstblock,	/* first allocated block
+					   controls a.g. for allocs */
+	xfs_extlen_t	total,		/* total blocks needed */
+	xfs_bmbt_irec_t	*mval,		/* output: map values */
+	int		*nmap,		/* i/o: mval size/count */
+	xfs_bmap_free_t	*flist)		/* i/o: list extents to free */
+{
+	xfs_fsblock_t	abno;		/* allocated block number */
+	xfs_extlen_t	alen;		/* allocated extent length */
+	xfs_fileoff_t	aoff;		/* allocated file offset */
+	xfs_bmalloca_t	bma;		/* args for xfs_bmap_alloc */
+	int		contig;		/* allocation must be one extent */
+	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
+	char		delay;		/* this request is for delayed alloc */
+	xfs_fileoff_t	end;		/* end of mapped file region */
+	int		eof;		/* we've hit the end of extent list */
+	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
+	int		error;		/* error return */
+	char		exact;		/* don't do all of wasdelayed extent */
+	xfs_bmbt_irec_t	got;		/* current extent list record */
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	xfs_extlen_t	indlen;		/* indirect blocks length */
+	char		inhole;		/* current location is hole in file */
+	xfs_extnum_t	lastx;		/* last useful extent number */
+	int		logflags;	/* flags for transaction logging */
+	xfs_extlen_t	minleft;	/* min blocks left after allocation */
+	xfs_extlen_t	minlen;		/* min allocation size */
+	xfs_mount_t	*mp;		/* xfs mount structure */
+	int		n;		/* current extent index */
+	int		nallocs;	/* number of extents allocated */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	xfs_fileoff_t	obno;		/* old block number (offset) */
+	xfs_bmbt_irec_t	prev;		/* previous extent list record */
+	int		stateless;	/* ignore state flag set */
+	int		tmp_logflags;	/* temp flags holder */
+	char		trim;		/* output trimmed to match range */
+	char		userdata;	/* allocating non-metadata */
+	char		wasdelay;	/* old extent was delayed */
+	int		whichfork;	/* data or attr fork */
+	char		wr;		/* this is a write request */
+	int		rsvd;		/* OK to allocate reserved blocks */
+#ifdef DEBUG
+	xfs_fileoff_t	orig_bno;	/* original block number value */
+	int		orig_flags;	/* original flags arg value */
+	xfs_filblks_t	orig_len;	/* original value of len arg */
+	xfs_bmbt_irec_t	*orig_mval;	/* original value of mval */
+	int		orig_nmap;	/* original value of *nmap */
+
+	orig_bno = bno;
+	orig_len = len;
+	orig_flags = flags;
+	orig_mval = mval;
+	orig_nmap = *nmap;
+#endif
+	ASSERT(*nmap >= 1);
+	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE));
+	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+		XFS_ATTR_FORK : XFS_DATA_FORK;
+	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+		return XFS_ERROR(EFSCORRUPTED);
+	mp = ip->i_mount;
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	if (wr = (flags & XFS_BMAPI_WRITE) != 0)
+		XFS_STATS_INC(xs_blk_mapw);
+	else
+		XFS_STATS_INC(xs_blk_mapr);
+	delay = (flags & XFS_BMAPI_DELAY) != 0;
+	trim = (flags & XFS_BMAPI_ENTIRE) == 0;
+	userdata = (flags & XFS_BMAPI_METADATA) == 0;
+	exact = (flags & XFS_BMAPI_EXACT) != 0;
+	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
+	contig = (flags & XFS_BMAPI_CONTIG) != 0;
+	/*
+	 * stateless is used to combine extents which
+	 * differ only due to the state of the extents.
+	 * This technique is used from xfs_getbmap()
+	 * when the caller does not wish to see the
+	 * separation (which is the default).
+	 *
+	 * This technique is also used when writing a
+	 * buffer which has been partially written,
+	 * (usually by being flushed during a chunkread),
+	 * to ensure one write takes place. This also
+	 * prevents a change in the xfs inode extents at
+	 * this time, intentionally. This change occurs
+	 * on completion of the write operation, in
+	 * xfs_strat_comp(), where the xfs_bmapi() call
+	 * is transactioned, and the extents combined.
+	 */
+	stateless = (flags & XFS_BMAPI_IGSTATE) != 0;
+	if (stateless && wr)	/* if writing unwritten space, no */
+		wr = 0;		/* allocations are allowed */
+	ASSERT(wr || !delay);
+	logflags = 0;
+	nallocs = 0;
+	cur = NULL;
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+		ASSERT(wr && tp);
+		if (error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
+				&logflags, whichfork))
+			goto error0;
+	}
+	if (wr && *firstblock == NULLFSBLOCK) {
+		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
+			minleft = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
+		else
+			minleft = 1;
+	} else
+		minleft = 0;
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		goto error0;
+	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+		&prev);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	n = 0;
+	end = bno + len;
+	obno = bno;
+	bma.ip = NULL;
+	while (bno < end && n < *nmap) {
+		/*
+		 * Reading past eof, act as though there's a hole
+		 * up to end.
+		 */
+		if (eof && !wr)
+			got.br_startoff = end;
+		inhole = eof || got.br_startoff > bno;
+		wasdelay = wr && !inhole && !delay &&
+			ISNULLSTARTBLOCK(got.br_startblock);
+		/*
+		 * First, deal with the hole before the allocated space
+		 * that we found, if any.
+		 */
+		if (wr && (inhole || wasdelay)) {
+			/*
+			 * For the wasdelay case, we could also just
+			 * allocate the stuff asked for in this bmap call
+			 * but that wouldn't be as good.
+			 */
+			if (wasdelay && !exact) {
+				alen = (xfs_extlen_t)got.br_blockcount;
+				aoff = got.br_startoff;
+				if (lastx != NULLEXTNUM && lastx) {
+					ep = &ifp->if_u1.if_extents[lastx - 1];
+					xfs_bmbt_get_all(ep, &prev);
+				}
+			} else if (wasdelay) {
+				alen = (xfs_extlen_t)
+					XFS_FILBLKS_MIN(len,
+						(got.br_startoff +
+						 got.br_blockcount) - bno);
+				aoff = bno;
+			} else {
+				alen = (xfs_extlen_t)
+					XFS_FILBLKS_MIN(len, MAXEXTLEN);
+				if (!eof)
+					alen = (xfs_extlen_t)
+						XFS_FILBLKS_MIN(alen,
+							got.br_startoff - bno);
+				aoff = bno;
+			}
+			minlen = contig ? alen : 1;
+			if (delay) {
+				indlen = (xfs_extlen_t)
+					xfs_bmap_worst_indlen(ip, alen);
+				ASSERT(indlen > 0);
+				/*
+				 * Make a transaction-less quota reservation for
+				 * delayed allocation blocks. This number gets
+				 * adjusted later.
+				 * We return EDQUOT only if nothing has been
+				 * mapped yet in this loop; else we stop early.
+				 */
+				if (XFS_IS_QUOTA_ON(ip->i_mount) &&
+				    xfs_trans_reserve_blkquota(NULL, ip,
+					    (long)alen)) {
+					if (n == 0) {
+						*nmap = 0;
+						ASSERT(cur == NULL);
+						return XFS_ERROR(EDQUOT);
+					}
+					break;
+				}
+				if (xfs_mod_incore_sb(ip->i_mount,
+						XFS_SBS_FDBLOCKS,
+						-(alen + indlen), rsvd)) {
+					if (XFS_IS_QUOTA_ON(ip->i_mount))
+						xfs_trans_unreserve_blkquota(
+							NULL, ip, (long)alen);
+					break;
+				}
+				ip->i_delayed_blks += alen;
+				abno = NULLSTARTBLOCK(indlen);
+			} else {
+				/*
+				 * If first time, allocate and fill in
+				 * once-only bma fields.
+				 */
+				if (bma.ip == NULL) {
+					bma.tp = tp;
+					bma.ip = ip;
+					bma.prevp = &prev;
+					bma.gotp = &got;
+					bma.total = total;
+					bma.userdata = userdata;
+				}
+				/*
+				 * Fill in changeable bma fields.
+				 */
+				bma.eof = eof;
+				bma.firstblock = *firstblock;
+				bma.alen = alen;
+				bma.off = aoff;
+				bma.wasdel = wasdelay;
+				bma.minlen = minlen;
+				bma.low = flist->xbf_low;
+				bma.minleft = minleft;
+				/*
+				 * Only want to do the alignment at the
+				 * eof if it is userdata and allocation length
+				 * is larger than a stripe unit.
+				 */
+				if (mp->m_dalign && alen >= mp->m_dalign &&
+				    userdata && whichfork == XFS_DATA_FORK) {
+					if (error = xfs_bmap_isaeof(ip, aoff,
+							whichfork, &bma.aeof))
+						goto error0;
+				} else
+					bma.aeof = 0;
+				/*
+				 * Call allocator.
+				 */
+				if (error = xfs_bmap_alloc(&bma))
+					goto error0;
+				/*
+				 * Copy out result fields.
+				 */
+				abno = bma.rval;
+				if (flist->xbf_low = bma.low)
+					minleft = 0;
+				alen = bma.alen;
+				aoff = bma.off;
+				ASSERT(*firstblock == NULLFSBLOCK ||
+				       XFS_FSB_TO_AGNO(ip->i_mount,
+					       *firstblock) ==
+				       XFS_FSB_TO_AGNO(ip->i_mount,
+					       bma.firstblock) ||
+				       (flist->xbf_low &&
+				        XFS_FSB_TO_AGNO(ip->i_mount,
+						*firstblock) <
+					XFS_FSB_TO_AGNO(ip->i_mount,
+						bma.firstblock)));
+				*firstblock = bma.firstblock;
+				if (cur)
+					cur->bc_private.b.firstblock =
+						*firstblock;
+				if (abno == NULLFSBLOCK)
+					break;
+				if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
+					cur = xfs_btree_init_cursor(ip->i_mount,
+						tp, NULL, 0, XFS_BTNUM_BMAP,
+						ip, whichfork);
+					cur->bc_private.b.firstblock =
+						*firstblock;
+					cur->bc_private.b.flist = flist;
+				}
+				/*
+				 * Bump the number of extents we've allocated
+				 * in this call.
+				 */
+				nallocs++;
+			}
+			if (cur)
+				cur->bc_private.b.flags =
+					wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0;
+			got.br_startoff = aoff;
+			got.br_startblock = abno;
+			got.br_blockcount = alen;
+			got.br_state = XFS_EXT_NORM;	/* assume normal */
+			/*
+			 * Determine state of extent, and the filesystem.
+			 * A wasdelay extent has been initialized, so
+			 * shouldn't be flagged as unwritten.
+			 */
+			if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
+					got.br_state = XFS_EXT_UNWRITTEN;
+			}
+			error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
+				firstblock, flist, &tmp_logflags, whichfork,
+				rsvd);
+			logflags |= tmp_logflags;
+			if (error)
+				goto error0;
+			lastx = ifp->if_lastex;
+			ep = &ifp->if_u1.if_extents[lastx];
+			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+			xfs_bmbt_get_all(ep, &got);
+			ASSERT(got.br_startoff <= aoff);
+			ASSERT(got.br_startoff + got.br_blockcount >=
+				aoff + alen);
+#ifdef DEBUG
+			if (delay) {
+				ASSERT(ISNULLSTARTBLOCK(got.br_startblock));
+				ASSERT(STARTBLOCKVAL(got.br_startblock) > 0);
+			}
+			ASSERT(got.br_state == XFS_EXT_NORM ||
+			       got.br_state == XFS_EXT_UNWRITTEN);
+#endif
+			/*
+			 * Fall down into the found allocated space case.
+			 */
+		} else if (inhole) {
+			/*
+			 * Reading in a hole.
+			 */
+			mval->br_startoff = bno;
+			mval->br_startblock = HOLESTARTBLOCK;
+			mval->br_blockcount =
+				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
+			mval->br_state = XFS_EXT_NORM;
+			bno += mval->br_blockcount;
+			len -= mval->br_blockcount;
+			mval++;
+			n++;
+			continue;
+		}
+		/*
+		 * Then deal with the allocated space we found.
+		 */
+		ASSERT(ep != NULL);
+		if (trim && (got.br_startoff + got.br_blockcount > obno)) {
+			if (obno > bno)
+				bno = obno;
+			ASSERT((bno >= obno) || (n == 0));
+			ASSERT(bno < end);
+			mval->br_startoff = bno;
+			if (ISNULLSTARTBLOCK(got.br_startblock)) {
+				ASSERT(!wr || delay);
+				mval->br_startblock = DELAYSTARTBLOCK;
+			} else
+				mval->br_startblock =
+					got.br_startblock +
+					(bno - got.br_startoff);
+			/*
+			 * Return the minimum of what we got and what we
+			 * asked for as the length.  We can use the len
+			 * variable here because it is modified below
+			 * and we could have been there before coming
+			 * here if the first part of the allocation
+			 * didn't overlap what was asked for.
+			 */
+			mval->br_blockcount =
+				XFS_FILBLKS_MIN(end - bno, got.br_blockcount -
+					(bno - got.br_startoff));
+			mval->br_state = got.br_state;
+			ASSERT(mval->br_blockcount <= len);
+		} else {
+			*mval = got;
+			if (ISNULLSTARTBLOCK(mval->br_startblock)) {
+				ASSERT(!wr || delay);
+				mval->br_startblock = DELAYSTARTBLOCK;
+			}
+		}
+
+		/*
+		 * Check if writing previously allocated but
+		 * unwritten extents.
+		 */
+		if (wr && mval->br_state == XFS_EXT_UNWRITTEN &&
+		    ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) {
+			/*
+			 * Switch the mapping to the normal state, if writing.
+			 */
+			ASSERT(mval->br_blockcount <= len);
+			if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
+				cur = xfs_btree_init_cursor(ip->i_mount,
+					tp, NULL, 0, XFS_BTNUM_BMAP,
+					ip, whichfork);
+				cur->bc_private.b.firstblock =
+					*firstblock;
+				cur->bc_private.b.flist = flist;
+			}
+			mval->br_state = XFS_EXT_NORM;
+			error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
+				firstblock, flist, &tmp_logflags, whichfork,
+				rsvd);
+			logflags |= tmp_logflags;
+			if (error)
+				goto error0;
+			lastx = ifp->if_lastex;
+			ep = &ifp->if_u1.if_extents[lastx];
+			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+			xfs_bmbt_get_all(ep, &got);
+			/*
+			 * We may have combined previously unwritten
+			 * space with written space, so generate
+			 * another request.
+			 */
+			if (mval->br_blockcount < len)
+				continue;
+		}
+
+		ASSERT(!trim ||
+		       ((mval->br_startoff + mval->br_blockcount) <= end));
+		ASSERT(!trim || (mval->br_blockcount <= len) ||
+		       (mval->br_startoff < obno));
+		bno = mval->br_startoff + mval->br_blockcount;
+		len = end - bno;
+		if (n > 0 && mval->br_startoff == mval[-1].br_startoff) {
+			ASSERT(mval->br_startblock == mval[-1].br_startblock);
+			ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
+			ASSERT(mval->br_state == mval[-1].br_state);
+			mval[-1].br_blockcount = mval->br_blockcount;
+			mval[-1].br_state = mval->br_state;
+		} else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
+			   mval[-1].br_startblock != DELAYSTARTBLOCK &&
+			   mval[-1].br_startblock != HOLESTARTBLOCK &&
+			   mval->br_startblock ==
+			   mval[-1].br_startblock + mval[-1].br_blockcount &&
+			   (stateless || mval[-1].br_state == mval->br_state)) {
+			ASSERT(mval->br_startoff ==
+			       mval[-1].br_startoff + mval[-1].br_blockcount);
+			mval[-1].br_blockcount += mval->br_blockcount;
+		} else if (n > 0 &&
+			   mval->br_startblock == DELAYSTARTBLOCK &&
+			   mval[-1].br_startblock == DELAYSTARTBLOCK &&
+			   mval->br_startoff ==
+			   mval[-1].br_startoff + mval[-1].br_blockcount) {
+			mval[-1].br_blockcount += mval->br_blockcount;
+			mval[-1].br_state = mval->br_state;
+		} else if (!((n == 0) &&
+			     ((mval->br_startoff + mval->br_blockcount) <=
+			      obno))) {
+			mval++;
+			n++;
+		}
+		/*
+		 * If we're done, stop now.  Stop once we've allocated *nmap
+		 * extents (at most XFS_BMAP_MAX_NMAP on writes) no matter
+		 * what.  Otherwise the transaction may get too big.
+		 */
+		if (bno >= end || n >= *nmap || nallocs >= *nmap)
+			break;
+		/*
+		 * Else go on to the next record.
+		 */
+		ep++;
+		lastx++;
+		if (lastx >= nextents) {
+			eof = 1;
+			prev = got;
+		} else
+			xfs_bmbt_get_all(ep, &got);
+	}
+	ifp->if_lastex = lastx;
+	*nmap = n;
+	/*
+	 * Transform from btree to extents, give it cur.
+	 */
+	if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+	    XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+		ASSERT(wr && cur);
+		error = xfs_bmap_btree_to_extents(tp, ip, cur,
+			&tmp_logflags, whichfork, 0);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+	}
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
+	       XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
+	error = 0;
+
+error0:
+	/*
+	 * Log everything.  Do this after conversion, there's no point in
+	 * logging the extent list if we've converted to btree format.
+	 */
+	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		logflags &= ~XFS_ILOG_FEXT(whichfork);
+	else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
+		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		logflags &= ~XFS_ILOG_FBROOT(whichfork);
+	/*
+	 * Log whatever the flags say, even if error.  Otherwise we might miss
+	 * detecting a case where the data is changed, there's an error,
+	 * and it's not logged so we don't shutdown when we should.
+	 */
+	if (logflags) {
+		ASSERT(tp && wr);
+		xfs_trans_log_inode(tp, ip, logflags);
+	}
+	if (cur) {
+		if (!error) {
+			ASSERT(*firstblock == NULLFSBLOCK ||
+			       XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) ==
+			       XFS_FSB_TO_AGNO(ip->i_mount,
+				       cur->bc_private.b.firstblock) ||
+			       (flist->xbf_low &&
+				XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) < 
+				XFS_FSB_TO_AGNO(ip->i_mount,
+					cur->bc_private.b.firstblock)));
+			*firstblock = cur->bc_private.b.firstblock;
+		}
+		xfs_btree_del_cursor(cur,
+			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	}
+	if (!error)
+		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
+			orig_nmap, *nmap);
+	return error;
+}
+
+/*
+ * Simple, read-only lookup of a single file block.
+ * Translates file offset "bno" in the fork chosen by "whichfork" into a
+ * filesystem block number stored in *fsb.  A hole, or an offset beyond the
+ * last extent, is reported as NULLFSBLOCK with a zero return value.
+ * On a hit, if_lastex in the fork is set to the index from the search.
+ */
+int						/* error */
+xfs_bmapi_single(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*ip,		/* incore inode */
+	int		whichfork,	/* data or attr fork */
+	xfs_fsblock_t	*fsb,		/* output: mapped block */
+	xfs_fileoff_t	bno)		/* starting file offs. mapped */
+{
+	xfs_bmbt_irec_t	rec;		/* extent record found by the search */
+	xfs_bmbt_irec_t	before;		/* record preceding it, not used here */
+	xfs_extnum_t	idx;		/* extent index from the search */
+	xfs_ifork_t	*fork;		/* inode fork pointer */
+	int		past_end;	/* search ran off the extent list */
+	int		rc;		/* error return */
+
+	fork = XFS_IFORK_PTR(ip, whichfork);
+	if (!(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
+	      XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS))
+		return XFS_ERROR(EFSCORRUPTED);
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return XFS_ERROR(EIO);
+	XFS_STATS_INC(xs_blk_mapr);
+	/* XFS_IFEXTENTS clear: read the extents before searching them. */
+	if (!(fork->if_flags & XFS_IFEXTENTS)) {
+		rc = xfs_iread_extents(tp, ip, whichfork);
+		if (rc)
+			return rc;
+	}
+	(void)xfs_bmap_search_extents(ip, bno, whichfork, &past_end, &idx,
+		&rec, &before);
+	/* Off the end of the list, or in front of the extent found: a hole. */
+	if (past_end || rec.br_startoff > bno) {
+		*fsb = NULLFSBLOCK;
+		return 0;
+	}
+	ASSERT(!ISNULLSTARTBLOCK(rec.br_startblock));
+	ASSERT(bno < rec.br_startoff + rec.br_blockcount);
+	*fsb = rec.br_startblock + (bno - rec.br_startoff);
+	fork->if_lastex = idx;
+	return 0;
+}
+
+/*
+ * Unmap (remove) blocks from a file.
+ * If nexts is nonzero then the number of extents to remove is limited to
+ * that value.  *done is set nonzero once the whole block range has been
+ * walked (or the fork has no extents), and to zero when the walk stopped
+ * early, e.g. at the nexts limit, with part of the range still unprocessed.
+ */
+int						/* error */
+xfs_bunmapi(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_fileoff_t		bno,		/* starting offset to unmap */
+	xfs_filblks_t		len,		/* length to unmap in file */
+	int			flags,		/* misc flags */
+	xfs_extnum_t		nexts,		/* number of extents max */
+	xfs_fsblock_t		*firstblock,	/* first allocated block
+						   controls a.g. for allocs */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	int			*done)		/* out: nonzero when finished */
+{
+	int			async;		/* xactions can be async */
+	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
+	xfs_bmbt_irec_t		del;		/* extent being deleted */
+	int			eof;		/* is deleting at eof */
+	xfs_bmbt_rec_t		*ep;		/* extent list entry pointer */
+	int			error;		/* error return value */
+	xfs_extnum_t		extno;		/* extent number in list */
+	xfs_bmbt_irec_t		got;		/* current extent list entry */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	int			isrt;		/* freeing in rt area */
+	xfs_extnum_t		lastx;		/* last extent index used */
+	int			logflags;	/* transaction logging flags */
+	xfs_extlen_t		mod;		/* rt extent offset */
+	xfs_mount_t		*mp;		/* mount structure */
+	xfs_extnum_t		nextents;	/* size of extent list */
+	xfs_bmbt_irec_t		prev;		/* previous extent list entry */
+	xfs_fileoff_t		start;		/* first file offset deleted */
+	int			tmp_logflags;	/* partial logging flags */
+	int			wasdel;		/* was a delayed alloc extent */
+	int			whichfork;	/* data or attribute fork */
+	int			rsvd;		/* OK to allocate reserved blocks */
+	xfs_fsblock_t		sum;
+
+	xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address);
+	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+		XFS_ATTR_FORK : XFS_DATA_FORK;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		return XFS_ERROR(EFSCORRUPTED);
+	mp = ip->i_mount;
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+	async = flags & XFS_BMAPI_ASYNC;
+	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
+	ASSERT(len > 0);
+	ASSERT(nexts >= 0);
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	if (nextents == 0) {
+		*done = 1;
+		return 0;
+	}
+	XFS_STATS_INC(xs_blk_unmap);
+	isrt = (whichfork == XFS_DATA_FORK) &&
+	       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+	start = bno;
+	bno = start + len - 1;
+	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+		&prev);
+	/*
+	 * Check to see if the given block number is past the end of the
+	 * file, back up to the last block if so...
+	 */
+	if (eof) {
+		ep = &ifp->if_u1.if_extents[--lastx];
+		xfs_bmbt_get_all(ep, &got);
+		bno = got.br_startoff + got.br_blockcount - 1;
+	}
+	logflags = 0;
+	if (ifp->if_flags & XFS_IFBROOT) {
+		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+		cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
+			whichfork);
+		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.flist = flist;
+		cur->bc_private.b.flags = 0;
+	} else
+		cur = NULL;
+	extno = 0;
+	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
+	       (nexts == 0 || extno < nexts)) {
+		/*
+		 * Is the found extent after a hole in which bno lives?
+		 * Just back up to the previous extent, if so.
+		 */
+		if (got.br_startoff > bno) {
+			if (--lastx < 0)
+				break;
+			ep--;
+			xfs_bmbt_get_all(ep, &got);
+		}
+		/*
+		 * Is the last block of this extent before the range
+		 * we're supposed to delete?  If so, we're done.
+		 */
+		bno = XFS_FILEOFF_MIN(bno,
+			got.br_startoff + got.br_blockcount - 1);
+		if (bno < start)
+			break;
+		/*
+		 * Then deal with the (possibly delayed) allocated space
+		 * we found.
+		 */
+		ASSERT(ep != NULL);
+		del = got;
+		wasdel = ISNULLSTARTBLOCK(del.br_startblock);
+		if (got.br_startoff < start) {
+			del.br_startoff = start;
+			del.br_blockcount -= start - got.br_startoff;
+			if (!wasdel)
+				del.br_startblock += start - got.br_startoff;
+		}
+		if (del.br_startoff + del.br_blockcount > bno + 1)
+			del.br_blockcount = bno + 1 - del.br_startoff;
+		sum = del.br_startblock + del.br_blockcount;
+		if (isrt &&
+		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
+			/*
+			 * Realtime extent not lined up at the end.
+			 * The extent could have been split into written
+			 * and unwritten pieces, or we could just be
+			 * unmapping part of it.  But we can't really
+			 * get rid of part of a realtime extent.
+			 */
+			if (del.br_state == XFS_EXT_UNWRITTEN ||
+			    !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				/*
+				 * This piece is unwritten, or we're not
+				 * using unwritten extents.  Skip over it.
+				 */
+				ASSERT(bno >= mod);
+				bno -= mod > del.br_blockcount ?
+					del.br_blockcount : mod;
+				if (bno < got.br_startoff) {
+					if (--lastx >= 0)
+						xfs_bmbt_get_all(--ep, &got);
+				}
+				continue;
+			}
+			/*
+			 * It's written, turn it unwritten.
+			 * This is better than zeroing it.
+			 */
+			ASSERT(del.br_state == XFS_EXT_NORM);
+			ASSERT(xfs_trans_get_block_res(tp) > 0);
+			/*
+			 * If this spans a realtime extent boundary,
+			 * chop it back to the start of the one we end at.
+			 */
+			if (del.br_blockcount > mod) {
+				del.br_startoff += del.br_blockcount - mod;
+				del.br_startblock += del.br_blockcount - mod;
+				del.br_blockcount = mod;
+			}
+			del.br_state = XFS_EXT_UNWRITTEN;
+			error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
+				firstblock, flist, &tmp_logflags,
+				XFS_DATA_FORK, 0);
+			/* Accumulate; keep flags set by earlier iterations. */
+			logflags |= tmp_logflags;
+			if (error)
+				goto error0;
+			goto nodelete;
+		}
+		if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
+			/*
+			 * Realtime extent is lined up at the end but not
+			 * at the front.  We'll get rid of full extents if
+			 * we can.
+			 */
+			mod = mp->m_sb.sb_rextsize - mod;
+			if (del.br_blockcount > mod) {
+				del.br_blockcount -= mod;
+				del.br_startoff += mod;
+				del.br_startblock += mod;
+			} else if ((del.br_startoff == start &&
+				    (del.br_state == XFS_EXT_UNWRITTEN ||
+				     xfs_trans_get_block_res(tp) == 0)) ||
+				   !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				/*
+				 * Can't make it unwritten.  There isn't
+				 * a full extent here so just skip it.
+				 */
+				ASSERT(bno >= del.br_blockcount);
+				bno -= del.br_blockcount;
+				if (bno < got.br_startoff) {
+					if (--lastx >= 0)
+						xfs_bmbt_get_all(--ep, &got);
+				}
+				continue;
+			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
+				/*
+				 * This one is already unwritten.
+				 * It must have a written left neighbor.
+				 * Unwrite the killed part of that one and
+				 * try again.
+				 */
+				ASSERT(lastx > 0);
+				xfs_bmbt_get_all(ep - 1, &prev);
+				ASSERT(prev.br_state == XFS_EXT_NORM);
+				ASSERT(!ISNULLSTARTBLOCK(prev.br_startblock));
+				ASSERT(del.br_startblock ==
+				       prev.br_startblock + prev.br_blockcount);
+				if (prev.br_startoff < start) {
+					mod = start - prev.br_startoff;
+					prev.br_blockcount -= mod;
+					prev.br_startblock += mod;
+					prev.br_startoff = start;
+				}
+				prev.br_state = XFS_EXT_UNWRITTEN;
+				error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
+					&prev, firstblock, flist,
+					&tmp_logflags, XFS_DATA_FORK, 0);
+				logflags |= tmp_logflags;
+				if (error)
+					goto error0;
+				goto nodelete;
+			} else {
+				ASSERT(del.br_state == XFS_EXT_NORM);
+				del.br_state = XFS_EXT_UNWRITTEN;
+				error = xfs_bmap_add_extent(ip, lastx, &cur,
+					&del, firstblock, flist,
+					&tmp_logflags, XFS_DATA_FORK, 0);
+				logflags |= tmp_logflags;
+				if (error)
+					goto error0;
+				goto nodelete;
+			}
+		}
+		if (wasdel) {
+			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
+			xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+				(int)del.br_blockcount, rsvd);
+			if (XFS_IS_QUOTA_ON(ip->i_mount)) {
+				ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+				ASSERT(ip->i_ino != mp->m_sb.sb_pquotino);
+				if (!isrt)
+					xfs_trans_unreserve_blkquota(NULL, ip,
+					      (long)del.br_blockcount);
+				else
+					xfs_trans_unreserve_rtblkquota(NULL, ip,
+					      (long)del.br_blockcount);
+			}
+			ip->i_delayed_blks -= del.br_blockcount;
+			if (cur)
+				cur->bc_private.b.flags |=
+					XFS_BTCUR_BPRV_WASDEL;
+		} else if (cur)
+			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
+		/*
+		 * If it's the case where the directory code is running
+		 * with no block reservation, and the deleted block is in
+		 * the middle of its extent, and the resulting insert
+		 * of an extent would cause transformation to btree format,
+		 * then reject it.  The calling code will then swap
+		 * blocks around instead.
+		 * We have to do this now, rather than waiting for the
+		 * conversion to btree format, since the transaction
+		 * will be dirty.
+		 */
+		if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
+		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+		    XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
+		    del.br_startoff > got.br_startoff &&
+		    del.br_startoff + del.br_blockcount <
+		    got.br_startoff + got.br_blockcount) {
+			error = XFS_ERROR(ENOSPC);
+			goto error0;
+		}
+		error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
+			flags, &tmp_logflags, whichfork, rsvd);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+		bno = del.br_startoff - 1;
+nodelete:
+		lastx = ifp->if_lastex;
+		/*
+		 * If not done go on to the next (previous) record.
+		 * Reset ep in case the extents array was re-alloced.
+		 */
+		ep = &ifp->if_u1.if_extents[lastx];
+		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
+			if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
+			    xfs_bmbt_get_startoff(ep) > bno) {
+				lastx--;
+				ep--;
+			}
+			if (lastx >= 0)
+				xfs_bmbt_get_all(ep, &got);
+			extno++;
+		}
+	}
+	ifp->if_lastex = lastx;
+	*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	/* Convert to a btree if necessary. */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+		ASSERT(cur == NULL);
+		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
+			&cur, 0, &tmp_logflags, whichfork);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+	}
+	/* transform from btree to extents, give it cur */
+	else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+		 XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+		ASSERT(cur != NULL);
+		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
+			whichfork, async);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+	}
+	/* transform from extents to local? */
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	error = 0;
+error0:
+	/*
+	 * Log everything.  Do this after conversion, there's no point in
+	 * logging the extent list if we've converted to btree format.
+	 */
+	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		logflags &= ~XFS_ILOG_FEXT(whichfork);
+	else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
+		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		logflags &= ~XFS_ILOG_FBROOT(whichfork);
+	/*
+	 * Log inode even in the error case, if the transaction
+	 * is dirty we'll need to shut down the filesystem.
+	 */
+	if (logflags)
+		xfs_trans_log_inode(tp, ip, logflags);
+	if (cur) {
+		if (!error) {
+			*firstblock = cur->bc_private.b.firstblock;
+			cur->bc_private.b.allocated = 0;
+		}
+		xfs_btree_del_cursor(cur,
+			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	}
+	return error;
+}
+
+/*
+ * Report in *aeof whether an allocation at file offset "off" would put
+ * blocks at the end of the file, judging by the last extent in the fork
+ * (a fork with no extents always qualifies).  New blocks at the end of the
+ * file are the ones we try to align at stripe unit boundaries.
+ */
+int					/* error */
+xfs_bmap_isaeof(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_fileoff_t   off,		/* file offset in fsblocks */
+	int             whichfork,	/* data or attribute fork */
+	int		*aeof)		/* return value */
+{
+	xfs_bmbt_irec_t	last;		/* expanded last extent record */
+	xfs_bmbt_rec_t	*lastp;		/* last entry in the extent list */
+	xfs_ifork_t	*fork;		/* inode fork pointer */
+	xfs_extnum_t	count;		/* number of extent list entries */
+	int		rc;		/* error return value */
+
+	ASSERT(whichfork == XFS_DATA_FORK);
+	fork = XFS_IFORK_PTR(ip, whichfork);
+	if (!(fork->if_flags & XFS_IFEXTENTS)) {
+		rc = xfs_iread_extents(NULL, ip, whichfork);
+		if (rc)
+			return rc;
+	}
+	count = fork->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	if (count == 0) {
+		*aeof = 1;
+		return 0;
+	}
+	lastp = &fork->if_u1.if_extents[count - 1];
+	xfs_bmbt_get_all(lastp, &last);
+	/*
+	 * At or beyond the end of the last extent is always eof; inside the
+	 * last extent counts only when that extent is a delayed allocation.
+	 */
+	if (off >= last.br_startoff + last.br_blockcount)
+		*aeof = 1;
+	else
+		*aeof = off >= last.br_startoff &&
+			ISNULLSTARTBLOCK(last.br_startblock);
+	return 0;
+}
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
new file mode 100644
index 000000000..a221892e4
--- /dev/null
+++ b/libxfs/xfs_bmap_btree.c
@@ -0,0 +1,2528 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Delete the record (leaf) or key/pointer pair (node) at cur/level.
+ */
+STATIC int					/* error */
+xfs_bmbt_delrec(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			level,		/* level to delete from */
+	int			async,		/* deletion can be async */
+	int			*stat)		/* 0 none, 1 deleted, 2 joined */
+{
+	xfs_bmbt_block_t	*block;		/* bmap btree block */
+	xfs_fsblock_t		bno;		/* fs-relative block number */
+	xfs_buf_t			*bp;		/* buffer for block */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_delrec";
+#endif
+	int			i;		/* loop counter */
+	int			j;		/* temp state */
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_bmbt_key_t		*kp;		/* pointer to bmap btree key */
+	xfs_fsblock_t		lbno;		/* left sibling block number */
+	xfs_buf_t			*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	int			lrecs;		/* left record count */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_bmbt_ptr_t		*pp;		/* pointer to bmap block addr */
+	int			ptr;		/* key/record index */
+	xfs_fsblock_t		rbno;		/* right sibling block number */
+	xfs_buf_t			*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_rec_t		*rp;		/* pointer to bmap btree rec */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_block_t	*rrblock;	/* right-right btree block */
+	xfs_buf_t			*rrbp;		/* right-right buffer pointer */
+	int			rrecs;		/* right record count */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+	xfs_btree_cur_t		*tcur;		/* temporary btree cursor */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	ptr = cur->bc_ptrs[level];	/* 1-based index of the victim */
+	tcur = (xfs_btree_cur_t *)0;
+	if (ptr == 0) {			/* no entry at index 0 */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		goto error0;
+	}
+#endif
+	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;		/* index past last entry: nothing deleted */
+		return 0;
+	}
+	XFS_STATS_INC(xs_bmbt_delrec);
+	if (level > 0) {		/* node: keys and pointers */
+		kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+		pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				goto error0;
+			}
+		}
+#endif
+		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+			ovbcopy(&kp[ptr], &kp[ptr - 1],
+				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp));
+			ovbcopy(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
+				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp));
+			xfs_bmbt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+			xfs_bmbt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+		}
+	} else {			/* leaf: records */
+		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
+		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+			ovbcopy(&rp[ptr], &rp[ptr - 1],
+				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp));
+			xfs_bmbt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+		}
+		if (ptr == 1) {		/* the first record went away */
+			INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rp));
+			kp = &key;	/* key of the new first record, for updkey below */
+		}
+	}
+	INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
+	/*
+	 * We're at the root level.
+	 * First, shrink the root block in-memory.
+	 * Try to get rid of the next level down.
+	 * If we can't then there's nothing left to do.
+	 */
+	if (level == cur->bc_nlevels - 1) {
+		xfs_iroot_realloc(cur->bc_private.b.ip, -1,
+			cur->bc_private.b.whichfork);
+		if (error = xfs_bmbt_killroot(cur, async)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		goto error0;
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
+		if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;		/* block still has at least the minimum */
+		return 0;
+	}
+	rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+	lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+	/*
+	 * One child of root, need to get a chance to copy its contents
+	 * into the root and delete it. Can't go up to next level,
+	 * there's nothing to delete there.
+	 */
+	if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
+	    level == cur->bc_nlevels - 2) {
+		if (error = xfs_bmbt_killroot(cur, async)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
+	if (error = xfs_btree_dup_cursor(cur, &tcur)) {	/* scratch cursor */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		goto error0;
+	}
+	bno = NULLFSBLOCK;
+	if (rbno != NULLFSBLOCK) {	/* try borrowing from the right */
+		i = xfs_btree_lastrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		if (error = xfs_bmbt_increment(tcur, level, &i)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		i = xfs_btree_lastrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		rbp = tcur->bc_bufs[level];	/* tcur now sits in the right sibling */
+		right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+#endif
+		bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+		    XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
+			if (error = xfs_bmbt_lshift(tcur, level, &i)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				goto error0;
+			}
+			if (i) {
+				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				       XFS_BMAP_BLOCK_IMINRECS(level, tcur));
+				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+				tcur = NULL;
+				if (level > 0) {
+					if (error = xfs_bmbt_decrement(cur,
+							level, &i)) {
+						XFS_BMBT_TRACE_CURSOR(cur,
+							ERROR);
+						goto error0;
+					}
+				}
+				XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+				*stat = 1;	/* refilled from the right sibling */
+				return 0;
+			}
+		}
+		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+		if (lbno != NULLFSBLOCK) {	/* step tcur back before the left probe */
+			i = xfs_btree_firstrec(tcur, level);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (error = xfs_bmbt_decrement(tcur, level, &i)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				goto error0;
+			}
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		}
+	}
+	if (lbno != NULLFSBLOCK) {	/* try borrowing from the left */
+		i = xfs_btree_firstrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		/*
+		 * decrement to the last entry of the left sibling block
+		 */
+		if (error = xfs_bmbt_decrement(tcur, level, &i)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		i = xfs_btree_firstrec(tcur, level);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		lbp = tcur->bc_bufs[level];
+		left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+#endif
+		bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+		    XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
+			if (error = xfs_bmbt_rshift(tcur, level, &i)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				goto error0;
+			}
+			if (i) {
+				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+				       XFS_BMAP_BLOCK_IMINRECS(level, tcur));
+				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+				tcur = NULL;
+				if (level == 0)
+					cur->bc_ptrs[0]++;	/* an entry was shifted in ahead of ours */
+				XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+				*stat = 1;	/* refilled from the left sibling */
+				return 0;
+			}
+		}
+		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	tcur = NULL;
+	mp = cur->bc_mp;
+	ASSERT(bno != NULLFSBLOCK);
+	if (lbno != NULLFSBLOCK &&	/* join with left: our block is "right" */
+	    lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+		rbno = bno;
+		right = block;
+		rbp = bp;
+		if (error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+		if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+	} else if (rbno != NULLFSBLOCK &&	/* join with right: our block is "left" */
+		   rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+		   XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+		lbno = bno;
+		left = block;
+		lbp = bp;
+		if (error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+		if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	} else {			/* neither join fits: leave the block as it is */
+		if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	if (level > 0) {		/* append right's keys and ptrs to left */
+		lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				goto error0;
+			}
+		}
+#endif
+		bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
+		bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
+		xfs_bmbt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+			INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+			INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {			/* append right's records to left */
+		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+		xfs_bmbt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+			INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	}
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+		if (error = xfs_btree_read_bufl(mp, cur->bc_tp,
+				INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
+		if (error = xfs_btree_check_lblock(cur, rrblock, level, rrbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			goto error0;
+		}
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);	/* relink past right */
+		xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+	}
+	xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
+		cur->bc_private.b.flist, mp);	/* put the right block on the free list */
+	if (!async)
+		xfs_trans_set_sync(cur->bc_tp);
+	cur->bc_private.b.ip->i_d.di_nblocks--;	/* inode holds one block less */
+	xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+	if (XFS_IS_QUOTA_ON(mp) &&
+	    cur->bc_private.b.ip->i_ino != mp->m_sb.sb_uquotino &&
+	    cur->bc_private.b.ip->i_ino != mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(cur->bc_tp, cur->bc_private.b.ip, 
+			XFS_TRANS_DQ_BCOUNT, -1L);
+	xfs_trans_binval(cur->bc_tp, rbp);
+	if (bp != lbp) {		/* joined into left: cursor follows the entries */
+		cur->bc_bufs[level] = lbp;
+		cur->bc_ptrs[level] += lrecs;
+		cur->bc_ra[level] = 0;
+	} else if (error = xfs_bmbt_increment(cur, level + 1, &i)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		goto error0;
+	}
+	if (level > 0)
+		cur->bc_ptrs[level]--;
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 2;			/* deleted, and two blocks were joined */
+	return 0;
+
+error0:
+	if (tcur)			/* scratch cursor is still live */
+		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Insert one record/level.  Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ */
+STATIC int					/* error */
+xfs_bmbt_insrec(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			level,		/* level to insert at */
+	xfs_fsblock_t		*bnop,		/* i/o: block ptr to insert, new block */
+	xfs_bmbt_rec_t		*recp,		/* i/o: record to insert, next record */
+	xfs_btree_cur_t		**curp,		/* output: new cursor after a split */
+	int			*stat)		/* no-go/done/continue */
+{
+	xfs_bmbt_block_t	*block;		/* bmap btree block */
+	xfs_buf_t			*bp;		/* buffer for block */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_insrec";
+#endif
+	int			i;		/* loop index */
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_bmbt_key_t		*kp;		/* pointer to bmap btree key */
+	int			logflags;	/* inode logging flags */
+	xfs_fsblock_t		nbno;		/* new block number */
+	struct xfs_btree_cur	*ncur;		/* new btree cursor */
+	xfs_bmbt_key_t		nkey;		/* new btree key value */
+	xfs_bmbt_rec_t		nrec;		/* new record built from nkey */
+	int			optr;		/* old key/record index */
+	xfs_bmbt_ptr_t		*pp;		/* pointer to bmap block addr */
+	int			ptr;		/* key/record index */
+	xfs_bmbt_rec_t		*rp;		/* pointer to bmap btree rec */
+
+	ASSERT(level < cur->bc_nlevels);
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
+	ncur = (xfs_btree_cur_t *)0;
+	INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(recp));
+	optr = ptr = cur->bc_ptrs[level];	/* 1-based insertion slot */
+	if (ptr == 0) {			/* no insertion point */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	XFS_STATS_INC(xs_bmbt_insrec);
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0) {
+			rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
+			xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
+		} else {
+			kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
+			xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
+		}
+	}
+#endif
+	nbno = NULLFSBLOCK;		/* stays null unless xfs_bmbt_split sets it */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+			/*
+			 * A root block, that can be made bigger.
+			 */
+			xfs_iroot_realloc(cur->bc_private.b.ip, 1,
+				cur->bc_private.b.whichfork);
+			block = xfs_bmbt_get_block(cur, level, &bp);
+		} else if (level == cur->bc_nlevels - 1) {	/* full root */
+			if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
+			    *stat == 0) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+				logflags);
+			block = xfs_bmbt_get_block(cur, level, &bp);
+		} else {		/* full non-root: rshift, then lshift, else split */
+			if (error = xfs_bmbt_rshift(cur, level, &i)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			if (i) {
+				/* nothing */
+			} else {
+				if (error = xfs_bmbt_lshift(cur, level, &i)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+				if (i) {
+					optr = ptr = cur->bc_ptrs[level];	/* reread index after lshift */
+				} else {
+					if (error = xfs_bmbt_split(cur, level,
+							&nbno, &nkey, &ncur,
+							&i)) {
+						XFS_BMBT_TRACE_CURSOR(cur,
+							ERROR);
+						return error;
+					}
+					if (i) {
+						block = xfs_bmbt_get_block(
+							    cur, level, &bp);
+#ifdef DEBUG
+						if (error =
+						    xfs_btree_check_lblock(cur,
+							    block, level, bp)) {
+							XFS_BMBT_TRACE_CURSOR(
+								cur, ERROR);
+							return error;
+						}
+#endif
+						ptr = cur->bc_ptrs[level];
+						xfs_bmbt_set_allf(&nrec,
+							nkey.br_startoff, 0, 0,
+							XFS_EXT_NORM);
+					} else {
+						XFS_BMBT_TRACE_CURSOR(cur,
+							EXIT);
+						*stat = 0;
+						return 0;
+					}
+				}
+			}
+		}
+	}
+	if (level > 0) {		/* node: insert key + block pointer */
+		kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+		pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT),
+					level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		ovbcopy(&kp[ptr - 1], &kp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
+		ovbcopy(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop,
+				level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		kp[ptr - 1] = key;	/* fill the slot opened by the copies above */
+		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_bmbt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	} else {			/* leaf: insert the record itself */
+		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
+		ovbcopy(&rp[ptr - 1], &rp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+		rp[ptr - 1] = *recp;
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_bmbt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	}
+	xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0)
+			xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
+				rp + ptr);
+		else
+			xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
+				kp + ptr);
+	}
+#endif
+	if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	*bnop = nbno;			/* null unless the split path ran */
+	if (nbno != NULLFSBLOCK) {	/* hand the split results to the caller */
+		*recp = nrec;
+		*curp = ncur;
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+STATIC int					/* error */
+xfs_bmbt_killroot(			/* fold the root's sole child into the root */
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			async)		/* nonzero: don't force a sync transaction */
+{
+	xfs_bmbt_block_t	*block;		/* root block */
+	xfs_bmbt_block_t	*cblock;	/* child block, one level below root */
+	xfs_buf_t			*cbp;		/* buffer for child block */
+	xfs_bmbt_key_t		*ckp;		/* child key pointer */
+	xfs_bmbt_ptr_t		*cpp;		/* child address pointer */
+#ifdef DEBUG
+	int			error;		/* error return value */
+#endif
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_killroot";
+#endif
+	int			i;		/* root size change, then loop counter */
+	xfs_bmbt_key_t		*kp;		/* root key pointer */
+	xfs_inode_t		*ip;		/* incore inode pointer */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	int			level;		/* level of the root block */
+	xfs_bmbt_ptr_t		*pp;		/* root address pointer */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	level = cur->bc_nlevels - 1;
+	ASSERT(level >= 1);
+	/*
+	 * Don't handle the case where the root block would need to be a leaf.
+	 * We're just going to turn the thing back into extents anyway.
+	 */
+	if (level == 1) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		return 0;
+	}
+	block = xfs_bmbt_get_block(cur, level, &cbp);	/* cbp is reassigned below */
+	/*
+	 * Give up if the root has multiple children.
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) != 1) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		return 0;
+	}
+	/*
+	 * Only do this if the next level will fit.
+	 * Then the data must be copied up to the inode,
+	 * instead of freeing the root you free the next level.
+	 */
+	cbp = cur->bc_bufs[level - 1];
+	cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
+	if (INT_GET(cblock->bb_numrecs, ARCH_CONVERT) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		return 0;		/* child's entries won't fit in the root */
+	}
+	ASSERT(INT_GET(cblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(INT_GET(cblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+	ip = cur->bc_private.b.ip;
+	ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
+	ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
+	       XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
+	i = (int)(INT_GET(cblock->bb_numrecs, ARCH_CONVERT) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
+	if (i) {			/* root must be resized to hold the child's entries */
+		xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
+		block = ifp->if_broot;	/* re-fetch the root; presumably it can move */
+	}
+	INT_MOD(block->bb_numrecs, ARCH_CONVERT, i);
+	ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) == INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
+	bcopy(ckp, kp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
+#ifdef DEBUG
+	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+		if (error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+	}
+#endif
+	bcopy(cpp, pp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+	xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
+		cur->bc_private.b.flist, cur->bc_mp);	/* child block goes on the free list */
+	if (!async)
+		xfs_trans_set_sync(cur->bc_tp);
+	ip->i_d.di_nblocks--;		/* inode holds one block less */
+	if (XFS_IS_QUOTA_ON(cur->bc_mp) &&
+	    ip->i_ino != cur->bc_mp->m_sb.sb_uquotino &&
+	    ip->i_ino != cur->bc_mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(cur->bc_tp, ip, XFS_TRANS_DQ_BCOUNT,
+			-1L);
+	xfs_trans_binval(cur->bc_tp, cbp);
+	cur->bc_bufs[level - 1] = NULL;	/* cursor drops the invalidated child buffer */
+	INT_MOD(block->bb_level, ARCH_CONVERT, -1);	/* root moves down to the child's level */
+	xfs_trans_log_inode(cur->bc_tp, ip,
+		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	cur->bc_nlevels--;		/* tree is one level shorter */
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	return 0;
+}
+
+/*
+ * Log a range of keys from a bmap btree block.  kfirst and klast are
+ * 1-based, inclusive key indices.  The byte range goes to the buffer log
+ * when a buffer is supplied; otherwise the inode fork's broot is logged.
+ */
+STATIC void
+xfs_bmbt_log_keys(
+	xfs_btree_cur_t	*cur,
+	xfs_buf_t	*bp,
+	int		kfirst,
+	int		klast)
+{
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_log_keys";
+#endif
+	xfs_bmbt_block_t	*block;	/* btree block held in bp */
+	xfs_caddr_t		end;	/* one past the last key byte */
+	xfs_bmbt_key_t		*keys;	/* first key in the block */
+	xfs_caddr_t		start;	/* first key byte to log */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
+	if (!bp) {
+		/* No buffer, so log the fork's broot through the inode. */
+		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	} else {
+		/* Turn the key range into byte offsets within the block. */
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		keys = XFS_BMAP_KEY_DADDR(block, 1, cur);
+		start = (xfs_caddr_t)&keys[kfirst - 1];
+		end = (xfs_caddr_t)&keys[klast];
+		xfs_trans_log_buf(cur->bc_tp, bp,
+			(int)(start - (xfs_caddr_t)block),
+			(int)((end - 1) - (xfs_caddr_t)block));
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Log a range of block pointers from a bmap btree block.  pfirst and
+ * plast are 1-based, inclusive pointer indices.  With a buffer the byte
+ * range is logged in it; without one the inode fork's broot is logged.
+ */
+STATIC void
+xfs_bmbt_log_ptrs(
+	xfs_btree_cur_t	*cur,
+	xfs_buf_t	*bp,
+	int		pfirst,
+	int		plast)
+{
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_log_ptrs";
+#endif
+	xfs_caddr_t		base;	/* start of the btree block */
+	xfs_bmbt_block_t	*blk;	/* btree block held in bp */
+	xfs_caddr_t		limit;	/* one past the last ptr byte */
+	xfs_bmbt_ptr_t		*ptrs;	/* first pointer in the block */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
+	if (!bp) {
+		/* Nothing to log in a buffer; flag the fork's broot. */
+		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	} else {
+		/* Byte offsets of the pointer range, relative to the block. */
+		blk = XFS_BUF_TO_BMBT_BLOCK(bp);
+		ptrs = XFS_BMAP_PTR_DADDR(blk, 1, cur);
+		base = (xfs_caddr_t)blk;
+		limit = (xfs_caddr_t)&ptrs[plast];
+		xfs_trans_log_buf(cur->bc_tp, bp,
+			(int)((xfs_caddr_t)&ptrs[pfirst - 1] - base),
+			(int)((limit - 1) - base));
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Lookup the record.  The cursor is made to point to it, based on dir.
+ */
+STATIC int				/* error */
+xfs_bmbt_lookup(
+	xfs_btree_cur_t		*cur,		/* btree cursor; target is cur->bc_rec.b */
+	xfs_lookup_t		dir,		/* XFS_LOOKUP_LE, _GE or _EQ */
+	int			*stat)		/* success/failure */
+{
+	xfs_bmbt_block_t	*block;		/* current btree block */
+	xfs_buf_t			*bp;		/* buffer for block */
+	xfs_daddr_t			d;		/* disk address of fsbno */
+	xfs_sfiloff_t		diff;		/* probed startoff minus target */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_lookup";
+#endif
+	xfs_fsblock_t		fsbno;		/* child block to visit next */
+	int			high;		/* binary search upper bound */
+	int			i;		/* result from xfs_bmbt_increment */
+	int			keyno;		/* index of the probed entry */
+	xfs_bmbt_key_t		*kkbase;	/* first key in a node block */
+	xfs_bmbt_key_t		*kkp;		/* key being compared */
+	xfs_bmbt_rec_t		*krbase;	/* first record in a leaf block */
+	xfs_bmbt_rec_t		*krp;		/* record being compared */
+	int			level;		/* level being searched */
+	int			low;		/* binary search lower bound */
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_bmbt_ptr_t		*pp;		/* pointer to child block number */
+	xfs_bmbt_irec_t		*rp;		/* record being looked up */
+	xfs_fileoff_t		startoff;	/* startoff of the probed entry */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	XFS_STATS_INC(xs_bmbt_lookup);
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, (int)dir);
+	tp = cur->bc_tp;
+	mp = cur->bc_mp;
+	rp = &cur->bc_rec.b;
+	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+		if (level < cur->bc_nlevels - 1) {	/* below the root: need a buffer */
+			d = XFS_FSB_TO_DADDR(mp, fsbno);
+			bp = cur->bc_bufs[level];
+			if (bp && XFS_BUF_ADDR(bp) != d)	/* cursor's buffer is another block */
+				bp = (xfs_buf_t *)0;
+			if (!bp) {
+				if (error = xfs_btree_read_bufl(mp, tp, fsbno,
+						0, &bp, XFS_BMAP_BTREE_REF)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+				xfs_btree_setbuf(cur, level, bp);
+				block = XFS_BUF_TO_BMBT_BLOCK(bp);
+				if (error = xfs_btree_check_lblock(cur, block,
+						level, bp)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+			} else
+				block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		} else
+			block = xfs_bmbt_get_block(cur, level, &bp);
+		if (diff == 0)		/* exact match one level up: take first entry */
+			keyno = 1;
+		else {
+			if (level > 0)
+				kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
+			else
+				krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
+			low = 1;
+			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+				ASSERT(level == 0);
+				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;	/* empty leaf: 0 for LE, else 1 */
+				XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+				*stat = 0;
+				return 0;
+			}
+			while (low <= high) {	/* binary search on startoff */
+				XFS_STATS_INC(xs_bmbt_compare);
+				keyno = (low + high) >> 1;
+				if (level > 0) {
+					kkp = kkbase + keyno - 1;
+					startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT);
+				} else {
+					krp = krbase + keyno - 1;
+					startoff = xfs_bmbt_get_startoff(krp);
+				}
+				diff = (xfs_sfiloff_t)
+						(startoff - rp->br_startoff);
+				if (diff < 0)
+					low = keyno + 1;
+				else if (diff > 0)
+					high = keyno - 1;
+				else
+					break;
+			}
+		}
+		if (level > 0) {
+			if (diff > 0 && --keyno < 1)	/* probed key above target: use previous ptr */
+				keyno = 1;
+			pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
+#ifdef DEBUG
+			if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+#endif
+			fsbno = INT_GET(*pp, ARCH_CONVERT);
+			cur->bc_ptrs[level] = keyno;
+		}
+	}
+	if (dir != XFS_LOOKUP_LE && diff < 0) {
+		keyno++;		/* GE/EQ: probed entry is below target, step forward */
+		/*
+		 * If ge search and we went off the end of the block, but it's
+		 * not the last block, we're in the wrong block.
+		 */
+		if (dir == XFS_LOOKUP_GE && keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+			cur->bc_ptrs[0] = keyno;
+			if (error = xfs_bmbt_increment(cur, 0, &i)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			XFS_WANT_CORRUPTED_RETURN(i == 1);
+			XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+			*stat = 1;
+			return 0;
+		}
+	}
+	else if (dir == XFS_LOOKUP_LE && diff > 0)
+		keyno--;		/* LE: probed entry is above target, step back */
+	cur->bc_ptrs[0] = keyno;
+	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;		/* cursor is off either end of the leaf */
+	} else {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+	}
+	return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int					/* error */
+xfs_bmbt_lshift(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			level,		/* level to shift at */
+	int			*stat)		/* success/failure */
+{
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_lshift";
+#endif
+#ifdef DEBUG
+	int			i;		/* loop counter */
+#endif
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_buf_t			*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	int			lrecs;		/* left record count */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_buf_t			*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	if (level == cur->bc_nlevels - 1) {	/* root level: no shift attempted */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	rbp = cur->bc_bufs[level];	/* the cursor's block is the "right" one */
+	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;		/* no left sibling to shift into */
+		return 0;
+	}
+	if (cur->bc_ptrs[level] <= 1) {	/* cursor is on the entry that would move */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	mp = cur->bc_mp;
+	if (error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0,
+			&lbp, XFS_BMAP_BTREE_REF)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+	if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;		/* left sibling is already full */
+		return 0;
+	}
+	lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;	/* slot the entry lands in */
+	if (level > 0) {		/* node: move first key and pointer */
+		lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		*lkp = *rkp;
+		xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
+		lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		*lpp = *rpp; /* INT_: direct copy */
+		xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
+	} else {			/* leaf: move first record */
+		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		*lrp = *rrp;
+		xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
+	}
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
+	else
+		xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
+#endif
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
+	if (level > 0) {		/* slide right's remaining entries down one slot */
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+					level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		rkp = &key;		/* key of right's new first record */
+	}
+	if (error = xfs_bmbt_updkey(cur, rkp, level + 1)) {	/* right's first key changed */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	cur->bc_ptrs[level]--;		/* cursor's entry moved down one slot */
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * The last entry of the block at cur/level (the "left" block) is copied
+ * into slot 1 of its right sibling, after the sibling's existing entries
+ * have been moved up by one slot.  Both blocks are logged, and the new
+ * first key of the right block is pushed into the levels above through a
+ * temporary duplicate of the cursor.
+ *
+ * Returns 0 with *stat = 0 when nothing is shifted: level is the topmost
+ * level of the cursor, the left block has no right sibling, the cursor
+ * index is at or beyond the last entry of the left block, or the right
+ * sibling already holds XFS_BMAP_BLOCK_IMAXRECS entries.
+ * Returns 0 with *stat = 1 after a shift.
+ * Returns a non-zero error, without writing *stat, when a helper fails.
+ */
+STATIC int					/* error */
+xfs_bmbt_rshift(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			level,		/* level to shift at */
+	int			*stat)		/* success/failure */
+{
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_rshift";
+#endif
+	int			i;		/* loop counter */
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_buf_t			*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_buf_t			*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	if (level == cur->bc_nlevels - 1) {	/* topmost level: no shift */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {	/* no right sibling */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {	/* cursor at or past left's last entry */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	mp = cur->bc_mp;
+	if (error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+			&rbp, XFS_BMAP_BTREE_REF)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+	if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {	/* right sibling is full */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	if (level > 0) {	/* non-leaf block: move a key and a pointer */
+		lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));	/* move right's keys up one slot */
+		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));	/* and its pointers */
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		*rkp = *lkp;	/* left's last key becomes right's first */
+		*rpp = *lpp; /* INT_: direct copy */
+		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);	/* bb_numrecs not bumped yet, hence + 1 */
+		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+	} else {	/* leaf block: move a record */
+		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));	/* move right's records up one slot */
+		*rrp = *lrp;
+		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));	/* build the key for the parent from the moved record */
+		rkp = &key;
+	}
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);	/* left gave one entry away */
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);	/* right gained it */
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
+	else
+		xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
+#endif
+	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
+	if (error = xfs_btree_dup_cursor(cur, &tcur)) {	/* tcur is used for the parent key update below */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	i = xfs_btree_lastrec(tcur, level);
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);	/* macro defined elsewhere; presumably sets error before jumping -- confirm */
+	if (error = xfs_bmbt_increment(tcur, level, &i)) {	/* NOTE(review): presumably steps tcur onto right's first entry */
+		XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
+		goto error1;
+	}
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	if (error = xfs_bmbt_updkey(tcur, rkp, level + 1)) {	/* store right's new first key in the levels above */
+		XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
+		goto error1;
+	}
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+error0:
+	XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+error1:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);	/* both error labels release the temporary cursor */
+	return error;
+}
+
+/*
+ * Map an extent flag onto an extent state: a zero flag yields
+ * XFS_EXT_NORM, any other value yields XFS_EXT_UNWRITTEN.
+ * blks is only looked at by the ASSERT on the unwritten path.
+ */
+/* ARGSUSED */
+STATIC xfs_exntst_t
+xfs_extent_state(
+	xfs_filblks_t		blks,
+	int			extent_flag)
+{
+	if (!extent_flag)
+		return XFS_EXT_NORM;
+
+	ASSERT(blks != 0);	/* saved for DMIG */
+	return XFS_EXT_UNWRITTEN;
+}
+
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ *
+ * One new block is allocated and becomes the right sibling of the block
+ * at cur/level (the "left" block).  The upper entries of left are copied
+ * into it, the sibling links of left, right and any former right
+ * neighbour are rewired, and every modified field is logged.
+ *
+ * Parameters:
+ *	cur	btree cursor; moved onto the new block when
+ *		cur->bc_ptrs[level] lies beyond left's remaining entries + 1
+ *	level	level of the block to split
+ *	bnop	out: block number of the new right block
+ *	keyp	out: br_startoff of the first entry now in the right block
+ *	curp	out: duplicate of cur with bc_ptrs[level + 1] advanced by
+ *		one; only written when level + 1 < cur->bc_nlevels
+ *	stat	out: 1 when the split was done, 0 when the allocator
+ *		returned no block
+ *
+ * Returns 0 (see *stat), ENOSPC when the allocation is not for a delayed
+ * extent and the transaction has no block reservation left, or the
+ * error of a failing helper.  *stat is not written on error.
+ */
+STATIC int					/* error */
+xfs_bmbt_split(
+	xfs_btree_cur_t		*cur,
+	int			level,
+	xfs_fsblock_t		*bnop,
+	xfs_bmbt_key_t		*keyp,
+	xfs_btree_cur_t		**curp,
+	int			*stat)		/* success/failure */
+{
+	xfs_alloc_arg_t		args;		/* block allocation args */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_split";
+#endif
+	int			i;		/* loop counter */
+	xfs_fsblock_t		lbno;		/* left sibling block number */
+	xfs_buf_t			*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_buf_t			*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_block_t	*rrblock;	/* right-right btree block */
+	xfs_buf_t			*rrbp;		/* right-right buffer pointer */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, keyp);
+	args.tp = cur->bc_tp;
+	args.mp = cur->bc_mp;
+	lbp = cur->bc_bufs[level];
+	lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
+	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+	/*
+	 * Pick the allocation target: start from the left block when the
+	 * cursor has no first block recorded yet, otherwise use that block.
+	 */
+	args.fsbno = cur->bc_private.b.firstblock;
+	if (args.fsbno == NULLFSBLOCK) {
+		args.fsbno = lbno;
+		args.type = XFS_ALLOCTYPE_START_BNO;
+	} else if (cur->bc_private.b.flist->xbf_low)
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+	else
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	args.mod = args.minleft = args.alignment = args.total = args.isfl =
+		args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;	/* exactly one block */
+	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+	if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return XFS_ERROR(ENOSPC);
+	}
+	if (error = xfs_alloc_vextent(&args)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (args.fsbno == NULLFSBLOCK) {
+		/* No block available: report "not split" rather than an error. */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	/* Account for the new block in the cursor, the inode and the quota. */
+	cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_private.b.allocated++;
+	cur->bc_private.b.ip->i_d.di_nblocks++;
+	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+	if (XFS_IS_QUOTA_ON(args.mp) &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+			XFS_TRANS_DQ_BCOUNT, 1L);
+	rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
+	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+	/*
+	 * The block being validated is left, which lives in lbp; pass that
+	 * buffer so the check is paired with the block it describes, as at
+	 * every other xfs_btree_check_lblock() call in this file.
+	 */
+	if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	INT_SET(right->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	right->bb_level = left->bb_level; /* INT_: direct copy */
+	/*
+	 * Right gets half of left's entries.  With an odd count the extra
+	 * entry also goes right when the cursor index is no larger than
+	 * right's share + 1.
+	 */
+	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	/* i is the index in left of the first entry that moves. */
+	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	if (level > 0) {
+		/* Non-leaf block: copy keys and pointers. */
+		lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
+		lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
+	} else {
+		/* Leaf block: copy records. */
+		lrp = XFS_BMAP_REC_IADDR(left, i, cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->br_startoff = xfs_bmbt_get_startoff(rrp);
+	}
+	/* Shrink left and splice right in between left and left's old sibling. */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+	INT_SET(left->bb_rightsib, ARCH_CONVERT, args.fsbno);
+	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+		/* The former right neighbour must now point back at the new block. */
+		if (error = xfs_btree_read_bufl(args.mp, args.tp,
+				INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
+		if (error = xfs_btree_check_lblock(cur, rrblock, level, rrbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.fsbno);
+		xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+	}
+	/* Follow the entry the cursor pointed at if it moved to the new block. */
+	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+		xfs_btree_setbuf(cur, level, rbp);
+		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/* Hand back a cursor positioned one slot further on in the parent. */
+	if (level + 1 < cur->bc_nlevels) {
+		if (error = xfs_btree_dup_cursor(cur, curp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		(*curp)->bc_ptrs[level + 1]++;
+	}
+	*bnop = args.fsbno;
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Update keys for the record.
+ *
+ * Beginning at 'level', the key slot the cursor points to is overwritten
+ * with *keyp and logged.  The walk continues one level up only while the
+ * slot just written was slot 1, and ends at the top of the cursor.
+ * Returns 0, or (DEBUG builds only) the error of a failed block check.
+ */
+STATIC int
+xfs_bmbt_updkey(
+	xfs_btree_cur_t		*cur,
+	xfs_bmbt_key_t		*keyp,	/* on-disk format */
+	int			level)
+{
+	xfs_bmbt_block_t	*blk;		/* block at the current level */
+	xfs_buf_t			*buf;		/* buffer of blk, if it has one */
+#ifdef DEBUG
+	int			error;
+#endif
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_updkey";
+#endif
+	int			slot;		/* cursor index at this level */
+	xfs_bmbt_key_t		*slotkey;	/* key being overwritten */
+
+	ASSERT(level >= 1);
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
+	slot = 1;
+	while (slot == 1 && level < cur->bc_nlevels) {
+		blk = xfs_bmbt_get_block(cur, level, &buf);
+#ifdef DEBUG
+		error = xfs_btree_check_lblock(cur, blk, level, buf);
+		if (error) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		slot = cur->bc_ptrs[level];
+		slotkey = XFS_BMAP_KEY_IADDR(blk, slot, cur);
+		*slotkey = *keyp;
+		xfs_bmbt_log_keys(cur, buf, slot, slot);
+		level++;
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	return 0;
+}
+
+/*
+ * Convert on-disk form of btree root to in-memory form.
+ *
+ * The header of rblock is filled in (magic, level and record count taken
+ * from dblock, both sibling links set to NULLDFSBNO), then the keys and
+ * pointers of dblock are copied to their positions inside rblock.
+ * dblocklen and rblocklen are the sizes used to locate those positions.
+ */
+void
+xfs_bmdr_to_bmbt(
+	xfs_bmdr_block_t	*dblock,
+	int			dblocklen,
+	xfs_bmbt_block_t	*rblock,
+	int			rblocklen)
+{
+	xfs_bmbt_key_t		*dst_keys;	/* first key slot in rblock */
+	xfs_bmbt_ptr_t		*dst_ptrs;	/* first ptr slot in rblock */
+	int			maxrecs;	/* max entries for dblocklen */
+	int			nrecs;		/* entries actually present */
+	xfs_bmbt_key_t		*src_keys;	/* first key slot in dblock */
+	xfs_bmbt_ptr_t		*src_ptrs;	/* first ptr slot in dblock */
+
+	/* Block header. */
+	INT_SET(rblock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	rblock->bb_level = dblock->bb_level;	/* both in on-disk format */
+	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
+	rblock->bb_numrecs = dblock->bb_numrecs;/* both in on-disk format */
+	INT_SET(rblock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+	INT_SET(rblock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+
+	/* Locate the key and pointer arrays on both sides. */
+	maxrecs = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
+	src_keys = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, maxrecs);
+	dst_keys = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
+	src_ptrs = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, maxrecs);
+	dst_ptrs = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+
+	/* Copy only the entries that are in use. */
+	nrecs = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
+	bcopy(src_keys, dst_keys, sizeof(*src_keys) * nrecs);
+	bcopy(src_ptrs, dst_ptrs, sizeof(*src_ptrs) * nrecs); /* INT_: direct copy */
+}
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * If the index at this level is still positive after the decrement,
+ * nothing else changes.  Otherwise, provided the block has a left
+ * sibling, the indices of the levels above are decremented until one of
+ * them stays positive; from there the cursor is walked back down to
+ * 'level', reading each child block and pointing at its last entry.
+ *
+ * Returns 0 with *stat = 1 when the cursor points at a record, 0 with
+ * *stat = 0 when the block has no left sibling or every higher level ran
+ * out of entries (the indices already decremented are not restored), or
+ * a non-zero error, without writing *stat, when a read or check fails.
+ */
+int						/* error */
+xfs_bmbt_decrement(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			level,		/* level to step back at */
+	int			*stat)		/* success/failure */
+{
+	xfs_bmbt_block_t	*block;		/* block at the level being examined */
+	xfs_buf_t			*bp;		/* buffer for block */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_decrement";
+#endif
+	xfs_fsblock_t		fsbno;		/* child block to read on the way down */
+	int			lev;		/* level walked up to, then back down from */
+	xfs_mount_t		*mp;
+	xfs_trans_t		*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	ASSERT(level < cur->bc_nlevels);
+	if (level < cur->bc_nlevels - 1)	/* not issued for the topmost level */
+		xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+	if (--cur->bc_ptrs[level] > 0) {	/* still inside the same block */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {	/* no left sibling to move into */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {	/* climb until an index stays positive */
+		if (--cur->bc_ptrs[lev] > 0)
+			break;
+		if (lev < cur->bc_nlevels - 1)
+			xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+	}
+	if (lev == cur->bc_nlevels) {	/* ran off the top of the cursor */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	tp = cur->bc_tp;
+	mp = cur->bc_mp;
+	for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {	/* descend back to 'level' */
+		fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);	/* point at the child's last entry */
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ *
+ * xfs_bmbt_delrec() is called for level 0 and then for each following
+ * level for as long as it reports 2.  If the final report is 0, the
+ * first level above the leaves whose cursor index is 0 is stepped back
+ * with xfs_bmbt_decrement().  *stat receives the last value reported.
+ */
+int					/* error */
+xfs_bmbt_delete(
+	xfs_btree_cur_t	*cur,
+	int		async,		/* deletion can be async */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_delete";
+#endif
+	int		lev;		/* level being worked on */
+	int		result;		/* status from delrec/decrement */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	result = 2;
+	lev = 0;
+	while (result == 2) {
+		error = xfs_bmbt_delrec(cur, lev, async, &result);
+		if (error) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		lev++;
+	}
+	if (result == 0) {
+		for (lev = 1; lev < cur->bc_nlevels; lev++) {
+			if (cur->bc_ptrs[lev] != 0)
+				continue;
+			error = xfs_bmbt_decrement(cur, lev, &result);
+			if (error) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			break;
+		}
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = result;
+	return 0;
+}
+
+/*
+ * Convert a compressed bmap extent record to an uncompressed form.
+ * This code must be in sync with the routines xfs_bmbt_get_startoff,
+ * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
+ *
+ * r is the packed record; s receives br_startoff, br_startblock,
+ * br_blockcount and br_state.  The record is decoded from words l0, l1
+ * when BMBT_USE_64 is set and from words l0..l3 otherwise.  As unpacked
+ * below, from the most significant end of the record:
+ *	extent flag	top BMBT_EXNTFLAG_BITLEN bits of l0
+ *	startoff	the bits between the flag and the startblock field
+ *	startblock	52 bits: 9 + 43 (64-bit words) or 9 + 32 + 11
+ *			(32-bit words)
+ *	blockcount	lowest 21 bits of the last word
+ *
+ * Without XFS_BIG_FILESYSTEMS the 9 high startblock bits are only read
+ * in DEBUG builds, where the value is asserted to fit in 32 bits or to
+ * satisfy ISNULLDSTARTBLOCK(); non-DEBUG builds leave them out.
+ * Without XFS_BIG_FILES the DEBUG (and 64-bit word) variants assert that
+ * startoff fits in 32 bits.
+ */
+void
+xfs_bmbt_get_all(
+	xfs_bmbt_rec_t	*r,		/* packed extent record */
+	xfs_bmbt_irec_t	*s)		/* unpacked result */
+{
+	int	ext_flag;		/* raw extent flag bits */
+	xfs_exntst_t st;		/* state derived from ext_flag */
+
+#if BMBT_USE_64
+	ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
+#if XFS_BIG_FILES
+	s->br_startoff = ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			   XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#else	/* !XFS_BIG_FILES */
+	{
+		xfs_dfiloff_t	o;
+
+		o = ((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		      XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+		ASSERT((o >> 32) == 0);
+		s->br_startoff = (xfs_fileoff_t)o;
+	}
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	s->br_startblock = (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) | 
+			   (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	{
+		xfs_dfsbno_t	b;
+
+		b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) | 
+		    (((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+		ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+		s->br_startblock = (xfs_fsblock_t)b;
+	}
+#else	/* !DEBUG */
+	s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);	/* high 9 bits in l0 not read here */
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+	s->br_blockcount = (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+#else	/* !BMBT_USE_64 */
+	ext_flag = (INT_GET(r->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN));
+#if XFS_BIG_FILES
+	s->br_startoff = (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			    XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+			 (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#else	/* !XFS_BIG_FILES */
+#ifdef DEBUG
+	{
+		xfs_dfiloff_t	o;
+
+		o = (((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		       XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+		    (((xfs_dfiloff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+		ASSERT((o >> 32) == 0);
+		s->br_startoff = (xfs_fileoff_t)o;
+	}
+#else	/* !DEBUG */
+	s->br_startoff = (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			    XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+			 (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	s->br_startblock =
+		(((xfs_fsblock_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+		(((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+		(((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	{
+		xfs_dfsbno_t	b;
+
+		b = (((xfs_dfsbno_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+		    (((xfs_dfsbno_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+		    (((xfs_dfsbno_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+		ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+		s->br_startblock = (xfs_fsblock_t)b;
+	}
+#else	/* !DEBUG */
+	s->br_startblock = (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+			   (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);	/* high 9 bits in l1 not read here */
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+	s->br_blockcount = (xfs_filblks_t)(INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21));
+#endif	/* BMBT_USE_64 */
+	/* This is xfs_extent_state() in-line */
+	if (ext_flag) {
+		ASSERT(s->br_blockcount != 0);	/* saved for DMIG */
+		st = XFS_EXT_UNWRITTEN;
+	} else
+		st = XFS_EXT_NORM;
+	s->br_state = st;
+}
+
+/*
+ * Return the btree block the cursor holds at the given level.
+ *
+ * Below the topmost level the block comes from cur->bc_bufs[level] and
+ * that buffer is stored in *bpp.  At the topmost level the block is the
+ * if_broot of the inode fork named by the cursor and *bpp is set to 0.
+ */
+xfs_bmbt_block_t *
+xfs_bmbt_get_block(
+	xfs_btree_cur_t		*cur,
+	int			level,
+	xfs_buf_t			**bpp)
+{
+	xfs_ifork_t		*fork;		/* inode fork owning the root */
+
+	if (level < cur->bc_nlevels - 1) {
+		*bpp = cur->bc_bufs[level];
+		return XFS_BUF_TO_BMBT_BLOCK(*bpp);
+	}
+
+	/* Topmost level: the block has no buffer of its own. */
+	*bpp = 0;
+	fork = XFS_IFORK_PTR(cur->bc_private.b.ip,
+		cur->bc_private.b.whichfork);
+	return fork->if_broot;
+}
+
+/*
+ * Return the blockcount field of a packed bmap extent record: the low
+ * 21 bits of the record's last word (l1 with BMBT_USE_64, l3 without).
+ */
+xfs_filblks_t
+xfs_bmbt_get_blockcount(
+	xfs_bmbt_rec_t	*r)
+{
+	xfs_filblks_t	count;		/* unpacked block count */
+
+#if BMBT_USE_64
+	count = (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+#else	/* !BMBT_USE_64 */
+	count = (xfs_filblks_t)(INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21));
+#endif	/* BMBT_USE_64 */
+	return count;
+}
+
+/*
+ * Extract the startblock field from a bmap extent record.
+ *
+ * With XFS_BIG_FILESYSTEMS the value is assembled from 52 bits: the low
+ * 9 bits of one word shifted up by 43, joined with the bits above the
+ * 21-bit blockcount (l0/l1 with BMBT_USE_64, l1/l2/l3 without).
+ * Without XFS_BIG_FILESYSTEMS, DEBUG builds assemble the same 52 bits
+ * and assert that the result fits in 32 bits or satisfies
+ * ISNULLDSTARTBLOCK(); non-DEBUG builds leave the 9 high bits out.
+ */
+xfs_fsblock_t
+xfs_bmbt_get_startblock(
+	xfs_bmbt_rec_t	*r)		/* packed extent record */
+{
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+	return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+	       (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	xfs_dfsbno_t	b;		/* full-width value, checked before narrowing */
+
+	b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+	    (((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+	ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+	return (xfs_fsblock_t)b;
+#else	/* !DEBUG */
+	return (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);	/* high 9 bits in l0 not read here */
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64 */
+#if XFS_BIG_FILESYSTEMS
+	return (((xfs_fsblock_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+	       (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+	       (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	xfs_dfsbno_t	b;		/* full-width value, checked before narrowing */
+
+	b = (((xfs_dfsbno_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+	    (((xfs_dfsbno_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+	    (((xfs_dfsbno_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+	ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+	return (xfs_fsblock_t)b;
+#else	/* !DEBUG */
+	return (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+	       (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);	/* high 9 bits in l1 not read here */
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Extract the startoff field from a bmap extent record.
+ *
+ * The field sits directly below the BMBT_EXNTFLAG_BITLEN flag bits at
+ * the top of l0.  With BMBT_USE_64 it is l0 with the flag masked off,
+ * shifted right by 9; otherwise it is the masked l0 shifted left by 23
+ * joined with l1 shifted right by 9.
+ * Without XFS_BIG_FILES the value is asserted to fit in 32 bits (always
+ * in the BMBT_USE_64 variant, only under DEBUG in the other one).
+ */
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+	xfs_bmbt_rec_t	*r)		/* packed extent record */
+{
+#if BMBT_USE_64
+#if XFS_BIG_FILES
+	return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#else	/* !XFS_BIG_FILES */
+	xfs_dfiloff_t	o;		/* full-width value, checked before narrowing */
+
+	o = ((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+	      XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+	ASSERT((o >> 32) == 0);
+	return (xfs_fileoff_t)o;
+#endif	/* XFS_BIG_FILES */
+#else	/* !BMBT_USE_64 */
+#if XFS_BIG_FILES
+	return (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		  XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+	       (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#else	/* !XFS_BIG_FILES */
+#ifdef DEBUG
+	xfs_dfiloff_t	o;		/* full-width value, checked before narrowing */
+
+	o = (((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+	       XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+	    (((xfs_dfiloff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+	ASSERT((o >> 32) == 0);
+	return (xfs_fileoff_t)o;
+#else	/* !DEBUG */
+	return (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		  XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+	       (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILES */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Return the extent state of a packed bmap extent record.
+ * The flag is the top BMBT_EXNTFLAG_BITLEN bits of l0; it is turned
+ * into a state by xfs_extent_state() together with the block count.
+ */
+xfs_exntst_t
+xfs_bmbt_get_state(
+	xfs_bmbt_rec_t  *r)
+{
+	xfs_filblks_t	blks;		/* block count of the record */
+	int		flag;		/* raw extent flag bits */
+
+#if BMBT_USE_64
+	flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
+#else	/* !BMBT_USE_64 */
+	flag = (INT_GET(r->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN));
+#endif	/* BMBT_USE_64 */
+	blks = xfs_bmbt_get_blockcount(r);
+	return xfs_extent_state(blks, flag);
+}
+
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * If the incremented index is still within bb_numrecs of the block,
+ * nothing else changes.  Otherwise, provided the block has a right
+ * sibling, the indices of the levels above are incremented until one of
+ * them stays within its block; from there the cursor is walked back down
+ * to 'level', reading each child block and pointing at its first entry.
+ *
+ * Returns 0 with *stat = 1 when the cursor points at a record, 0 with
+ * *stat = 0 when the block has no right sibling or every higher level
+ * ran out of entries (the indices already incremented are not restored),
+ * or a non-zero error, without writing *stat, when a read or check fails.
+ */
+int						/* error */
+xfs_bmbt_increment(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			level,		/* level to step forward at */
+	int			*stat)		/* success/failure */
+{
+	xfs_bmbt_block_t	*block;		/* block at the level being examined */
+	xfs_buf_t			*bp;		/* buffer for block */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_increment";
+#endif
+	xfs_fsblock_t		fsbno;		/* child block to read on the way down */
+	int			lev;		/* level walked up to, then back down from */
+	xfs_mount_t		*mp;
+	xfs_trans_t		*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	ASSERT(level < cur->bc_nlevels);
+	if (level < cur->bc_nlevels - 1)	/* not issued for the topmost level */
+		xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {	/* still inside the same block */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {	/* no right sibling to move into */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {	/* climb until an index stays in range */
+		block = xfs_bmbt_get_block(cur, lev, &bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			break;
+		if (lev < cur->bc_nlevels - 1)
+			xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+	}
+	if (lev == cur->bc_nlevels) {	/* ran off the top of the cursor */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	tp = cur->bc_tp;
+	mp = cur->bc_mp;
+	for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {	/* descend back to 'level' */
+		fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		cur->bc_ptrs[lev] = 1;	/* point at the child's first entry */
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Insert the current record at the point referenced by cur.
+ *
+ * The record is packed from cur->bc_rec.b and passed to
+ * xfs_bmbt_insrec(), starting at level 0 and moving one level up per
+ * iteration, for as long as insrec hands back a block number other than
+ * NULLFSBLOCK in nbno.  When insrec hands back a new cursor in ncur it
+ * replaces pcur for the next iteration.  A pcur other than cur is
+ * folded back into cur (bc_nlevels, allocated count, firstblock) and
+ * deleted once a newer cursor exists or the loop is about to end.
+ *
+ * Returns 0 with *stat set to the status of the last insrec call, or a
+ * non-zero error without writing *stat.
+ */
+int					/* error */
+xfs_bmbt_insert(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_insert";
+#endif
+	int		i;		/* status reported by insrec */
+	int		level;		/* level passed to the next insrec call */
+	xfs_fsblock_t	nbno;		/* block number handed back by insrec */
+	xfs_btree_cur_t	*ncur;		/* cursor handed back by insrec, if any */
+	xfs_bmbt_rec_t	nrec;		/* packed record being inserted */
+	xfs_btree_cur_t	*pcur;		/* cursor used for the current level */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	level = 0;
+	nbno = NULLFSBLOCK;
+	xfs_bmbt_set_all(&nrec, &cur->bc_rec.b);
+	ncur = (xfs_btree_cur_t *)0;
+	pcur = cur;
+	do {
+		if (error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
+				&i)) {
+			if (pcur != cur)	/* only cursors created here are released here */
+				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);	/* NOTE(review): error0 does not delete a pcur != cur -- possible cursor leak, confirm */
+		if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {	/* pcur is finished: fold it into cur */
+			cur->bc_nlevels = pcur->bc_nlevels;
+			cur->bc_private.b.allocated +=
+				pcur->bc_private.b.allocated;
+			pcur->bc_private.b.allocated = 0;
+			ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
+			       (cur->bc_private.b.ip->i_d.di_flags & 
+				XFS_DIFLAG_REALTIME));
+			cur->bc_private.b.firstblock =
+				pcur->bc_private.b.firstblock;
+			ASSERT(cur->bc_private.b.flist ==
+			       pcur->bc_private.b.flist);
+			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+		}
+		if (ncur) {	/* continue with the cursor insrec handed back */
+			pcur = ncur;
+			ncur = (xfs_btree_cur_t *)0;
+		}
+	} while (nbno != NULLFSBLOCK);
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = i;
+	return 0;
+error0:
+	XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+	return error;	/* presumably set by XFS_WANT_CORRUPTED_GOTO (defined elsewhere) -- confirm */
+}
+
+/*
+ * Log header fields of a btree block.
+ *
+ * 'fields' selects the header members.  When the block has a buffer the
+ * selection is turned into a byte range of that buffer and logged; when
+ * bp is null the inode's fork root is logged instead.
+ */
+void
+xfs_bmbt_log_block(
+	xfs_btree_cur_t		*cur,
+	xfs_buf_t			*bp,
+	int			fields)
+{
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_log_block";
+#endif
+	/* Byte offset of each header member, plus the end of the header. */
+	static const short	offsets[] = {
+		offsetof(xfs_bmbt_block_t, bb_magic),
+		offsetof(xfs_bmbt_block_t, bb_level),
+		offsetof(xfs_bmbt_block_t, bb_numrecs),
+		offsetof(xfs_bmbt_block_t, bb_leftsib),
+		offsetof(xfs_bmbt_block_t, bb_rightsib),
+		sizeof(xfs_bmbt_block_t)
+	};
+	int			lo;		/* first byte to log */
+	int			hi;		/* last byte to log */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
+	if (!bp) {
+		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	} else {
+		xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &lo, &hi);
+		xfs_trans_log_buf(cur->bc_tp, bp, lo, hi);
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Log records rfirst..rlast (1-based, inclusive) of the btree block held
+ * in bp.  The byte range runs from the start of record rfirst to the
+ * last byte of record rlast, measured from the start of the block.
+ */
+void
+xfs_bmbt_log_recs(
+	xfs_btree_cur_t		*cur,
+	xfs_buf_t			*bp,
+	int			rfirst,
+	int			rlast)
+{
+	xfs_bmbt_block_t	*blk;		/* block inside bp */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_log_recs";
+#endif
+	int			lo;		/* first byte to log */
+	int			hi;		/* last byte to log */
+	xfs_bmbt_rec_t		*recs;		/* record 1 of the block */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
+	ASSERT(bp);
+	blk = XFS_BUF_TO_BMBT_BLOCK(bp);
+	recs = XFS_BMAP_REC_DADDR(blk, 1, cur);
+	lo = (int)((xfs_caddr_t)(recs + (rfirst - 1)) - (xfs_caddr_t)blk);
+	hi = (int)(((xfs_caddr_t)(recs + rlast) - 1) - (xfs_caddr_t)blk);
+	xfs_trans_log_buf(cur->bc_tp, bp, lo, hi);
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Store (off, bno, len) as the cursor's search record and run
+ * xfs_bmbt_lookup() with XFS_LOOKUP_EQ.
+ */
+int					/* error */
+xfs_bmbt_lookup_eq(
+	xfs_btree_cur_t	*cur,
+	xfs_fileoff_t	off,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	len,
+	int		*stat)		/* success/failure */
+{
+	xfs_bmbt_irec_t	*want = &cur->bc_rec.b;	/* search record in the cursor */
+
+	want->br_startoff = off;
+	want->br_startblock = bno;
+	want->br_blockcount = len;
+	return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Store (off, bno, len) as the cursor's search record and run
+ * xfs_bmbt_lookup() with XFS_LOOKUP_GE.
+ */
+int					/* error */
+xfs_bmbt_lookup_ge(
+	xfs_btree_cur_t	*cur,
+	xfs_fileoff_t	off,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	len,
+	int		*stat)		/* success/failure */
+{
+	xfs_bmbt_irec_t	*want = &cur->bc_rec.b;	/* search record in the cursor */
+
+	want->br_startoff = off;
+	want->br_startblock = bno;
+	want->br_blockcount = len;
+	return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Store (off, bno, len) as the cursor's search record and run
+ * xfs_bmbt_lookup() with XFS_LOOKUP_LE.
+ */
+int					/* error */
+xfs_bmbt_lookup_le(
+	xfs_btree_cur_t	*cur,
+	xfs_fileoff_t	off,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	len,
+	int		*stat)		/* success/failure */
+{
+	xfs_bmbt_irec_t	*want = &cur->bc_rec.b;	/* search record in the cursor */
+
+	want->br_startoff = off;
+	want->br_startblock = bno;
+	want->br_blockcount = len;
+	return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Give the bmap btree a new root block.  Allocate one block, copy the old
+ * broot contents down into it and make the broot point to it.
+ */
+int						/* error */
+xfs_bmbt_newroot(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			*logflags,	/* logging flags for inode */
+	int			*stat)		/* return status: 1 done, 0 no block */
+{
+	xfs_alloc_arg_t		args;		/* allocation arguments */
+	xfs_bmbt_block_t	*block;		/* bmap btree block */
+	xfs_buf_t			*bp;		/* buffer for block */
+	xfs_bmbt_block_t	*cblock;	/* child btree block */
+	xfs_bmbt_key_t		*ckp;		/* child key pointer */
+	xfs_bmbt_ptr_t		*cpp;		/* child ptr pointer */
+	int			error;		/* error return code */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_newroot";
+#endif
+#ifdef DEBUG
+	int			i;		/* loop counter */
+#endif
+	xfs_bmbt_key_t		*kp;		/* pointer to bmap btree key */
+	int			level;		/* btree level */
+	xfs_bmbt_ptr_t		*pp;		/* pointer to bmap block addr */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	level = cur->bc_nlevels - 1;
+	block = xfs_bmbt_get_block(cur, level, &bp);
+	/*
+	 * Copy the root into a real block.  Set up a one-block allocation first.
+	 */
+	args.mp = cur->bc_mp;
+	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	args.tp = cur->bc_tp;
+	args.fsbno = cur->bc_private.b.firstblock;
+	args.mod = args.minleft = args.alignment = args.total = args.isfl =
+		args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;	/* exactly one block */
+	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+	if (args.fsbno == NULLFSBLOCK) {	/* no firstblock yet: start at the block ptr 1 names */
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		args.fsbno = INT_GET(*pp, ARCH_CONVERT);
+		args.type = XFS_ALLOCTYPE_START_BNO;
+	} else if (args.wasdel)
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+	else
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	if (error = xfs_alloc_vextent(&args)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (args.fsbno == NULLFSBLOCK) {	/* no block came back: report failure via *stat */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_private.b.allocated++;
+	cur->bc_private.b.ip->i_d.di_nblocks++;
+	if (XFS_IS_QUOTA_ON(args.mp) &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+					  XFS_TRANS_DQ_BCOUNT, 1L);
+	bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
+	cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
+	*cblock = *block;	/* struct copy of the old root block into the child */
+	INT_MOD(block->bb_level, ARCH_CONVERT, +1);	/* root moves up one level */
+	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);	/* root keeps one entry */
+	cur->bc_nlevels++;
+	cur->bc_ptrs[level + 1] = 1;
+	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
+	bcopy(kp, ckp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
+#ifdef DEBUG
+	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+		if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+	}
+#endif
+	bcopy(pp, cpp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+#ifdef DEBUG
+	if (error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
+			level)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	INT_SET(*pp, ARCH_CONVERT, args.fsbno);	/* root ptr 1 now names the new block */
+	xfs_iroot_realloc(cur->bc_private.b.ip, 1 - INT_GET(cblock->bb_numrecs, ARCH_CONVERT),
+		cur->bc_private.b.whichfork);
+	xfs_btree_setbuf(cur, level, bp);
+	/*
+	 * Do all this logging at the end so that
+	 * the root is at the right level.
+	 */
+	xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
+	xfs_bmbt_log_keys(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	xfs_bmbt_log_ptrs(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*logflags |=
+		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Set all the fields in a bmap extent record *r from the uncompressed form *s.
+ */
+void
+xfs_bmbt_set_all(
+	xfs_bmbt_rec_t	*r,		/* packed extent record to fill in */
+	xfs_bmbt_irec_t	*s)		/* uncompressed record to read from */
+{
+	int	extent_flag;		/* 0 for XFS_EXT_NORM, otherwise 1 */
+
+	ASSERT((s->br_state == XFS_EXT_NORM) ||
+		(s->br_state == XFS_EXT_UNWRITTEN));
+	extent_flag = (s->br_state == XFS_EXT_NORM) ? 0 : 1;	/* ends up in the top bit of r->l0 */
+#if XFS_BIG_FILES
+	ASSERT((s->br_startoff & XFS_MASK64HI(9)) == 0);
+	ASSERT((s->br_blockcount & XFS_MASK64HI(43)) == 0);
+#else	/* !XFS_BIG_FILES */
+	ASSERT((s->br_blockcount & XFS_MASK32HI(11)) == 0);
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64	/* record is written as the two words l0 and l1 */
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) | 
+		  ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+		  ((xfs_bmbt_rec_base_t)s->br_startblock >> 43));
+	INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | 
+		  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+		   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(s->br_startblock)) {	/* also sets low 9 bits of l0, high 11 of l1 */
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
+		INT_SET(r->l1, ARCH_CONVERT, XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	} else {
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9));
+		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | 
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64: record is written as the four words l0..l3 */
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 31) |
+		((xfs_bmbt_rec_base_t)(s->br_startoff >> 23)));
+	INT_SET(r->l3, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)s->br_startblock) << 21) |
+		  ((xfs_bmbt_rec_base_t)(s->br_blockcount & XFS_MASK32LO(21))));
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l1, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)s->br_startoff) << 9) |
+		  ((xfs_bmbt_rec_base_t)(s->br_startblock >> 43)));
+	INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(s->br_startblock)) {	/* also sets low 9 bits of l1, high 11 of l2 */
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startoff << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK32LO(9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK32HI(11) |
+			  (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+	} else {
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startoff << 9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set all the fields in a bmap extent record from the arguments.
+ * Same packing as xfs_bmbt_set_all(), with the fields passed individually.
+ */
+void
+xfs_bmbt_set_allf(
+	xfs_bmbt_rec_t	*r,		/* packed extent record to fill in */
+	xfs_fileoff_t	o,		/* starting file offset */
+	xfs_fsblock_t	b,		/* starting block */
+	xfs_filblks_t	c,		/* block count */
+	xfs_exntst_t	v)		/* extent state */
+{
+	int	extent_flag;		/* 0 for XFS_EXT_NORM, otherwise 1 */
+
+	ASSERT((v == XFS_EXT_NORM) || (v == XFS_EXT_UNWRITTEN));
+	extent_flag = (v == XFS_EXT_NORM) ? 0 : 1;	/* ends up in the top bit of r->l0 */
+#if XFS_BIG_FILES
+	ASSERT((o & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0);
+	ASSERT((c & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+#else	/* !XFS_BIG_FILES */
+	ASSERT((c & XFS_MASK32HI(11)) == 0);
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((b & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64	/* record is written as the two words l0 and l1 */
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) | 
+	        ((xfs_bmbt_rec_base_t)o << 9) |
+		((xfs_bmbt_rec_base_t)b >> 43));
+	INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)b << 21) | 
+		  ((xfs_bmbt_rec_base_t)c &
+		   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(b)) {	/* also sets low 9 bits of l0, high 11 of l1 */
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9) |
+			 (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
+		INT_SET(r->l1, ARCH_CONVERT, XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)b << 21) |
+			  ((xfs_bmbt_rec_base_t)c &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	} else {
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9));
+		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)b << 21) | 
+			  ((xfs_bmbt_rec_base_t)c &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64: record is written as the four words l0..l3 */
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 31) |
+		((xfs_bmbt_rec_base_t)(o >> 23)));
+	INT_SET(r->l3, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)b) << 21) |
+		  ((xfs_bmbt_rec_base_t)(c & XFS_MASK32LO(21))));
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l1, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)o) << 9) |
+		  ((xfs_bmbt_rec_base_t)(b >> 43)));
+	INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(b >> 11));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(b)) {	/* also sets low 9 bits of l1, high 11 of l2 */
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(o << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK32LO(9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK32HI(11) |
+			  (xfs_bmbt_rec_base_t)(b >> 11));
+	} else {
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(o << 9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(b >> 11));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the blockcount field (low 21 bits of l1, or of l3 when !BMBT_USE_64).
+ */
+void
+xfs_bmbt_set_blockcount(
+	xfs_bmbt_rec_t	*r,		/* extent record to modify */
+	xfs_filblks_t	v)		/* new block count */
+{
+#if XFS_BIG_FILES
+	ASSERT((v & XFS_MASK64HI(43)) == 0);
+#else	/* !XFS_BIG_FILES */
+	ASSERT((v & XFS_MASK32HI(11)) == 0);
+#endif
+#if BMBT_USE_64	/* keep the high 43 bits of l1, replace the low 21 */
+	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
+		  (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21)));
+#else	/* !BMBT_USE_64: keep the high 11 bits of l3, replace the low 21 */
+	INT_SET(r->l3, ARCH_CONVERT, (INT_GET(r->l3, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK32HI(11)) |
+		  ((xfs_bmbt_rec_base_t)v & XFS_MASK32LO(21)));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the startblock field in a bmap extent record; other fields' bits are kept.
+ */
+void
+xfs_bmbt_set_startblock(
+	xfs_bmbt_rec_t	*r,		/* extent record to modify */
+	xfs_fsblock_t	v)		/* new starting block */
+{
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((v & XFS_MASK64HI(12)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64	/* startblock spans the low 9 bits of l0 and the high 43 of l1 */
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
+		  (xfs_bmbt_rec_base_t)(v >> 43));
+	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
+		  (xfs_bmbt_rec_base_t)(v << 21));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(v)) {	/* null start block: set low 9 bits of l0, high 11 of l1 */
+		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) | (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)v << 21) |
+			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	} else {	/* otherwise clear the low 9 bits of l0 */
+		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 21) |
+			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64: startblock spans low 9 bits of l1, all of l2, high 11 of l3 */
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32HI(23)) | (xfs_bmbt_rec_base_t)(v >> 43));
+	INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(v >> 11));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(v)) {	/* null start block: set low 9 bits of l1, high 11 of l2 */
+		INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) | XFS_MASK32LO(9)));
+		INT_SET(r->l2, ARCH_CONVERT, XFS_MASK32HI(11) | (xfs_bmbt_rec_base_t)(v >> 11));
+	} else {	/* otherwise clear the low 9 bits of l1 */
+		INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & ~XFS_MASK32LO(9)));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(v >> 11));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+	INT_SET(r->l3, ARCH_CONVERT, (INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21)) |
+		  (((xfs_bmbt_rec_base_t)v) << 21));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the startoff field in a bmap extent record; other fields' bits are kept.
+ */
+void
+xfs_bmbt_set_startoff(
+	xfs_bmbt_rec_t	*r,		/* extent record to modify */
+	xfs_fileoff_t	v)		/* new starting file offset */
+{
+#if XFS_BIG_FILES
+	ASSERT((v & XFS_MASK64HI(9)) == 0);
+#endif	/* XFS_BIG_FILES */
+#if BMBT_USE_64	/* keep the top bit and the low 9 bits of l0, replace what lies between */
+	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
+		((xfs_bmbt_rec_base_t)v << 9) |
+		  (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+#else	/* !BMBT_USE_64: startoff spans the low 31 bits of l0 and the high 23 of l1 */
+	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK32HI(1)) |
+		(xfs_bmbt_rec_base_t)(v >> 23));
+	INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 9) |
+		  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK32LO(9)));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the extent state field in a bmap extent record: the flag bits at the
+ * top of l0 are cleared for XFS_EXT_NORM and set for any other state.
+ */
+void
+xfs_bmbt_set_state(
+	xfs_bmbt_rec_t	*r,		/* extent record to modify */
+	xfs_exntst_t	v)		/* new extent state */
+{
+	ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
+#if BMBT_USE_64
+	if (v != XFS_EXT_NORM)
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN));
+	else
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN));
+#else	/* !BMBT_USE_64 */
+	if (v != XFS_EXT_NORM)
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK32HI(BMBT_EXNTFLAG_BITLEN));
+	else
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Build the on-disk btree root *dblock from the in-memory root *rblock.
+ */
+void
+xfs_bmbt_to_bmdr(
+	xfs_bmbt_block_t	*rblock,	/* in-memory root (source) */
+	int			rblocklen,	/* length used to locate rblock's keys/ptrs */
+	xfs_bmdr_block_t	*dblock,	/* on-disk root (destination) */
+	int			dblocklen)	/* length used to locate dblock's keys/ptrs */
+{
+	int			maxrecs, nrecs;	/* max records for dblocklen; records to copy */
+	xfs_bmbt_key_t		*src_keys;
+	xfs_bmbt_ptr_t		*src_ptrs;
+	xfs_bmbt_key_t		*dst_keys;
+	xfs_bmbt_ptr_t		*dst_ptrs;
+
+	ASSERT(INT_GET(rblock->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC);
+	ASSERT(INT_GET(rblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(INT_GET(rblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
+	dblock->bb_level = rblock->bb_level;	/* no conversion: both on-disk format */
+	dblock->bb_numrecs = rblock->bb_numrecs;/* no conversion: both on-disk format */
+	maxrecs = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
+	src_keys = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
+	dst_keys = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, maxrecs);
+	src_ptrs = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+	dst_ptrs = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, maxrecs);
+	nrecs = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
+	bcopy(src_keys, dst_keys, sizeof(*src_keys) * nrecs);
+	bcopy(src_ptrs, dst_ptrs, sizeof(*src_ptrs) * nrecs); /* INT_: direct copy */
+}
+
+/*
+ * Update the level-0 record under the cursor to the passed values and log it.
+ */
+int
+xfs_bmbt_update(
+	xfs_btree_cur_t		*cur,
+	xfs_fileoff_t		off,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	xfs_exntst_t		state)
+{
+	xfs_bmbt_block_t	*blk;
+	xfs_buf_t		*blkbp;
+	int			error;
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_update";
+#endif
+	int			idx;
+	xfs_bmbt_key_t		newkey;
+	xfs_bmbt_rec_t		*rec;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
+		(xfs_dfilblks_t)len, (int)state);
+	blk = xfs_bmbt_get_block(cur, 0, &blkbp);
+#ifdef DEBUG
+	error = xfs_btree_check_lblock(cur, blk, 0, blkbp);
+	if (error) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	idx = cur->bc_ptrs[0];
+	rec = XFS_BMAP_REC_IADDR(blk, idx, cur);
+	xfs_bmbt_set_allf(rec, off, bno, len, state);
+	xfs_bmbt_log_recs(cur, blkbp, idx, idx);
+	if (idx <= 1) {		/* record 1: hand the new startoff to xfs_bmbt_updkey() */
+		INT_SET(newkey.br_startoff, ARCH_CONVERT, off);
+		error = xfs_bmbt_updkey(cur, &newkey, 1);
+		if (error) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	return 0;
+}
+
+/*
+ * Check an extent list, which has just been read, for
+ * any bit set in the extent flag field of a record.
+ * ASSERTs on debug kernels, as this condition should
+ * not occur.  Returns 1 as soon as a record with a flag
+ * bit is found, otherwise returns 0.
+ */
+int
+xfs_check_nostate_extents(
+	xfs_bmbt_rec_t		*ep,	/* first extent record to check */
+	xfs_extnum_t		num)	/* number of records at ep */
+{
+	xfs_extnum_t		i;	/* records examined so far */
+
+	for (i = 0; i < num; i++, ep++) {
+#if BMBT_USE_64
+		if ((INT_GET(ep->l0, ARCH_CONVERT) >> (64 - BMBT_EXNTFLAG_BITLEN)) == 0)
+#else	/* !BMBT_USE_64 */
+		if ((INT_GET(ep->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN)) == 0)
+#endif	/* BMBT_USE_64 */
+			continue;
+		ASSERT(0);
+		return 1;
+	}
+	return 0;
+}
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
new file mode 100644
index 000000000..73cdd9ccd
--- /dev/null
+++ b/libxfs/xfs_btree.c
@@ -0,0 +1,889 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * This file contains common code for the space manager's btree implementations.
+ */
+
+#include <xfs.h>
+
+/*
+ * Cursor allocation zone.
+ */
+xfs_zone_t	*xfs_btree_cur_zone;
+
+/*
+ * Btree magic numbers, looked up by btree number (cur->bc_btnum).
+ */
+const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
+{
+	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
+};
+
+/* 
+ * Prototypes for internal routines.
+ */
+
+/*
+ * Checking routine: return maxrecs for the block.
+ */
+STATIC int				/* number of records fitting in block */
+xfs_btree_maxrecs(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_block_t	*block);/* generic btree block pointer */
+
+/*
+ * Internal routines.
+ */
+
+/*
+ * Checking routine: return maxrecs for the block, using the MAXRECS macro
+ * of the cursor's btree type at the level stored in the block header.
+ */
+STATIC int				/* number of records fitting in block */
+xfs_btree_maxrecs(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_block_t	*block)	/* generic btree block pointer */
+{
+	xfs_btnum_t		btnum = cur->bc_btnum;
+
+	if (btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT)
+		return (int)XFS_ALLOC_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
+	if (btnum == XFS_BTNUM_BMAP)
+		return (int)XFS_BMAP_BLOCK_IMAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
+	if (btnum == XFS_BTNUM_INO)
+		return (int)XFS_INOBT_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
+	/* Not one of the btree types handled above. */
+	ASSERT(0);
+	return 0;
+}
+
+/*
+ * External routines.
+ */
+
+#ifdef DEBUG
+/*
+ * Debug routine: check that block header is ok; the checker's result is dropped.
+ */
+void
+xfs_btree_check_block(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_block_t	*block,	/* generic btree block pointer */
+	int			level,	/* level of the btree block */
+	xfs_buf_t		*bp)	/* buffer containing block, if any */
+{
+	if (!XFS_BTREE_LONG_PTRS(cur->bc_btnum))
+		xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
+			bp);
+	else
+		xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
+			bp);
+}
+
+/*
+ * Debug routine: check that keys are in the right order.
+ * Each btree type asserts that the left key sorts strictly
+ * before the right key; an unknown btree type also asserts.
+ * Nothing is returned, a violation only trips an ASSERT.
+ */
+void
+xfs_btree_check_key(
+	xfs_btnum_t	btnum,		/* btree identifier */
+	void		*ak1,		/* pointer to left (lower) key */
+	void		*ak2)		/* pointer to right (higher) key */
+{
+	switch (btnum) {
+	case XFS_BTNUM_BNO: {
+		xfs_alloc_key_t	*k1 = ak1;
+		xfs_alloc_key_t	*k2 = ak2;
+
+		/* ordered by startblock */
+		ASSERT(INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT));
+		break;
+	    }
+	case XFS_BTNUM_CNT: {
+		xfs_alloc_key_t	*k1 = ak1;
+		xfs_alloc_key_t	*k2 = ak2;
+
+		/* ordered by blockcount, ties broken by startblock */
+		ASSERT(INT_GET(k1->ar_blockcount, ARCH_CONVERT) < INT_GET(k2->ar_blockcount, ARCH_CONVERT) ||
+		       (INT_GET(k1->ar_blockcount, ARCH_CONVERT) == INT_GET(k2->ar_blockcount, ARCH_CONVERT) &&
+			INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT)));
+		break;
+	    }
+	case XFS_BTNUM_BMAP: {
+		xfs_bmbt_key_t	*k1 = ak1;
+		xfs_bmbt_key_t	*k2 = ak2;
+
+		/* ordered by startoff */
+		ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT));
+		break;
+	    }
+	case XFS_BTNUM_INO: {
+		xfs_inobt_key_t	*k1 = ak1;
+		xfs_inobt_key_t	*k2 = ak2;
+
+		/* ordered by startino */
+		ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT));
+		break;
+	    }
+	default:
+		ASSERT(0);
+		break;
+	}
+}
+#endif	/* DEBUG */
+
+/*
+ * Checking routine: check that long form block header is ok (magic, level, numrecs, siblings).
+ */
+/* ARGSUSED */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lblock(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_lblock_t	*block,	/* btree long form block pointer */
+	int			level,	/* level of the btree block */
+	xfs_buf_t		*bp)	/* buffer for block, if any */
+{
+	int			lblock_ok; /* block passes checks */
+	xfs_mount_t		*mp;	/* file system mount point */
+
+	mp = cur->bc_mp;
+	lblock_ok =
+		INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
+		INT_GET(block->bb_level, ARCH_CONVERT) == level &&
+		INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+			xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
+		INT_GET(block->bb_leftsib, ARCH_CONVERT) != 0 &&	/* a zero sibling is rejected */
+		(INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO ||
+		 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_leftsib, ARCH_CONVERT))) &&
+		INT_GET(block->bb_rightsib, ARCH_CONVERT) != 0 &&	/* a zero sibling is rejected */
+		(INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO ||
+		 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_rightsib, ARCH_CONVERT)));
+	if (XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,	/* NOTE(review): presumably can also fire by error injection - confirm */
+			XFS_RANDOM_BTREE_CHECK_LBLOCK)) {
+#pragma mips_frequency_hint NEVER
+		if (bp)
+			xfs_buftrace("LBTREE ERROR", bp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	return 0;
+}
+
+/*
+ * Checking routine: check that (long) pointer is ok for a block above level 0.
+ */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_dfsbno_t	ptr,		/* btree block disk address */
+	int		level)		/* btree block level */
+{
+	xfs_mount_t	*mp;		/* file system mount point */
+
+	mp = cur->bc_mp;
+	XFS_WANT_CORRUPTED_RETURN(	/* NOTE(review): presumably returns the error when the test is false - confirm */
+		level > 0 &&		/* level 0 is rejected */
+		ptr != NULLDFSBNO &&	/* the null pointer is rejected */
+		XFS_FSB_SANITY_CHECK(mp, ptr));
+	return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Debug routine: check that records are in the right order.
+ * Each btree type asserts its own ordering rule between the
+ * left and right record; an unknown btree type also asserts.
+ * Nothing is returned, a violation only trips an ASSERT.
+ */
+void
+xfs_btree_check_rec(
+	xfs_btnum_t	btnum,		/* btree identifier */
+	void		*ar1,		/* pointer to left (lower) record */
+	void		*ar2)		/* pointer to right (higher) record */
+{
+	switch (btnum) {
+	case XFS_BTNUM_BNO: {
+		xfs_alloc_rec_t	*r1 = ar1;
+		xfs_alloc_rec_t	*r2 = ar2;
+
+		/* left extent must end at or before the right one starts */
+		ASSERT(INT_GET(r1->ar_startblock, ARCH_CONVERT) + INT_GET(r1->ar_blockcount, ARCH_CONVERT) <=
+		       INT_GET(r2->ar_startblock, ARCH_CONVERT));
+		break;
+	    }
+	case XFS_BTNUM_CNT: {
+		xfs_alloc_rec_t	*r1 = ar1;
+		xfs_alloc_rec_t	*r2 = ar2;
+
+		/* ordered by blockcount, ties broken by startblock */
+		ASSERT(INT_GET(r1->ar_blockcount, ARCH_CONVERT) < INT_GET(r2->ar_blockcount, ARCH_CONVERT) ||
+		       (INT_GET(r1->ar_blockcount, ARCH_CONVERT) == INT_GET(r2->ar_blockcount, ARCH_CONVERT) &&
+			INT_GET(r1->ar_startblock, ARCH_CONVERT) < INT_GET(r2->ar_startblock, ARCH_CONVERT)));
+		break;
+	    }
+	case XFS_BTNUM_BMAP: {
+		xfs_bmbt_rec_t	*r1 = ar1;
+		xfs_bmbt_rec_t	*r2 = ar2;
+
+		/* left extent must end at or before the right one starts */
+		ASSERT(xfs_bmbt_get_startoff(r1) +
+		       xfs_bmbt_get_blockcount(r1) <=
+		       xfs_bmbt_get_startoff(r2));
+		break;
+	    }
+	case XFS_BTNUM_INO: {
+		xfs_inobt_rec_t	*r1 = ar1;
+		xfs_inobt_rec_t	*r2 = ar2;
+
+		/* inode chunks of XFS_INODES_PER_CHUNK must not overlap */
+		ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <=
+		       INT_GET(r2->ir_startino, ARCH_CONVERT));
+		break;
+	    }
+	default:
+		ASSERT(0);
+		break;
+	}
+}
+#endif	/* DEBUG */
+
+/*
+ * Checking routine: check that short form block header is ok (magic, level, numrecs, siblings).
+ */
+/* ARGSUSED */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sblock(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_btree_sblock_t	*block,	/* btree short form block pointer */
+	int			level,	/* level of the btree block */
+	xfs_buf_t		*bp)	/* buffer containing block */
+{
+	xfs_buf_t		*agbp;	/* buffer for ag. freespace struct */
+	xfs_agf_t		*agf;	/* ag. freespace structure */
+	xfs_agblock_t		agflen;	/* native ag. freespace length */
+	int			sblock_ok; /* block passes checks */
+
+	agbp = cur->bc_private.a.agbp;
+	agf = XFS_BUF_TO_AGF(agbp);
+	agflen = INT_GET(agf->agf_length, ARCH_CONVERT);	/* upper bound for sibling block numbers */
+	sblock_ok =
+		INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
+		INT_GET(block->bb_level, ARCH_CONVERT) == level &&
+		INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+			xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
+		(INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK ||
+		 INT_GET(block->bb_leftsib, ARCH_CONVERT) < agflen) &&
+		INT_GET(block->bb_leftsib, ARCH_CONVERT) != 0 &&	/* a zero sibling is rejected */
+		(INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK ||
+		 INT_GET(block->bb_rightsib, ARCH_CONVERT) < agflen) &&
+		INT_GET(block->bb_rightsib, ARCH_CONVERT) != 0;	/* a zero sibling is rejected */
+	if (XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,	/* NOTE(review): presumably can also fire by error injection - confirm */
+			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
+			XFS_RANDOM_BTREE_CHECK_SBLOCK)) {
+#pragma mips_frequency_hint NEVER
+		if (bp)
+			xfs_buftrace("SBTREE ERROR", bp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	return 0;
+}
+
+/*
+ * Checking routine: check that (short) pointer is ok for a block above level 0.
+ */
+int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	ptr,		/* btree block disk address */
+	int		level)		/* btree block level */
+{
+	xfs_buf_t	*agbp;		/* buffer for ag. freespace struct */
+	xfs_agf_t	*agf;		/* ag. freespace structure */
+
+	agbp = cur->bc_private.a.agbp;
+	agf = XFS_BUF_TO_AGF(agbp);
+	XFS_WANT_CORRUPTED_RETURN(	/* NOTE(review): presumably returns the error when the test is false - confirm */
+		level > 0 &&		/* level 0 is rejected */
+		ptr != NULLAGBLOCK && ptr != 0 &&	/* null and zero pointers are rejected */
+		ptr < INT_GET(agf->agf_length, ARCH_CONVERT));	/* must be below agf_length */
+	return 0;
+}
+
+/*
+ * Delete the btree cursor.
+ */
+void
+xfs_btree_del_cursor(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		error)		/* del because of error */
+{
+	int		level;		/* btree level */
+
+	/*
+	 * Clear the buffer pointers, and release the buffers.
+	 * Normally the walk stops at the first level without a
+	 * buffer.  When deleting because of an error every level
+	 * is inspected instead: some of the btree code works from
+	 * level n down to 0, so an error along the way can leave
+	 * the lower entries of bc_bufs uninitialized.
+	 */
+	for (level = 0; level < cur->bc_nlevels; level++) {
+		if (cur->bc_bufs[level]) {
+			xfs_btree_setbuf(cur, level, NULL);
+			continue;
+		}
+		if (!error)
+			break;
+	}
+	/*
+	 * A bmap cursor must not be freed before the accounting
+	 * for its allocated indirect blocks has been dealt with.
+	 */
+	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+	       cur->bc_private.b.allocated == 0);
+	/*
+	 * Give the cursor back to its zone.
+	 */
+	kmem_zone_free(xfs_btree_cur_zone, cur);
+}
+
+/*
+ * Duplicate the btree cursor.
+ * Allocate a new one, copy the record, re-get the buffers.
+ * On a buffer read error the new cursor is deleted and *ncur is set to NULL.
+ */
+int					/* error */
+xfs_btree_dup_cursor(
+	xfs_btree_cur_t	*cur,		/* input cursor */
+	xfs_btree_cur_t	**ncur)		/* output cursor */
+{
+	xfs_trans_t	*tp = cur->bc_tp; /* transaction pointer, can be NULL */
+	xfs_mount_t	*mp = cur->bc_mp; /* mount structure for filesystem */
+	xfs_btree_cur_t	*dup;		/* new cursor value */
+	xfs_buf_t	*bp;		/* btree block's buffer pointer */
+	int		error;		/* error return value */
+	int		lev;		/* level number of btree block */
+
+	/*
+	 * Allocate a new cursor like the old one.
+	 */
+	dup = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
+		cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
+		cur->bc_private.b.whichfork);
+	/* Copy the record currently in the cursor. */
+	dup->bc_rec = cur->bc_rec;
+	/*
+	 * For each level, copy the ptr value and re-get the buffer if any.
+	 */
+	for (lev = 0; lev < dup->bc_nlevels; lev++) {
+		dup->bc_ptrs[lev] = cur->bc_ptrs[lev];
+		dup->bc_ra[lev] = cur->bc_ra[lev];
+		bp = cur->bc_bufs[lev];
+		if (bp == NULL) {
+			dup->bc_bufs[lev] = NULL;
+			continue;
+		}
+		error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+			XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp);
+		if (error) {
+#pragma mips_frequency_hint NEVER
+			xfs_btree_del_cursor(dup, error);
+			*ncur = NULL;
+			return error;
+		}
+		dup->bc_bufs[lev] = bp;
+		ASSERT(bp);
+		ASSERT(!XFS_BUF_GETERROR(bp));
+	}
+	/*
+	 * For bmap btrees, copy the firstblock, flist, and flags values,
+	 * since init cursor doesn't get them.
+	 */
+	if (dup->bc_btnum == XFS_BTNUM_BMAP) {
+		dup->bc_private.b.firstblock = cur->bc_private.b.firstblock;
+		dup->bc_private.b.flist = cur->bc_private.b.flist;
+		dup->bc_private.b.flags = cur->bc_private.b.flags;
+	}
+	*ncur = dup;
+	return 0;
+}
+
+/*
+ * Change the cursor to point to the first record at the given level.
+ * Other levels are unaffected, and the cursor is left untouched when
+ * the block at that level holds no records.
+ */
+int					/* success=1, failure=0 */
+xfs_btree_firstrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level)	/* level to change */
+{
+	xfs_btree_block_t	*blk;	/* generic btree block pointer */
+	xfs_buf_t		*blkbp;	/* buffer containing block */
+	int			found;	/* block has at least one record */
+
+	/*
+	 * Get the block pointer for this level.
+	 */
+	blk = xfs_btree_get_block(cur, level, &blkbp);
+	xfs_btree_check_block(cur, blk, level, blkbp);
+	/*
+	 * An empty block has no first record.  Otherwise set
+	 * the ptr value to 1, that's the first record/key.
+	 */
+	found = INT_GET(blk->bb_h.bb_numrecs, ARCH_CONVERT) != 0;
+	if (found)
+		cur->bc_ptrs[level] = 1;
+	return found;
+}
+
+/*
+ * Retrieve the block pointer from the cursor at the given level.
+ * For the top level of a bmap btree the block is the inode fork's
+ * broot and *bpp is set to NULL; otherwise it comes from bc_bufs[].
+ */
+xfs_btree_block_t *			/* generic btree block pointer */
+xfs_btree_get_block(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree */
+	xfs_buf_t		**bpp)	/* buffer containing the block */
+{
+	xfs_btree_block_t	*block;	/* return value */
+	xfs_buf_t		*bp;	/* return buffer */
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
+
+	if (cur->bc_btnum != XFS_BTNUM_BMAP || level != cur->bc_nlevels - 1) {
+		bp = cur->bc_bufs[level];
+		block = XFS_BUF_TO_BLOCK(bp);
+	} else {
+		ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+			cur->bc_private.b.whichfork);
+		block = (xfs_btree_block_t *)ifp->if_broot;
+		bp = NULL;
+	}
+	ASSERT(block != NULL);
+	*bpp = bp;
+	return block;
+}
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Long-form addressing: the block is named by a file system block
+ * number, which XFS_FSB_TO_DADDR() turns into a disk address.
+ */
+xfs_buf_t *				/* buffer for fsbno */
+xfs_btree_get_bufl(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_fsblock_t	fsbno,		/* file system block number */
+	uint		lock)		/* lock flags for get_buf */
+{
+	xfs_buf_t	*bp;		/* buffer pointer (return value) */
+
+	ASSERT(fsbno != NULLFSBLOCK);
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+		(xfs_daddr_t)XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, lock);
+	ASSERT(bp);
+	ASSERT(!XFS_BUF_GETERROR(bp));
+	return bp;
+}
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Short-form addressing: the block is named by an (agno, agbno)
+ * pair, which XFS_AGB_TO_DADDR() turns into a disk address.
+ */
+xfs_buf_t *				/* buffer for agno/agbno */
+xfs_btree_get_bufs(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_agblock_t	agbno,		/* allocation group block number */
+	uint		lock)		/* lock flags for get_buf */
+{
+	xfs_buf_t	*bp;		/* buffer pointer (return value) */
+
+	ASSERT(agno != NULLAGNUMBER);
+	ASSERT(agbno != NULLAGBLOCK);
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+		(xfs_daddr_t)XFS_AGB_TO_DADDR(mp, agno, agbno), mp->m_bsize, lock);
+	ASSERT(bp);
+	ASSERT(!XFS_BUF_GETERROR(bp));
+	return bp;
+}
+
+/*
+ * Allocate a new btree cursor from xfs_btree_cur_zone and fill it in.
+ * The cursor is either for allocation (A) or bmap (B) or inodes (I).
+ */
+xfs_btree_cur_t *			/* new btree cursor */
+xfs_btree_init_cursor(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_buf_t	*agbp,		/* (A only) buffer for agf structure */
+					/* (I only) buffer for agi structure */
+	xfs_agnumber_t	agno,		/* (AI only) allocation group number */
+	xfs_btnum_t	btnum,		/* btree identifier */
+	xfs_inode_t	*ip,		/* (B only) inode owning the btree */
+	int		whichfork)	/* (B only) data or attr fork */
+{
+	xfs_agf_t	*agf;		/* (A) allocation group freespace */
+	xfs_agi_t	*agi;		/* (I) allocation group inodespace */
+	xfs_btree_cur_t	*cur;		/* return value */
+	xfs_ifork_t	*ifp;		/* (B) inode fork pointer */
+	int		nlevels;	/* number of levels in the btree */
+
+	ASSERT(xfs_btree_cur_zone != NULL);
+	/*
+	 * Allocate a new cursor.
+	 */
+	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+	/*
+	 * Deduce the number of btree levels from the arguments.
+	 */
+	switch (btnum) {
+	case XFS_BTNUM_BNO:
+	case XFS_BTNUM_CNT:
+		agf = XFS_BUF_TO_AGF(agbp);
+		nlevels = INT_GET(agf->agf_levels[btnum], ARCH_CONVERT);
+		break;
+	case XFS_BTNUM_BMAP:
+		ifp = XFS_IFORK_PTR(ip, whichfork);
+		nlevels = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;	/* root level + 1 */
+		break;
+	case XFS_BTNUM_INO:
+		agi = XFS_BUF_TO_AGI(agbp);
+		nlevels = INT_GET(agi->agi_level, ARCH_CONVERT);
+		break;
+	default:
+		ASSERT(0);	/* NOTE(review): nlevels stays unset here if ASSERT compiles away */
+	}
+	/*
+	 * Fill in the common fields.
+	 */
+	cur->bc_tp = tp;
+	cur->bc_mp = mp;
+	cur->bc_nlevels = nlevels;
+	cur->bc_btnum = btnum;
+	cur->bc_blocklog = mp->m_sb.sb_blocklog;
+	/*
+	 * Fill in private fields.
+	 */
+	switch (btnum) {
+	case XFS_BTNUM_BNO:
+	case XFS_BTNUM_CNT:
+		/*
+		 * Allocation btree fields.
+		 */
+		cur->bc_private.a.agbp = agbp;
+		cur->bc_private.a.agno = agno;
+		break;
+	case XFS_BTNUM_BMAP:
+		/*
+		 * Bmap btree fields.
+		 */
+		cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
+		cur->bc_private.b.ip = ip;
+		cur->bc_private.b.firstblock = NULLFSBLOCK;
+		cur->bc_private.b.flist = NULL;
+		cur->bc_private.b.allocated = 0;
+		cur->bc_private.b.flags = 0;
+		cur->bc_private.b.whichfork = whichfork;
+		break;
+	case XFS_BTNUM_INO:
+		/*
+		 * Inode allocation btree fields.
+		 */
+		cur->bc_private.i.agbp = agbp;
+		cur->bc_private.i.agno = agno;
+		break;
+	default:
+		ASSERT(0);
+	}
+	return cur;
+}
+
+/*
+ * Check whether the cursor's block at the given level has no right sibling.
+ */
+int					/* 1=is last block, 0=not last block */
+xfs_btree_islastblock(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level)	/* level to check */
+{
+	xfs_btree_block_t	*block;	/* generic btree block pointer */
+	xfs_buf_t		*bp;	/* buffer containing block */
+
+	block = xfs_btree_get_block(cur, level, &bp);
+	xfs_btree_check_block(cur, block, level, bp);
+	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))	/* long vs. short sibling pointers */
+		return INT_GET(block->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO;
+	else
+		return INT_GET(block->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK;
+}
+
+/*
+ * Change the cursor to point to the last record in the current block
+ * at the given level.  Other levels are unaffected; fails on an empty block.
+ */
+int					/* success=1, failure=0 */
+xfs_btree_lastrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level)	/* level to change */
+{
+	xfs_btree_block_t	*block;	/* generic btree block pointer */
+	xfs_buf_t		*bp;	/* buffer containing block */
+
+	/*
+	 * Get the block pointer for this level.
+	 */
+	block = xfs_btree_get_block(cur, level, &bp);
+	xfs_btree_check_block(cur, block, level, bp);
+	/*
+	 * It's empty, there is no such record.
+	 */
+	if (INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT) == 0)
+		return 0;
+	/*
+	 * Set the ptr value to numrecs, that's the last record/key.
+	 */
+	cur->bc_ptrs[level] = INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT);
+	return 1;
+}
+
+/*
+ * Compute first and last byte offsets covered by the fields in the bitmask.
+ * offsets[i] is the start of field i; offsets[nbits] is read as an end marker.
+ */
+void
+xfs_btree_offsets(
+	__int64_t	fields,		/* bitmask of fields */
+	const short	*offsets,	/* table of field offsets */
+	int		nbits,		/* number of bits to inspect */
+	int		*first,		/* output: first byte offset */
+	int		*last)		/* output: last byte offset */
+{
+	int		i;		/* current bit number */
+	__int64_t	imask;		/* mask for current bit number */
+
+	ASSERT(fields != 0);
+	/*
+	 * Find the lowest bit, so the first byte offset.
+	 */
+	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {	/* ends only on a set bit */
+		if (imask & fields) {
+			*first = offsets[i];
+			break;
+		}
+	}
+	/*
+	 * Find the highest bit, so the last byte offset.
+	 */
+	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {	/* needs a set bit below nbits */
+		if (imask & fields) {
+			*last = offsets[i + 1] - 1;	/* byte before the next field */
+			break;
+		}
+	}
+}
+
+/*
+ * Read block fsbno through xfs_trans_read_buf() and return it in *bpp.
+ * Long-form addressing.  A non-NULL buffer is tagged B_FS_MAP with refval.
+ */
+int					/* error */
+xfs_btree_read_bufl(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_fsblock_t	fsbno,		/* file system block number */
+	uint		lock,		/* lock flags for read_buf */
+	xfs_buf_t	**bpp,		/* buffer for fsbno */
+	int		refval)		/* ref count value for buffer */
+{
+	xfs_buf_t	*bp;		/* return value */
+	xfs_daddr_t		d;		/* real disk block address */
+	int		error;
+
+	ASSERT(fsbno != NULLFSBLOCK);
+	d = XFS_FSB_TO_DADDR(mp, fsbno);
+	if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,	/* assignment is intended */
+			mp->m_bsize, lock, &bp)) {
+#pragma mips_frequency_hint NEVER
+		return error;	/* *bpp is left untouched on error */
+	}
+	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+	if (bp != NULL) {
+		XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
+	}
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Read block agno/agbno through xfs_trans_read_buf() and return it in *bpp.
+ * Short-form addressing.  The buffer type tag is chosen from refval.
+ */
+int					/* error */
+xfs_btree_read_bufs(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_agblock_t	agbno,		/* allocation group block number */
+	uint		lock,		/* lock flags for read_buf */
+	xfs_buf_t	**bpp,		/* buffer for agno/agbno */
+	int		refval)		/* ref count value for buffer */
+{
+	xfs_buf_t	*bp;		/* return value */
+	xfs_daddr_t		d;		/* real disk block address */
+	int		error;
+
+	ASSERT(agno != NULLAGNUMBER);
+	ASSERT(agbno != NULLAGBLOCK);
+	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+	if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,	/* assignment is intended */
+					mp->m_bsize, lock, &bp)) {
+#pragma mips_frequency_hint NEVER
+		return error;	/* *bpp is left untouched on error */
+	}
+	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+	if (bp != NULL)
+		switch (refval) {	/* any other refval leaves the buffer untagged */
+		case XFS_ALLOC_BTREE_REF:
+			XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
+			break;
+		case XFS_INO_BTREE_REF:
+			XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
+			break;
+		}
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Start read-ahead of sibling blocks at the given level not yet requested.
+ * Bits in lr are from XFS_BTCUR_{LEFT,RIGHT}RA; returns the number started.
+ */
+int
+xfs_btree_readahead(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			lev,		/* level in btree */
+	int			lr)		/* left/right bits */
+{
+	xfs_alloc_block_t	*a;
+	xfs_bmbt_block_t	*b;
+	xfs_inobt_block_t	*i;
+	int			rval = 0;	/* count of read-aheads issued */
+
+	ASSERT(cur->bc_bufs[lev] != NULL);
+	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])	/* all requested bits already set */
+		return 0;
+	cur->bc_ra[lev] |= lr;	/* remember what has been requested */
+	switch (cur->bc_btnum) {
+	case XFS_BTNUM_BNO:
+	case XFS_BTNUM_CNT:
+		a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
+		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(a->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+				INT_GET(a->bb_leftsib, ARCH_CONVERT), 1);
+			rval++;
+		}
+		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(a->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+				INT_GET(a->bb_rightsib, ARCH_CONVERT), 1);
+			rval++;
+		}
+		break;
+	case XFS_BTNUM_BMAP:
+		b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
+		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(b->bb_leftsib, ARCH_CONVERT) != NULLDFSBNO) {
+			xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_leftsib, ARCH_CONVERT), 1);
+			rval++;
+		}
+		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(b->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+			xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_rightsib, ARCH_CONVERT), 1);
+			rval++;
+		}
+		break;
+	case XFS_BTNUM_INO:
+		i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
+		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(i->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
+				INT_GET(i->bb_leftsib, ARCH_CONVERT), 1);
+			rval++;
+		}
+		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(i->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
+				INT_GET(i->bb_rightsib, ARCH_CONVERT), 1);
+			rval++;
+		}
+		break;
+	default:
+		ASSERT(0);
+	}
+	return rval;
+}
+
+/*
+ * Set the buffer for level "lev" in the cursor to bp, releasing any
+ * previous buffer, and reset that level's read-ahead bits for the new block.
+ */
+void
+xfs_btree_setbuf(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			lev,	/* level in btree */
+	xfs_buf_t		*bp)	/* new buffer to set */
+{
+	xfs_btree_block_t	*b;	/* btree block */
+	xfs_buf_t		*obp;	/* old buffer pointer */
+
+	obp = cur->bc_bufs[lev];
+	if (obp)
+		xfs_trans_brelse(cur->bc_tp, obp);
+	cur->bc_bufs[lev] = bp;
+	cur->bc_ra[lev] = 0;	/* nothing requested yet for the new buffer */
+	if (!bp)
+		return;
+	b = XFS_BUF_TO_BLOCK(bp);
+	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
+		if (INT_GET(b->bb_u.l.bb_leftsib, ARCH_CONVERT) == NULLDFSBNO)
+			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;	/* no left sibling to read */
+		if (INT_GET(b->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO)
+			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;	/* no right sibling to read */
+	} else {
+		if (INT_GET(b->bb_u.s.bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK)
+			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;	/* no left sibling to read */
+		if (INT_GET(b->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK)
+			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;	/* no right sibling to read */
+	}
+}
diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c
new file mode 100644
index 000000000..37ad6269d
--- /dev/null
+++ b/libxfs/xfs_da_btree.c
@@ -0,0 +1,2524 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_da_btree.c
+ *
+ * Routines to implement directories as Btrees of hashed names.
+ */
+
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Get a buffer for blkno, set it up as an empty node at "level", log the header.
+ */
+int
+xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
+				 xfs_dabuf_t **bpp, int whichfork)
+{
+	xfs_da_intnode_t *node;	/* node image inside the buffer */
+	xfs_dabuf_t *bp;	/* buffer for blkno, returned via *bpp */
+	int error;
+	xfs_trans_t *tp;
+
+	tp = args->trans;
+	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
+	if (error)
+		return(error);	/* *bpp is left untouched on error */
+	ASSERT(bp != NULL);
+	node = bp->data;
+	INT_ZERO(node->hdr.info.forw, ARCH_CONVERT);
+        INT_ZERO(node->hdr.info.back, ARCH_CONVERT);
+	INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
+	INT_ZERO(node->hdr.info.pad, ARCH_CONVERT);
+	INT_ZERO(node->hdr.count, ARCH_CONVERT);	/* no entries yet */
+	INT_SET(node->hdr.level, ARCH_CONVERT, level);
+
+	xfs_da_log_buf(tp, bp,
+		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
+
+	*bpp = bp;
+	return(0);
+}
+
+/*
+ * Split the leaf at the bottom of state->path, rebalance, then walk up the
+ * path inserting the new block, splitting intermediate nodes (and root) as needed.
+ */
+int							/* error */
+xfs_da_split(xfs_da_state_t *state)
+{
+	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
+	xfs_da_intnode_t *node;
+	xfs_dabuf_t *bp;
+	int max, action, error, i;
+
+	/*
+	 * Walk back up the tree splitting/inserting/adjusting as necessary.
+	 * If we need to insert and there isn't room, split the node, then
+	 * decide which fragment to insert the new block from below into.
+	 * Note that we may split the root this way, but we need more fixup.
+	 */
+	max = state->path.active - 1;
+	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
+	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
+	       state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
+
+	addblk = &state->path.blk[max];		/* initial dummy value */
+	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
+		oldblk = &state->path.blk[i];
+		newblk = &state->altpath.blk[i];
+
+		/*
+		 * If a leaf node then
+		 *     Allocate a new leaf node, then rebalance across them.
+		 * else if an intermediate node then
+		 *     We split on the last layer, must we split the node?
+		 */
+		switch (oldblk->magic) {
+		case XFS_ATTR_LEAF_MAGIC:
+#ifndef __KERNEL__
+			return(ENOTTY);	/* attr leaf split is not built outside the kernel */
+#else
+			error = xfs_attr_leaf_split(state, oldblk, newblk);
+			if ((error != 0) && (error != ENOSPC)) {
+				return(error);	/* GROT: attr is inconsistent */
+			}
+			if (!error) {
+				addblk = newblk;
+				break;
+			}
+			/*
+			 * Entry wouldn't fit, split the leaf again.
+			 */
+			state->extravalid = 1;
+			if (state->inleaf) {
+				state->extraafter = 0;	/* before newblk */
+				error = xfs_attr_leaf_split(state, oldblk,
+							    &state->extrablk);
+			} else {
+				state->extraafter = 1;	/* after newblk */
+				error = xfs_attr_leaf_split(state, newblk,
+							    &state->extrablk);
+			}
+			if (error)
+				return(error);	/* GROT: attr inconsistent */
+			addblk = newblk;
+			break;
+#endif
+		case XFS_DIR_LEAF_MAGIC:
+			ASSERT(XFS_DIR_IS_V1(state->mp));
+			error = xfs_dir_leaf_split(state, oldblk, newblk);
+			if ((error != 0) && (error != ENOSPC)) {
+				return(error);	/* GROT: dir is inconsistent */
+			}
+			if (!error) {
+				addblk = newblk;
+				break;
+			}
+			/*
+			 * Entry wouldn't fit, split the leaf again.
+			 */
+			state->extravalid = 1;
+			if (state->inleaf) {
+				state->extraafter = 0;	/* before newblk */
+				error = xfs_dir_leaf_split(state, oldblk,
+							   &state->extrablk);
+				if (error)
+					return(error);	/* GROT: dir incon. */
+				addblk = newblk;
+			} else {
+				state->extraafter = 1;	/* after newblk */
+				error = xfs_dir_leaf_split(state, newblk,
+							   &state->extrablk);
+				if (error)
+					return(error);	/* GROT: dir incon. */
+				addblk = newblk;
+			}
+			break;
+		case XFS_DIR2_LEAFN_MAGIC:
+			ASSERT(XFS_DIR_IS_V2(state->mp));
+			error = xfs_dir2_leafn_split(state, oldblk, newblk);
+			if (error)
+				return error;
+			addblk = newblk;
+			break;
+		case XFS_DA_NODE_MAGIC:
+			error = xfs_da_node_split(state, oldblk, newblk, addblk,
+							 max - i, &action);
+			xfs_da_buf_done(addblk->bp);	/* done even if the split failed */
+			addblk->bp = NULL;
+			if (error)
+				return(error);	/* GROT: dir is inconsistent */
+			/*
+			 * Record the newly split block for the next time thru?
+			 */
+			if (action)
+				addblk = newblk;
+			else
+				addblk = NULL;	/* no split here: ends the loop */
+			break;
+		}
+
+		/*
+		 * Update the btree to show the new hashval for this child.
+		 */
+		xfs_da_fixhashpath(state, &state->path);
+		/*
+		 * If we won't need this block again, it's getting dropped
+		 * from the active path by the loop control, so we need
+		 * to mark it done now.
+		 */
+		if (i > 0 || !addblk)
+			xfs_da_buf_done(oldblk->bp);
+	}
+	if (!addblk)
+		return(0);
+
+	/*
+	 * Split the root node.
+	 */
+	ASSERT(state->path.active == 0);
+	oldblk = &state->path.blk[0];
+	error = xfs_da_root_split(state, oldblk, addblk);
+	if (error) {
+		xfs_da_buf_done(oldblk->bp);
+		xfs_da_buf_done(addblk->bp);
+		addblk->bp = NULL;
+		return(error);	/* GROT: dir is inconsistent */
+	}
+
+	/*
+	 * Update pointers to the node which used to be block 0 and
+	 * just got bumped because of the addition of a new root node.
+	 * There might be three blocks involved if a double split occurred,
+	 * and the original block 0 could be at any position in the list.
+	 */
+
+	node = oldblk->bp->data;
+	if (!INT_ISZERO(node->hdr.info.forw, ARCH_CONVERT)) {
+		if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
+			bp = addblk->bp;
+		} else {
+			ASSERT(state->extravalid);
+			bp = state->extrablk.bp;
+		}
+		node = bp->data;
+		INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);	/* fix forward sibling's back link */
+		xfs_da_log_buf(state->args->trans, bp,
+		    XFS_DA_LOGRANGE(node, &node->hdr.info,
+		    sizeof(node->hdr.info)));
+	}
+	node = oldblk->bp->data;
+	if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
+		if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
+			bp = addblk->bp;
+		} else {
+			ASSERT(state->extravalid);
+			bp = state->extrablk.bp;
+		}
+		node = bp->data;
+		INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);	/* fix backward sibling's forw link */
+		xfs_da_log_buf(state->args->trans, bp,
+		    XFS_DA_LOGRANGE(node, &node->hdr.info,
+		    sizeof(node->hdr.info)));
+	}
+	xfs_da_buf_done(oldblk->bp);
+	xfs_da_buf_done(addblk->bp);
+	addblk->bp = NULL;
+	return(0);
+}
+
+/*
+ * Split the root.  Copy the old root's contents to a newly grown block (blk1
+ * is repointed at the copy), then rebuild the root position as a node with two
+ * entries, blk1 and blk2.  Returns 0 or the error from a grow/get_buf/create call.
+ */
+STATIC int						/* error */
+xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+				 xfs_da_state_blk_t *blk2)
+{
+	xfs_da_intnode_t *node, *oldroot;
+	xfs_da_args_t *args;
+	xfs_dablk_t blkno;
+	xfs_dabuf_t *bp;
+	int error, size;
+	xfs_inode_t *dp;
+	xfs_trans_t *tp;
+	xfs_mount_t *mp;
+	xfs_dir2_leaf_t *leaf;
+
+	/*
+	 * Copy the existing (incorrect) block from the root node position
+	 * to a free space somewhere.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	error = xfs_da_grow_inode(args, &blkno);
+	if (error)
+		return(error);
+	dp = args->dp;
+	tp = args->trans;
+	mp = state->mp;
+	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
+	if (error)
+		return(error);
+	ASSERT(bp != NULL);
+	node = bp->data;
+	oldroot = blk1->bp->data;
+	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+		size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
+			     (char *)oldroot);	/* header plus the entries in use */
+	} else {
+		ASSERT(XFS_DIR_IS_V2(mp));
+		ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+		leaf = (xfs_dir2_leaf_t *)oldroot;
+		size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
+			     (char *)leaf);	/* header plus the entries in use */
+	}
+	bcopy(oldroot, node, size);
+	xfs_da_log_buf(tp, bp, 0, size - 1);
+	xfs_da_buf_done(blk1->bp);
+	blk1->bp = bp;		/* blk1 now refers to the relocated copy */
+	blk1->blkno = blkno;
+
+	/*
+	 * Set up the new root node.
+	 */
+	error = xfs_da_node_create(args,
+		args->whichfork == XFS_DATA_FORK &&
+		XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,	/* (data fork && V2) ? leafblk : 0 */
+		INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
+	if (error)
+		return(error);
+	node = bp->data;
+	INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
+	INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
+	INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
+	INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
+	INT_SET(node->hdr.count, ARCH_CONVERT, 2);
+	if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {	/* same guard as xfs_da_node_add */
+		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
+		       blk1->blkno < mp->m_dirfreeblk);
+		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
+		       blk2->blkno < mp->m_dirfreeblk);
+	}
+	/* Header is already logged by xfs_da_node_create */
+	xfs_da_log_buf(tp, bp,
+		XFS_DA_LOGRANGE(node, node->btree,
+			sizeof(xfs_da_node_entry_t) * 2));
+	xfs_da_buf_done(bp);
+
+	return(0);
+}
+
+/*
+ * Insert addblk into node oldblk, splitting into newblk first if full (*result = 1).
+ */
+STATIC int						/* error */
+xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+				 xfs_da_state_blk_t *newblk,
+				 xfs_da_state_blk_t *addblk,
+				 int treelevel, int *result)
+{
+	xfs_da_intnode_t *node;
+	xfs_dablk_t blkno;
+	int newcount, error;
+	int useextra;
+
+	node = oldblk->bp->data;
+	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+
+	/*
+	 * With V2 the extra block is data or freespace.
+	 */
+	useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
+	newcount = 1 + useextra;	/* entries about to be inserted: 1 or 2 */
+	/*
+	 * Do we have to split the node?
+	 */
+	if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > XFS_DA_NODE_ENTRIES(state->mp)) {
+		/*
+		 * Allocate a new node, add to the doubly linked chain of
+		 * nodes, then move some of our excess entries into it.
+		 */
+		error = xfs_da_grow_inode(state->args, &blkno);
+		if (error)
+			return(error);	/* GROT: dir is inconsistent */
+		
+		error = xfs_da_node_create(state->args, blkno, treelevel,
+					   &newblk->bp, state->args->whichfork);
+		if (error)
+			return(error);	/* GROT: dir is inconsistent */
+		newblk->blkno = blkno;
+		newblk->magic = XFS_DA_NODE_MAGIC;
+		xfs_da_node_rebalance(state, oldblk, newblk);
+		error = xfs_da_blk_link(state, oldblk, newblk);
+		if (error)
+			return(error);
+		*result = 1;
+	} else {
+		*result = 0;
+	}
+
+	/*
+	 * Insert the new entry(s) into the correct block
+	 * (updating last hashval in the process).
+	 *
+	 * xfs_da_node_add() inserts BEFORE the given index,
+	 * and as a result of using node_lookup_int() we always
+	 * point to a valid entry (not after one), but a split
+	 * operation always results in a new block whose hashvals
+	 * FOLLOW the current block.
+	 *
+	 * If we had double-split op below us, then add the extra block too.
+	 */
+	node = oldblk->bp->data;
+	if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
+		oldblk->index++;	/* insert after the entry the lookup found */
+		xfs_da_node_add(state, oldblk, addblk);
+		if (useextra) {
+			if (state->extraafter)
+				oldblk->index++;
+			xfs_da_node_add(state, oldblk, &state->extrablk);
+			state->extravalid = 0;
+		}
+	} else {
+		newblk->index++;	/* rebalance moved the insert point into newblk */
+		xfs_da_node_add(state, newblk, addblk);
+		if (useextra) {
+			if (state->extraafter)
+				newblk->index++;
+			xfs_da_node_add(state, newblk, &state->extrablk);
+			state->extravalid = 0;
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * Balance the btree elements between two intermediate nodes, usually one
+ * full and one empty; updates blk1/blk2 hashval and the insertion index.
+ *
+ * NOTE: if blk2 is empty, then it will get the upper half of blk1.
+ */
+STATIC void
+xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+				     xfs_da_state_blk_t *blk2)
+{
+	xfs_da_intnode_t *node1, *node2, *tmpnode;
+	xfs_da_node_entry_t *btree_s, *btree_d;	/* copy source / destination */
+	int count, tmp;
+	xfs_trans_t *tp;
+
+	node1 = blk1->bp->data;
+	node2 = blk2->bp->data;
+	/*
+	 * Figure out how many entries need to move, and in which direction.
+	 * Swap the nodes around if that makes it simpler.
+	 */
+	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
+	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
+	     (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
+	      INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
+		tmpnode = node1;
+		node1 = node2;
+		node2 = tmpnode;
+	}
+	ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;	/* >0: node1 -> node2 */
+	if (count == 0)
+		return;
+	tp = state->args->trans;
+	/*
+	 * Two cases: high-to-low and low-to-high.
+	 */
+	if (count > 0) {
+		/*
+		 * Move elements in node2 up to make a hole.
+		 */
+		if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
+			tmp *= (uint)sizeof(xfs_da_node_entry_t);
+			btree_s = &node2->btree[0];
+			btree_d = &node2->btree[count];
+			ovbcopy(btree_s, btree_d, tmp);
+		}
+
+		/*
+		 * Move the req'd B-tree elements from high in node1 to
+		 * low in node2.
+		 */
+		INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
+		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
+		btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
+		btree_d = &node2->btree[0];
+		bcopy(btree_s, btree_d, tmp);
+		INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
+
+	} else {
+		/*
+		 * Move the req'd B-tree elements from low in node2 to
+		 * high in node1.
+		 */
+		count = -count;
+		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
+		btree_s = &node2->btree[0];
+		btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
+		bcopy(btree_s, btree_d, tmp);
+		INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
+		xfs_da_log_buf(tp, blk1->bp,
+			XFS_DA_LOGRANGE(node1, btree_d, tmp));
+
+		/*
+		 * Move elements in node2 down to fill the hole.
+		 */
+		tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
+		tmp *= (uint)sizeof(xfs_da_node_entry_t);
+		btree_s = &node2->btree[count];
+		btree_d = &node2->btree[0];
+		ovbcopy(btree_s, btree_d, tmp);
+		INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
+	}
+
+	/*
+	 * Log header of node 1 and all current bits of node 2.
+	 */
+	xfs_da_log_buf(tp, blk1->bp,
+		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
+	xfs_da_log_buf(tp, blk2->bp,
+		XFS_DA_LOGRANGE(node2, &node2->hdr,
+			sizeof(node2->hdr) +
+			sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
+
+	/*
+	 * Record the last hashval from each block for upward propagation.
+	 * (note: don't use the swapped node pointers)
+	 */
+	node1 = blk1->bp->data;
+	node2 = blk2->bp->data;
+	blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+	blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+	/*
+	 * Adjust the expected index for insertion.
+	 */
+	if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
+		blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
+		blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;	/* make it invalid */
+	}
+}
+
+/*
+ * Add an entry for newblk to intermediate node oldblk at slot oldblk->index.
+ */
+STATIC void
+xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+			       xfs_da_state_blk_t *newblk)
+{
+	xfs_da_intnode_t *node;
+	xfs_da_node_entry_t *btree;	/* slot receiving the new entry */
+	int tmp;			/* bytes of entries shifted up */
+	xfs_mount_t *mp;
+
+	node = oldblk->bp->data;
+	mp = state->mp;
+	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
+	ASSERT(newblk->blkno != 0);
+	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
+		       newblk->blkno < mp->m_dirfreeblk);
+
+	/*
+	 * We may need to make some room before we insert the new node.
+	 */
+	tmp = 0;
+	btree = &node->btree[ oldblk->index ];
+	if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
+		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
+		ovbcopy(btree, btree + 1, tmp);	/* overlapping copy, one slot up */
+	}
+	INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
+	INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
+	xfs_da_log_buf(state->args->trans, oldblk->bp,
+		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));	/* new entry plus shifted ones */
+	INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
+	xfs_da_log_buf(state->args->trans, oldblk->bp,
+		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
+
+	/*
+	 * Copy the last hash value from the oldblk to propagate upwards.
+	 */
+	oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+}
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Deallocate an empty leaf node, remove it from its parent, possibly
+ * deallocating that block, etc...  Returns 0 or the first error encountered.
+ */
+int
+xfs_da_join(xfs_da_state_t *state)
+{
+	xfs_da_state_blk_t *drop_blk, *save_blk;
+	int action, error;
+
+	action = 0;
+	drop_blk = &state->path.blk[ state->path.active-1 ];
+	save_blk = &state->altpath.blk[ state->path.active-1 ];
+	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
+	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
+	       drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
+
+	/*
+	 * Walk back up the tree joining/deallocating as necessary.
+	 * When we stop dropping blocks, break out.
+	 */
+	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
+		 state->path.active--) {
+		/*
+		 * See if we can combine the block with a neighbor.
+		 *   (action == 0) => no options, just leave
+		 *   (action == 1) => coalesce, then unlink
+		 *   (action == 2) => block empty, unlink it
+		 */
+		switch (drop_blk->magic) {
+		case XFS_ATTR_LEAF_MAGIC:
+#ifndef __KERNEL__
+			error = ENOTTY;	/* attr leaf joins are not built outside the kernel */
+#else
+			error = xfs_attr_leaf_toosmall(state, &action);
+#endif
+			if (error)
+				return(error);
+			if (action == 0)
+				return(0);
+#ifdef __KERNEL__
+			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
+#endif
+			break;
+		case XFS_DIR_LEAF_MAGIC:
+			ASSERT(XFS_DIR_IS_V1(state->mp));
+			error = xfs_dir_leaf_toosmall(state, &action);
+			if (error)
+				return(error);
+			if (action == 0)
+				return(0);
+			xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
+			break;
+		case XFS_DIR2_LEAFN_MAGIC:
+			ASSERT(XFS_DIR_IS_V2(state->mp));
+			error = xfs_dir2_leafn_toosmall(state, &action);
+			if (error)
+				return error;
+			if (action == 0)
+				return 0;
+			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
+			break;
+		case XFS_DA_NODE_MAGIC:
+			/*
+			 * Remove the offending node, fixup hashvals,
+			 * check for a toosmall neighbor.
+			 */
+			xfs_da_node_remove(state, drop_blk);
+			xfs_da_fixhashpath(state, &state->path);
+			error = xfs_da_node_toosmall(state, &action);
+			if (error)
+				return(error);
+			if (action == 0)
+				return 0;
+			xfs_da_node_unbalance(state, drop_blk, save_blk);
+			break;
+		}
+		xfs_da_fixhashpath(state, &state->altpath);
+		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
+		xfs_da_state_kill_altpath(state);	/* runs whether or not the unlink failed */
+		if (error)
+			return(error);
+		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
+							 drop_blk->bp);
+		drop_blk->bp = NULL;	/* cleared whether or not the shrink failed */
+		if (error)
+			return(error);
+	}
+	/*
+	 * We joined all the way to the top.  If it turns out that
+	 * we only have one entry in the root, make the child block
+	 * the new root.
+	 */
+	xfs_da_node_remove(state, drop_blk);
+	xfs_da_fixhashpath(state, &state->path);
+	error = xfs_da_root_join(state, &state->path.blk[0]);
+	return(error);
+}
+
+/*
+ * If the root has at most one entry, copy its child (btree[0].before) over
+ * the root block's buffer and free the child block; otherwise do nothing.
+ */
+STATIC int
+xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
+{
+	xfs_da_intnode_t *oldroot;
+	/* REFERENCED */
+	xfs_da_blkinfo_t *blkinfo;
+	xfs_da_args_t *args;
+	xfs_dablk_t child;
+	xfs_dabuf_t *bp;
+	int error;
+
+	args = state->args;
+	ASSERT(args != NULL);
+	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
+	oldroot = root_blk->bp->data;
+	ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(INT_ISZERO(oldroot->hdr.info.forw, ARCH_CONVERT));
+	ASSERT(INT_ISZERO(oldroot->hdr.info.back, ARCH_CONVERT));
+
+	/*
+	 * If the root has more than one child, then don't do anything.
+	 */
+	if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
+		return(0);
+
+	/*
+	 * Read in the (only) child block, then copy those bytes into
+	 * the root block's buffer and free the original child block.
+	 */
+	child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
+	ASSERT(child != 0);
+	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
+					     args->whichfork);
+	if (error)
+		return(error);
+	ASSERT(bp != NULL);
+	blkinfo = bp->data;	/* used only by the ASSERTs below */
+	if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
+		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+	} else {
+		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	}
+	ASSERT(INT_GET(blkinfo->forw, ARCH_CONVERT) == 0);
+	ASSERT(INT_GET(blkinfo->back, ARCH_CONVERT) == 0);
+	bcopy(bp->data, root_blk->bp->data, state->blocksize);	/* whole block, not just used part */
+	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+	error = xfs_da_shrink_inode(args, child, bp);
+	return(error);
+}
+
+/*
+ * Check a node block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.  The verdict is stored in *action;
+ * the return value is 0 or an error from reading/shifting to a sibling.
+ * *action = 0: block over 50% full, or nothing can be done.
+ * *action = 2: block is empty; the state structure is filled in.
+ * *action = 1: block can be collapsed; the state structure is filled in.
+ */
+STATIC int
+xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
+{
+	xfs_da_intnode_t *node;
+	xfs_da_state_blk_t *blk;
+	xfs_da_blkinfo_t *info;
+	int count, forward, error, retval, i;
+	xfs_dablk_t blkno;
+	xfs_dabuf_t *bp;
+
+	/*
+	 * Check for the degenerate case of the block being over 50% full.
+	 * If so, it's not worth even looking to see if we might be able
+	 * to coalesce with a sibling.
+	 */
+	blk = &state->path.blk[ state->path.active-1 ];
+	info = blk->bp->data;
+	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	node = (xfs_da_intnode_t *)info;
+	count = INT_GET(node->hdr.count, ARCH_CONVERT);
+	if (count > (XFS_DA_NODE_ENTRIES(state->mp) >> 1)) {
+		*action = 0;	/* blk over 50%, dont try to join */
+		return(0);	/* blk over 50%, dont try to join */
+	}
+
+	/*
+	 * Check for the degenerate case of the block being empty.
+	 * If the block is empty, we'll simply delete it, no need to
+	 * coalesce it with a sibling block.  We choose (arbitrarily)
+	 * to merge with the forward block unless it is NULL.
+	 */
+	if (count == 0) {
+		/*
+		 * Make altpath point to the block we want to keep and
+		 * path point to the block we want to drop (this one).
+		 */
+		forward = (!INT_ISZERO(info->forw, ARCH_CONVERT));
+		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		error = xfs_da_path_shift(state, &state->altpath, forward,
+						 0, &retval);
+		if (error)
+			return(error);	/* *action is left unset */
+		if (retval) {		/* path_shift found no sibling */
+			*action = 0;
+		} else {
+			*action = 2;
+		}
+		return(0);
+	}
+
+	/*
+	 * Examine each sibling block to see if we can coalesce with
+	 * at least 25% free space to spare.  We need to figure out
+	 * whether to merge with the forward or the backward block.
+	 * We prefer coalescing with the lower numbered sibling so as
+	 * to shrink a directory over time.
+	 */
+	/* start with smaller blk num */
+	forward = (INT_GET(info->forw, ARCH_CONVERT)
+				< INT_GET(info->back, ARCH_CONVERT));
+	for (i = 0; i < 2; forward = !forward, i++) {	/* try both sides */
+		if (forward)
+			blkno = INT_GET(info->forw, ARCH_CONVERT);
+		else
+			blkno = INT_GET(info->back, ARCH_CONVERT);
+		if (blkno == 0)		/* no sibling on this side */
+			continue;
+		error = xfs_da_read_buf(state->args->trans, state->args->dp,
+					blkno, -1, &bp, state->args->whichfork);
+		if (error)
+			return(error);	/* *action is left unset */
+		ASSERT(bp != NULL);
+
+		node = (xfs_da_intnode_t *)info;	/* this block */
+		count  = XFS_DA_NODE_ENTRIES(state->mp);	/* capacity */
+		count -= XFS_DA_NODE_ENTRIES(state->mp) >> 2;	/* less 25% */
+		count -= INT_GET(node->hdr.count, ARCH_CONVERT); /* less ours */
+		node = bp->data;			/* the sibling */
+		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		count -= INT_GET(node->hdr.count, ARCH_CONVERT); /* less theirs */
+		xfs_da_brelse(state->args->trans, bp);
+		if (count >= 0)
+			break;	/* fits with at least 25% to spare */
+	}
+	if (i >= 2) {		/* neither side had a sibling with room */
+		*action = 0;
+		return(0);
+	}
+
+	/*
+	 * Make altpath point to the block we want to keep (the lower
+	 * numbered block) and path point to the block we want to drop.
+	 */
+	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	if (blkno < blk->blkno) {	/* sibling is lower: keep it, drop us */
+		error = xfs_da_path_shift(state, &state->altpath, forward,
+						 0, &retval);
+		if (error) {
+			return(error);
+		}
+		if (retval) {
+			*action = 0;
+			return(0);
+		}
+	} else {			/* we are lower: keep us, drop sibling */
+		error = xfs_da_path_shift(state, &state->path, forward,
+						 0, &retval);
+		if (error) {
+			return(error);
+		}
+		if (retval) {
+			*action = 0;
+			return(0);
+		}
+	}
+	*action = 1;
+	return(0);
+}
+
+
+/*
+ * Propagate the bottom block's last hash value up the given path,
+ * stopping at the first ancestor whose entry already matches.
+ */
+void
+xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
+{
+	xfs_da_node_entry_t *entry;
+	xfs_da_intnode_t *parent;
+	xfs_da_state_blk_t *cur;
+	xfs_dahash_t newhash;
+	int depth, nents;
+
+	depth = path->active - 1;
+	cur = &path->blk[depth];
+	switch (cur->magic) {
+#ifdef __KERNEL__
+	case XFS_ATTR_LEAF_MAGIC:
+		newhash = xfs_attr_leaf_lasthash(cur->bp, &nents);
+		if (nents == 0)
+			return;
+		break;
+#endif
+	case XFS_DIR_LEAF_MAGIC:
+		ASSERT(XFS_DIR_IS_V1(state->mp));
+		newhash = xfs_dir_leaf_lasthash(cur->bp, &nents);
+		if (nents == 0)
+			return;
+		break;
+	case XFS_DIR2_LEAFN_MAGIC:
+		ASSERT(XFS_DIR_IS_V2(state->mp));
+		newhash = xfs_dir2_leafn_lasthash(cur->bp, &nents);
+		if (nents == 0)
+			return;
+		break;
+	case XFS_DA_NODE_MAGIC:
+		newhash = xfs_da_node_lasthash(cur->bp, &nents);
+		if (nents == 0)
+			return;
+		break;
+	}
+	while (--depth >= 0) {
+		cur--;
+		parent = cur->bp->data;
+		ASSERT(INT_GET(parent->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		entry = &parent->btree[cur->index];
+		if (INT_GET(entry->hashval, ARCH_CONVERT) == newhash)
+			return;
+		cur->hashval = newhash;
+		INT_SET(entry->hashval, ARCH_CONVERT, newhash);
+		xfs_da_log_buf(state->args->trans, cur->bp,
+				  XFS_DA_LOGRANGE(parent, entry, sizeof(*entry)));
+		newhash = INT_GET(parent->btree[INT_GET(parent->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	}
+}
+
+
+
+/*
+ * Delete the entry at drop_blk->index from an intermediate node.
+ */
+STATIC void
+xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
+{
+	xfs_da_intnode_t *node = drop_blk->bp->data;
+	xfs_da_node_entry_t *entry;
+	int nents, nbytes;
+
+	nents = INT_GET(node->hdr.count, ARCH_CONVERT);
+	ASSERT(drop_blk->index < nents);
+	ASSERT(drop_blk->index >= 0);
+
+	/*
+	 * Slide any following entries down over the victim; the slot to
+	 * clear is then the old last one (or the victim itself if last).
+	 */
+	entry = &node->btree[drop_blk->index];
+	if (drop_blk->index < nents - 1) {
+		nbytes = nents - drop_blk->index - 1;
+		nbytes *= (uint)sizeof(xfs_da_node_entry_t);
+		ovbcopy(entry + 1, entry, nbytes);
+		xfs_da_log_buf(state->args->trans, drop_blk->bp,
+		    XFS_DA_LOGRANGE(node, entry, nbytes));
+		entry = &node->btree[nents - 1];
+	}
+	bzero((char *)entry, sizeof(xfs_da_node_entry_t));
+	xfs_da_log_buf(state->args->trans, drop_blk->bp,
+	    XFS_DA_LOGRANGE(node, entry, sizeof(*entry)));
+	INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
+	xfs_da_log_buf(state->args->trans, drop_blk->bp,
+	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
+
+	/*
+	 * Record the new last hash value so it can be propagated upwards.
+	 */
+	drop_blk->hashval = INT_GET(entry[-1].hashval, ARCH_CONVERT);
+}
+
+/*
+ * Unbalance the btree elements between two intermediate nodes,
+ * move all Btree elements from drop_blk's node into save_blk's node.
+ */
+STATIC void
+xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+				     xfs_da_state_blk_t *save_blk)
+{
+	xfs_da_intnode_t *drop_node, *save_node;
+	xfs_da_node_entry_t *btree;
+	int tmp;
+	xfs_trans_t *tp;
+
+	drop_node = drop_blk->bp->data;
+	save_node = save_blk->bp->data;
+	ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	tp = state->args->trans;
+
+	/*
+	 * If the dying block has lower hashvals, then move all the
+	 * elements in the remaining block up to make a hole.
+	 */
+	if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
+	    (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
+	     INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
+	{
+		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
+		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
+		ovbcopy(&save_node->btree[0], btree, tmp);
+		btree = &save_node->btree[0];	/* hole starts at slot 0 */
+		xfs_da_log_buf(tp, save_blk->bp,
+			XFS_DA_LOGRANGE(save_node, btree,
+				(INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
+				sizeof(xfs_da_node_entry_t)));
+	} else {
+		/* append after save_node's current last entry */
+		btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
+		xfs_da_log_buf(tp, save_blk->bp,
+			XFS_DA_LOGRANGE(save_node, btree,
+				INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
+				sizeof(xfs_da_node_entry_t)));
+	}
+
+	/*
+	 * Move all the B-tree elements from drop_blk to save_blk.
+	 */
+	tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
+	bcopy(&drop_node->btree[0], btree, tmp);	/* fills range logged above */
+	INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
+
+	xfs_da_log_buf(tp, save_blk->bp,
+		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
+			sizeof(save_node->hdr)));
+
+	/*
+	 * Save the last hashval in the remaining block for upward propagation.
+	 */
+	save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+}
+
+
+/*========================================================================
+ * Routines used for finding things in the Btree.
+ *========================================================================*/
+
+/*
+ * Walk down the Btree looking for a particular filename, filling
+ * in the state structure as we go.  The lookup status of the final leaf
+ * is passed back in *result; the return value is 0 or an error.
+ *
+ * We will set the state structure to point to each of the elements
+ * in each of the nodes where either the hashval is or should be.
+ * We support duplicate hashval's so for each entry in the current
+ * node that could contain the desired hashval, descend.  This is a
+ * pruned depth-first tree search.
+ */
+int							/* error */
+xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
+{
+	xfs_da_state_blk_t *blk;
+	xfs_da_blkinfo_t *curr;
+	xfs_da_intnode_t *node;
+	xfs_da_node_entry_t *btree;
+	xfs_dablk_t blkno;
+	int probe, span, max, error, retval;
+	xfs_dahash_t hashval;
+	xfs_da_args_t *args;
+
+	args = state->args;
+	/*
+	 * Descend thru the B-tree searching each level for the right
+	 * node to use, until the right hashval is found.
+	 */
+	if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
+		blkno = state->mp->m_dirleafblk; /* v2 dir: descent starts here */
+	else
+		blkno = 0;
+	for (blk = &state->path.blk[0], state->path.active = 1;
+			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
+			 blk++, state->path.active++) {
+		/*
+		 * Read the next node down in the tree.
+		 */
+		blk->blkno = blkno;
+		error = xfs_da_read_buf(state->args->trans, state->args->dp,
+					blkno, -1, &blk->bp,
+					state->args->whichfork);
+		if (error) {
+			/* back this level out of the path; *result is not set */
+			blk->blkno = 0;
+			state->path.active--;
+			return(error);
+		}
+		ASSERT(blk->bp != NULL);
+		curr = blk->bp->data;
+		ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
+		       INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+
+		/*
+		 * Search an intermediate node for a match.
+		 */
+		blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
+		if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+			node = blk->bp->data;
+			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+			/*
+			 * Binary search.  (note: small blocks will skip loop)
+			 */
+			max = INT_GET(node->hdr.count, ARCH_CONVERT);
+			probe = span = max / 2;
+			hashval = state->args->hashval;
+			for (btree = &node->btree[probe]; span > 4;
+				   btree = &node->btree[probe]) {
+				span /= 2;
+				if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
+					probe += span;
+				else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
+					probe -= span;
+				else
+					break;
+			}
+			ASSERT((probe >= 0) && (probe < max));
+			ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
+
+			/*
+			 * Since we may have duplicate hashval's, find the first
+			 * matching hashval in the node.
+			 */
+			while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
+				btree--;
+				probe--;
+			}
+			while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
+				btree++;
+				probe++;
+			}
+
+			/*
+			 * Pick the right block to descend on.
+			 */
+			if (probe == max) {	/* all entries < hashval: use last */
+				blk->index = max-1;
+				blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
+			} else {
+				blk->index = probe;
+				blkno = INT_GET(btree->before, ARCH_CONVERT);	
+			}
+		}
+#ifdef __KERNEL__
+		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
+			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
+			break;
+		}
+#endif
+		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+			blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
+			break;
+		}
+		else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
+			break;
+		}
+	}
+
+	/*
+	 * A leaf block that ends in the hashval that we are interested in
+	 * (final hashval == search hashval) means that the next block may
+	 * contain more entries with the same hashval, shift upward to the
+	 * next leaf and keep searching.
+	 */
+	for (;;) {
+		/* NOTE(review): retval is assigned only for the magics below */
+		if (blk->magic == XFS_DIR_LEAF_MAGIC) {
+			ASSERT(XFS_DIR_IS_V1(state->mp));
+			retval = xfs_dir_leaf_lookup_int(blk->bp, state->args,
+								  &blk->index);
+		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
+			ASSERT(XFS_DIR_IS_V2(state->mp));
+			retval = xfs_dir2_leafn_lookup_int(blk->bp, state->args,
+							&blk->index, state);
+		}
+#ifdef __KERNEL__
+		else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+			retval = xfs_attr_leaf_lookup_int(blk->bp, state->args);
+			blk->index = state->args->index;
+			state->args->blkno = blk->blkno;
+		}
+#endif
+		if (((retval == ENOENT) || (retval == ENOATTR)) &&
+		    (blk->hashval == state->args->hashval)) {
+			error = xfs_da_path_shift(state, &state->path, 1, 1,
+							 &retval);
+			if (error)
+				return(error);
+			if (retval == 0) {	/* moved to next leaf: search it */
+				continue;
+			}
+#ifdef __KERNEL__
+			else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+				/* path_shift() gives ENOENT */
+				retval = XFS_ERROR(ENOATTR);
+			}
+#endif
+		}
+		break;
+	}
+	*result = retval;
+	return(0);	
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Link new_blk into the doubly linked sibling list next to old_blk.
+ */
+int							/* error */
+xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
+			       xfs_da_state_blk_t *new_blk)
+{
+	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
+	xfs_da_args_t *args;
+	int before, error;
+	xfs_dabuf_t *bp;
+
+	/*
+	 * Set up environment.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	old_info = old_blk->bp->data;
+	new_info = new_blk->bp->data;
+	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
+	       old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
+	ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
+	ASSERT(old_blk->magic == new_blk->magic);
+
+	switch (old_blk->magic) { /* "before" is set only for these magics */
+#ifdef __KERNEL__
+	case XFS_ATTR_LEAF_MAGIC:
+		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
+		break;
+#endif
+	case XFS_DIR_LEAF_MAGIC:
+		ASSERT(XFS_DIR_IS_V1(state->mp));
+		before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
+		break;
+	case XFS_DIR2_LEAFN_MAGIC:
+		ASSERT(XFS_DIR_IS_V2(state->mp));
+		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
+		break;
+	case XFS_DA_NODE_MAGIC:
+		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
+		break;
+	}
+
+	/*
+	 * Link blocks in appropriate order.
+	 */
+	if (before) {
+		/*
+		 * Link new block in before existing block.
+		 */
+		INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
+		new_info->back = old_info->back; /* INT_: direct copy */
+		if (INT_GET(old_info->back, ARCH_CONVERT)) {
+			/* old_blk had a predecessor: point its forw at new_blk */
+			error = xfs_da_read_buf(args->trans, args->dp,
+						INT_GET(old_info->back,
+							ARCH_CONVERT), -1, &bp,
+						args->whichfork);
+			if (error)
+				return(error);
+			ASSERT(bp != NULL);
+			tmp_info = bp->data;
+			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
+			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
+			INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
+			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
+			xfs_da_buf_done(bp);
+		}
+		INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
+	} else {
+		/*
+		 * Link new block in after existing block.
+		 */
+		new_info->forw = old_info->forw; /* INT_: direct copy */
+		INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
+		if (INT_GET(old_info->forw, ARCH_CONVERT)) {
+			/* old_blk had a successor: point its back at new_blk */
+			error = xfs_da_read_buf(args->trans, args->dp,
+						INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
+						args->whichfork);
+			if (error)
+				return(error);
+			ASSERT(bp != NULL);
+			tmp_info = bp->data;
+			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
+				    == INT_GET(old_info->magic, ARCH_CONVERT));
+			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
+				    == old_blk->blkno);
+			INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
+			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
+			xfs_da_buf_done(bp);
+		}
+		INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
+	}
+
+	/* log the blkinfo header (first sizeof(*tmp_info) bytes) of both blocks */
+	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
+	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
+	return(0);
+}
+
+
+/*
+ * Compare two intermediate nodes for "order"; 1 means node2 sorts first.
+ */
+STATIC int
+xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
+{
+	xfs_da_intnode_t *n1 = node1_bp->data;
+	xfs_da_intnode_t *n2 = node2_bp->data;
+	int cnt1, cnt2;
+
+	ASSERT((INT_GET(n1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
+	       (INT_GET(n2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
+	cnt1 = INT_GET(n1->hdr.count, ARCH_CONVERT);
+	cnt2 = INT_GET(n2->hdr.count, ARCH_CONVERT);
+	if (cnt1 <= 0 || cnt2 <= 0)
+		return 0;	/* an empty node never sorts first */
+	return (INT_GET(n2->btree[0].hashval, ARCH_CONVERT) <
+		INT_GET(n1->btree[0].hashval, ARCH_CONVERT)) ||
+	       (INT_GET(n2->btree[cnt2 - 1].hashval, ARCH_CONVERT) <
+		INT_GET(n1->btree[cnt1 - 1].hashval, ARCH_CONVERT));
+}
+
+
+/*
+ * Return the last hashvalue of an intermediate node, or 0 if it has no
+ * entries.  When count is non-NULL the entry count is stored in *count.
+ */
+STATIC uint
+xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
+{
+	xfs_da_intnode_t *node = bp->data;
+	int nents;
+
+	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	nents = INT_GET(node->hdr.count, ARCH_CONVERT);
+	if (count)
+		*count = nents;
+	return nents ? INT_GET(node->btree[nents - 1].hashval, ARCH_CONVERT) : 0;
+}
+
+/*
+ * Unlink drop_blk from the doubly linked sibling list it shares with save_blk.
+ */
+int							/* error */
+xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+				 xfs_da_state_blk_t *save_blk)
+{
+	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
+	xfs_da_args_t *args;
+	xfs_dabuf_t *bp;
+	int error;
+
+	/*
+	 * Set up environment.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	save_info = save_blk->bp->data;
+	drop_info = drop_blk->bp->data;
+	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
+	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
+	ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
+	ASSERT(save_blk->magic == drop_blk->magic);
+	ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
+	       (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
+	ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
+	       (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
+
+	/*
+	 * Unlink the leaf block from the doubly linked chain of leaves.
+	 */
+	if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
+		/* drop_blk precedes save_blk: splice around it backwards */
+		save_info->back = drop_info->back; /* INT_: direct copy */
+		if (INT_GET(drop_info->back, ARCH_CONVERT)) {
+			error = xfs_da_read_buf(args->trans, args->dp,
+						INT_GET(drop_info->back,
+							ARCH_CONVERT), -1, &bp,
+						args->whichfork);
+			if (error)
+				return(error);
+			ASSERT(bp != NULL);
+			tmp_info = bp->data;
+			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
+			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
+			INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
+			xfs_da_log_buf(args->trans, bp, 0,
+						    sizeof(*tmp_info) - 1);
+			xfs_da_buf_done(bp);
+		}
+	} else {
+		/* drop_blk follows save_blk: splice around it forwards */
+		save_info->forw = drop_info->forw; /* INT_: direct copy */
+		if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
+			error = xfs_da_read_buf(args->trans, args->dp,
+						INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
+						args->whichfork);
+			if (error)
+				return(error);
+			ASSERT(bp != NULL);
+			tmp_info = bp->data;
+			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
+				    == INT_GET(save_info->magic, ARCH_CONVERT));
+			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
+				    == drop_blk->blkno);
+			INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
+			xfs_da_log_buf(args->trans, bp, 0,
+						    sizeof(*tmp_info) - 1);
+			xfs_da_buf_done(bp);
+		}
+	}
+
+	/* log save_blk's updated blkinfo header; drop_blk's links are untouched */
+	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+	return(0);
+}
+
+/*
+ * Move a path "forward" or "!forward" one block at the current level.
+ * This routine will adjust a "path" to point to the next block
+ * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
+ * Btree, including updating pointers to the intermediate nodes between
+ * the new bottom and the root.  *result is set to 0 on success or to
+ * ENOENT when the path is already at that edge of the tree.
+ */
+int							/* error */
+xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
+				 int forward, int release, int *result)
+{
+	xfs_da_state_blk_t *blk;
+	xfs_da_blkinfo_t *info;
+	xfs_da_intnode_t *node;
+	xfs_da_args_t *args;
+	xfs_dablk_t blkno;
+	int level, error;
+
+	/*
+	 * Roll up the Btree looking for the first block where our
+	 * current index is not at the edge of the block.  Note that
+	 * we skip the bottom layer because we want the sibling block.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	ASSERT(path != NULL);
+	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+	level = (path->active-1) - 1;	/* skip bottom layer in path */
+	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
+		ASSERT(blk->bp != NULL);
+		node = blk->bp->data;
+		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
+			blk->index++;
+			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			break;
+		} else if (!forward && (blk->index > 0)) {
+			blk->index--;
+			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			break;
+		}
+	}
+	if (level < 0) {	/* every level was already at its edge */
+		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
+		ASSERT(args->oknoent);
+		return(0);
+	}
+
+	/*
+	 * Roll down the edge of the subtree until we reach the
+	 * same depth we were at originally.
+	 */
+	for (blk++, level++; level < path->active; blk++, level++) {
+		/*
+		 * Release the old block, but only if the caller asked for it.
+		 * (if it's dirty, trans won't actually let go)
+		 */
+		if (release)
+			xfs_da_brelse(args->trans, blk->bp);
+
+		/*
+		 * Read the next child block.
+		 */
+		blk->blkno = blkno;
+		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
+						     &blk->bp, args->whichfork);
+		if (error)
+			return(error);	/* *result is not set on this path */
+		ASSERT(blk->bp != NULL);
+		info = blk->bp->data;
+		ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
+		       INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+		blk->magic = INT_GET(info->magic, ARCH_CONVERT);
+		if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+			node = (xfs_da_intnode_t *)info;
+			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+			if (forward)
+				blk->index = 0;	/* enter at the near edge */
+			else
+				blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
+			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+		} else {
+			/* a leaf is only expected at the bottom of the path */
+			ASSERT(level == path->active-1);
+			blk->index = 0;
+			switch(blk->magic) {
+#ifdef __KERNEL__
+			case XFS_ATTR_LEAF_MAGIC:
+				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
+								      NULL);
+				break;
+#endif
+			case XFS_DIR_LEAF_MAGIC:
+				ASSERT(XFS_DIR_IS_V1(state->mp));
+				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
+								     NULL);
+				break;
+			case XFS_DIR2_LEAFN_MAGIC:
+				ASSERT(XFS_DIR_IS_V2(state->mp));
+				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
+								       NULL);
+				break;
+			default:
+				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
+				       blk->magic ==
+				       XFS_DIRX_LEAF_MAGIC(state->mp));
+				break;
+			}
+		}
+	}
+	*result = 0;
+	return(0);
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ * Each byte is hashed as an unsigned value, so names with bytes >= 0x80
+ * hash identically whether or not plain "char" is signed, and no
+ * negative value is ever left-shifted.  Returns the hash of the first
+ * namelen bytes of name (0 for namelen <= 0).
+ */
+xfs_dahash_t
+xfs_da_hashname(char *name, int namelen)
+{
+	unsigned char *p = (unsigned char *)name;
+	xfs_dahash_t hash;
+
+#define	ROTL(x,y)	(((x) << (y)) | ((x) >> (32 - (y))))
+#ifdef SLOWVERSION
+	/* This is the old one-byte-at-a-time version. */
+	for (hash = 0; namelen > 0; namelen--) {
+		hash = *p++ ^ ROTL(hash, 7);
+	}
+	return(hash);
+#else
+	/* Do four characters at a time as long as we can. */
+	for (hash = 0; namelen >= 4; namelen -= 4, p += 4) {
+		hash = (p[0] << 21) ^ (p[1] << 14) ^ (p[2] << 7) ^
+		       (p[3] << 0) ^ ROTL(hash, 7 * 4);
+	}
+	/* Now do the rest of the characters. */
+	switch (namelen) {
+	case 3:
+		return (p[0] << 14) ^ (p[1] << 7) ^ (p[2] << 0) ^
+		       ROTL(hash, 7 * 3);
+	case 2:
+		return (p[0] << 7) ^ (p[1] << 0) ^ ROTL(hash, 7 * 2);
+	case 1:
+		return (p[0] << 0) ^ ROTL(hash, 7 * 1);
+	case 0:
+		return hash;
+	}
+	/* NOTREACHED */
+#endif
+#undef ROTL
+	return 0; /* keep gcc happy */
+}
+
+/*
+ * Add a block to the btree ahead of the file.  The new block number is
+ * returned in *new_blkno; the function value is 0 or an error (ENOSPC).
+ */
+int
+xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
+{
+	xfs_fileoff_t bno, b;
+	xfs_bmbt_irec_t map;
+	xfs_bmbt_irec_t	*mapp;
+	xfs_inode_t *dp;
+	int nmap, error, w, count, c, got, i, mapi;
+	xfs_fsize_t size;
+	xfs_trans_t *tp;
+	xfs_mount_t *mp;
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	w = args->whichfork;
+	tp = args->trans;
+	/*
+	 * For new directories adjust the file offset and block count.
+	 */
+	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
+		bno = mp->m_dirleafblk;
+		count = mp->m_dirblkfsbs;
+	} else {
+		bno = 0;
+		count = 1;
+	}
+	/*
+	 * Find a spot in the file space to put the new block.
+	 */
+	if (error = xfs_bmap_first_unused(tp, dp, count, &bno, w)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
+	/*
+	 * Try mapping it as one contiguous extent (XFS_BMAPI_CONTIG).
+	 */
+	nmap = 1;
+	ASSERT(args->firstblock != NULL);
+	if (error = xfs_bmapi(tp, dp, bno, count,
+			XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
+			XFS_BMAPI_CONTIG,
+			args->firstblock, args->total, &map, &nmap,
+			args->flist)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(nmap <= 1);
+	if (nmap == 1) {	/* got it in one piece: use the stack record */
+		mapp = &map;
+		mapi = 1;
+	}
+	/*
+	 * If we didn't get it and the block might work if fragmented,
+	 * try without the CONTIG flag.  Loop until we get it all.
+	 */
+	else if (nmap == 0 && count > 1) {
+#pragma mips_frequency_hint NEVER
+		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+		for (b = bno, mapi = 0; b < bno + count; ) {
+			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
+			c = (int)(bno + count - b);	/* blocks still unmapped */
+			if (error = xfs_bmapi(tp, dp, b, c,
+					XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
+					XFS_BMAPI_METADATA,
+					args->firstblock, args->total,
+					&mapp[mapi], &nmap, args->flist)) {
+				kmem_free(mapp, sizeof(*mapp) * count);
+				return error;
+			}
+			if (nmap < 1)	/* nothing more could be mapped */
+				break;
+			mapi += nmap;
+			b = mapp[mapi - 1].br_startoff +
+			    mapp[mapi - 1].br_blockcount;
+		}
+	} else {
+#pragma mips_frequency_hint NEVER
+		mapi = 0;
+		mapp = NULL;
+	}
+	/*
+	 * Count the blocks we got, make sure it matches the total.
+	 * (When mapi == 0, got != count short-circuits the mapp[] tests.)
+	 */
+	for (i = 0, got = 0; i < mapi; i++)
+		got += mapp[i].br_blockcount;
+	if (got != count || mapp[0].br_startoff != bno ||
+	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
+	    bno + count) {
+#pragma mips_frequency_hint NEVER
+		if (mapp != &map)
+			kmem_free(mapp, sizeof(*mapp) * count);
+		return XFS_ERROR(ENOSPC);
+	}
+	if (mapp != &map)
+		kmem_free(mapp, sizeof(*mapp) * count);
+	*new_blkno = (xfs_dablk_t)bno;
+	/*
+	 * For version 1 directories, adjust the file size if it changed.
+	 */
+	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
+		ASSERT(mapi == 1);
+		if (error = xfs_bmap_last_offset(tp, dp, &bno, w))
+			return error;
+		size = XFS_FSB_TO_B(mp, bno);
+		if (size != dp->i_d.di_size) {
+			dp->i_d.di_size = size;
+			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * We need to always be able to remove a btree block, even if there's no
+ * space reservation because the filesystem is full.  This is called if
+ * xfs_bunmapi on a btree block fails due to ENOSPC.  It copies the last block
+ * over the target block, repoints the siblings and parent of the moved block,
+ * and returns the last block in *dead_blknop / *dead_bufp.  The last block in
+ * the file can always be removed since it can't cause a bmap btree split.
+ */
+STATIC int
+xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
+		      xfs_dabuf_t **dead_bufp)
+{
+	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
+	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
+	xfs_fileoff_t lastoff;
+	xfs_inode_t *ip;
+	xfs_trans_t *tp;
+	xfs_mount_t *mp;
+	int error, w, entno, level, dead_level;
+	xfs_da_blkinfo_t *dead_info, *sib_info;
+	xfs_da_intnode_t *par_node, *dead_node;
+	xfs_dir_leafblock_t *dead_leaf;
+	xfs_dir2_leaf_t *dead_leaf2;
+	xfs_dahash_t dead_hash;
+
+	dead_buf = *dead_bufp;
+	dead_blkno = *dead_blknop;
+	tp = args->trans;
+	ip = args->dp;
+	w = args->whichfork;
+	ASSERT(w == XFS_DATA_FORK);
+	mp = ip->i_mount;
+	if (XFS_DIR_IS_V2(mp)) {
+		lastoff = mp->m_dirfreeblk;
+		error = xfs_bmap_last_before(tp, ip, &lastoff, w);
+	} else
+		error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
+	if (error)
+		return error;
+	if (lastoff == 0)
+		return XFS_ERROR(EFSCORRUPTED);
+	/*
+	 * Read the last block in the btree space (it starts one dir block below lastoff).
+	 */
+	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
+	if (error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w))
+		return error;
+	/*
+	 * Copy the last block into the dead buffer and log it.
+	 */
+	bcopy(last_buf->data, dead_buf->data, mp->m_dirblksize);
+	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+	dead_info = dead_buf->data;
+	/*
+	 * Get the level and the last entry's hashval from the moved block.
+	 */
+	if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+		ASSERT(XFS_DIR_IS_V1(mp));
+		dead_leaf = (xfs_dir_leafblock_t *)dead_info;
+		dead_level = 0;
+		dead_hash =
+			INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	} else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+		ASSERT(XFS_DIR_IS_V2(mp));
+		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
+		dead_level = 0;
+		dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	} else {
+		ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		dead_node = (xfs_da_intnode_t *)dead_info;
+		dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
+		dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	}
+	sib_buf = par_buf = NULL;	/* nothing for "done" to release yet */
+	/*
+	 * If the moved block has a left sibling, point its forw at the new location.
+	 */
+	if (sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT)) {
+		if (error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))
+			goto done;
+		sib_info = sib_buf->data;
+		if (INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
+		    INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT)) {
+			error = XFS_ERROR(EFSCORRUPTED);
+			goto done;
+		}
+		INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
+		xfs_da_log_buf(tp, sib_buf,
+			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
+					sizeof(sib_info->forw)));
+		xfs_da_buf_done(sib_buf);
+		sib_buf = NULL;
+	}
+	/*
+	 * If the moved block has a right sibling, point its back at the new location.
+	 */
+	if (sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT)) {
+		if (error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))
+			goto done;
+		sib_info = sib_buf->data;
+		if (   INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
+		    || INT_GET(sib_info->magic, ARCH_CONVERT)
+				!= INT_GET(dead_info->magic, ARCH_CONVERT)) {
+			error = XFS_ERROR(EFSCORRUPTED);
+			goto done;
+		}
+		INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
+		xfs_da_log_buf(tp, sib_buf,
+			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
+					sizeof(sib_info->back)));
+		xfs_da_buf_done(sib_buf);
+		sib_buf = NULL;
+	}
+	par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
+	level = -1;	/* no level seen yet, so the first node skips the level check */
+	/*
+	 * Walk down the tree looking for the parent of the moved block.
+	 */
+	for (;;) {
+		if (error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))
+			goto done;
+		par_node = par_buf->data;
+		if (INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
+		    (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1)) {
+			error = XFS_ERROR(EFSCORRUPTED);
+			goto done;
+		}
+		level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
+		for (entno = 0;
+		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
+		     INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
+		     entno++)
+			continue;
+		if (entno == INT_GET(par_node->hdr.count, ARCH_CONVERT)) {
+			error = XFS_ERROR(EFSCORRUPTED);
+			goto done;
+		}
+		par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
+		if (level == dead_level + 1)
+			break;
+		xfs_da_brelse(tp, par_buf);
+		par_buf = NULL;
+	}
+	/*
+	 * We're at the right level.  Look for the entry that points at last_blkno,
+	 * moving right along the forw links when this node doesn't have it.
+	 */
+	for (;;) {
+		for (;
+		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
+		     INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
+		     entno++)
+			continue;
+		if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
+			break;
+		par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
+		xfs_da_brelse(tp, par_buf);
+		par_buf = NULL;
+		if (par_blkno == 0) {
+			error = XFS_ERROR(EFSCORRUPTED);
+			goto done;
+		}
+		if (error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))
+			goto done;
+		par_node = par_buf->data;
+		if (INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
+		    INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) {
+			error = XFS_ERROR(EFSCORRUPTED);
+			goto done;
+		}
+		entno = 0;
+	}
+	/*
+	 * Update the parent entry pointing to the moved block.
+	 */
+	INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
+	xfs_da_log_buf(tp, par_buf,
+		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
+				sizeof(par_node->btree[entno].before)));
+	xfs_da_buf_done(par_buf);
+	xfs_da_buf_done(dead_buf);
+	*dead_blknop = last_blkno;	/* the caller now removes the old last block */
+	*dead_bufp = last_buf;
+	return 0;
+done:
+	if (par_buf)
+		xfs_da_brelse(tp, par_buf);
+	if (sib_buf)
+		xfs_da_brelse(tp, sib_buf);
+	xfs_da_brelse(tp, last_buf);
+	return error;
+}
+
+/*
+ * Remove a btree block from a directory or attribute fork.
+ *
+ * Unmaps the block(s) at dead_blkno and invalidates dead_buf.  When the unmap
+ * of a directory (data fork) block fails with ENOSPC, the last block is
+ * swapped into its place by xfs_da_swap_lastblock and the unmap is retried on
+ * the block that call hands back.  Returns 0 or an error code; the current
+ * dead_buf is invalidated on every path that leaves the unmap loop.
+ */
+int
+xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
+		    xfs_dabuf_t *dead_buf)
+{
+	xfs_trans_t *tp = args->trans;
+	xfs_inode_t *dp = args->dp;
+	xfs_mount_t *mp = dp->i_mount;
+	int w = args->whichfork;
+	int error, unmapped, nblks;
+	xfs_fileoff_t lastoff;
+	xfs_fsize_t newsize;
+
+	/* A version 2 directory block covers m_dirblkfsbs filesystem blocks. */
+	nblks = (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) ? mp->m_dirblkfsbs : 1;
+	for (;;) {
+		error = xfs_bunmapi(tp, dp, dead_blkno, nblks,
+				XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
+				0, args->firstblock, args->flist, &unmapped);
+		if (!error)
+			break;
+		/*
+		 * Only ENOSPC on a directory can be worked around, by moving
+		 * the last block to the place we want to kill.
+		 */
+		if (error != ENOSPC || w != XFS_DATA_FORK)
+			goto out_inval;
+		error = xfs_da_swap_lastblock(args, &dead_blkno, &dead_buf);
+		if (error)
+			goto out_inval;
+	}
+	ASSERT(unmapped);
+	xfs_da_binval(tp, dead_buf);
+	if (w != XFS_DATA_FORK || !XFS_DIR_IS_V1(mp))
+		return 0;
+	/*
+	 * Version 1 directory: set the size from the fork's last offset.
+	 */
+	error = xfs_bmap_last_offset(tp, dp, &lastoff, w);
+	if (error)
+		return error;
+	newsize = XFS_FSB_TO_B(mp, lastoff);
+	if (newsize != dp->i_d.di_size) {
+		dp->i_d.di_size = newsize;
+		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+	}
+	return 0;
+
+out_inval:
+	xfs_da_binval(tp, dead_buf);
+	return error;
+}
+
+/*
+ * See if the mapping(s) for this btree block are valid: no extent may be a
+ * hole or a delayed allocation, each extent must start where the previous
+ * one ended (the first at bno), and together they must end at bno + count.
+ * Returns nonzero if all of that holds, 0 otherwise; with nmap == 0 the
+ * result is nonzero only when count is 0.
+ */
+STATIC int
+xfs_da_map_covers_blocks(int nmap, xfs_bmbt_irec_t *mapp, xfs_dablk_t bno,
+			 int count)
+{
+	xfs_bmbt_irec_t	*ep;		/* extent being examined */
+	xfs_fileoff_t	next;		/* offset the next extent must start at */
+	int		i;
+
+	next = bno;
+	for (i = 0; i < nmap; i++) {
+		ep = &mapp[i];
+		if (ep->br_startblock == HOLESTARTBLOCK ||
+		    ep->br_startblock == DELAYSTARTBLOCK ||
+		    ep->br_startoff != next) {
+#pragma mips_frequency_hint NEVER
+			return 0;
+		}
+		next += ep->br_blockcount;
+	}
+	return next == bno + count;
+}
+
+/*
+ * Make a dabuf: map block bno of the fork and wrap the buffer(s) behind it.
+ * Used for get_buf, read_buf, read_bufr, and reada_buf.
+ */
+STATIC int
+xfs_da_do_buf(
+	xfs_trans_t	*trans,		/* passed through to the buffer routines */
+	xfs_inode_t	*dp,		/* inode whose fork is being accessed */
+	xfs_dablk_t	bno,		/* block offset within the fork */
+	xfs_daddr_t	*mappedbnop,	/* in: disk addr or -1/-2; out: disk addr */
+	xfs_dabuf_t	**bpp,		/* out (optional): dabuf, or NULL if none */
+	int		whichfork,	/* XFS_DATA_FORK or XFS_ATTR_FORK */
+	int		caller,		/* 0 get, 1 read, 2 read (non-kernel), 3 reada */
+	inst_t		*ra)		/* handed on to xfs_da_buf_make */
+{
+	xfs_buf_t		*bp = 0;
+	xfs_buf_t		**bplist;
+	int		error;
+	int		i;
+	xfs_bmbt_irec_t	map;
+	xfs_bmbt_irec_t	*mapp;
+	xfs_daddr_t	mappedbno;
+	xfs_mount_t	*mp;
+	int		nbplist;
+	int		nfsb;
+	int		nmap;
+	xfs_dabuf_t	*rbp;
+
+	mp = dp->i_mount;
+	if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+		nfsb = mp->m_dirblkfsbs;
+	else
+		nfsb = 1;
+	mappedbno = *mappedbnop;
+	/*
+	 * Caller doesn't have a mapping.  -2 means don't complain
+	 * if we land in a hole.
+	 */
+	if (mappedbno == -1 || mappedbno == -2) {
+		/*
+		 * Optimize the one-block case: a single lookup fills the on-stack map.
+		 */
+		if (nfsb == 1) {
+			xfs_fsblock_t	fsb;
+
+			if (error =
+			    xfs_bmapi_single(trans, dp, whichfork, &fsb,
+				    (xfs_fileoff_t)bno)) {
+#pragma mips_frequency_hint NEVER
+				return error;
+			}
+			mapp = &map;
+			if (fsb == NULLFSBLOCK) {
+#pragma mips_frequency_hint NEVER
+				nmap = 0;
+			} else {
+				map.br_startblock = fsb;
+				map.br_startoff = (xfs_fileoff_t)bno;
+				map.br_blockcount = 1;
+				nmap = 1;
+			}
+		} else {
+#pragma mips_frequency_hint NEVER
+			xfs_fsblock_t	firstblock;
+
+			firstblock = NULLFSBLOCK;
+			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
+			nmap = nfsb;
+			if (error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
+					nfsb,
+					XFS_BMAPI_METADATA |
+						XFS_BMAPI_AFLAG(whichfork),
+					&firstblock, 0, mapp, &nmap, NULL))
+				goto exit0;
+		}
+	} else {
+		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
+		map.br_startoff = (xfs_fileoff_t)bno;
+		map.br_blockcount = nfsb;
+		mapp = &map;
+		nmap = 1;
+	}
+	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
+#pragma mips_frequency_hint NEVER
+		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
+		goto exit0;	/* with -2 this is success and *bpp is set to NULL */
+	}
+	if (caller != 3 && nmap > 1) {
+#pragma mips_frequency_hint NEVER
+		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
+		nbplist = 0;
+	} else
+		bplist = NULL;
+	/*
+	 * Turn the mapping(s) into buffer(s), one buffer per extent.
+	 */
+	for (i = 0; i < nmap; i++) {
+		int	nmapped;
+
+		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
+		if (i == 0)
+			*mappedbnop = mappedbno;
+		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
+		switch (caller) {	/* NOTE(review): no default case - confirm callers */
+		case 0:
+			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
+				mappedbno, nmapped, 0);
+			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
+			break;
+		case 1:
+#ifndef __KERNEL__
+		case 2:
+#endif
+			bp = NULL;
+			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
+				mappedbno, nmapped, 0, &bp);
+			break;
+#ifdef __KERNEL__
+		case 3:
+			xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
+			error = 0;
+			bp = NULL;
+			break;
+#endif
+		}
+		if (error) {
+#pragma mips_frequency_hint NEVER
+			if (bp)
+				xfs_trans_brelse(trans, bp);
+			goto exit1;
+		}
+		if (!bp)
+			continue;
+		if (caller == 1) {
+			if (whichfork == XFS_ATTR_FORK) {
+				XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE, 
+						XFS_ATTR_BTREE_REF);
+			} else {
+				XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
+						XFS_DIR_BTREE_REF);
+			}
+		}
+		if (bplist) {
+#pragma mips_frequency_hint NEVER
+			bplist[nbplist++] = bp;
+		}
+	}
+	/*
+	 * Build a dabuf structure (none when no buffer was obtained, e.g. readahead).
+	 */
+	if (bplist) {
+#pragma mips_frequency_hint NEVER
+		rbp = xfs_da_buf_make(nbplist, bplist, ra);
+	} else if (bp)
+		rbp = xfs_da_buf_make(1, &bp, ra);
+	else
+		rbp = NULL;
+	/*
+	 * For read_buf, check the magic number against every known dir/attr type.
+	 */
+	if (caller == 1) {
+		xfs_dir2_data_t		*data;
+		xfs_dir2_free_t		*free;
+		xfs_da_blkinfo_t	*info;
+
+		info = rbp->data;	/* NOTE(review): assumes rbp != NULL here - confirm */
+		data = rbp->data;
+		free = rbp->data;
+		if (XFS_TEST_ERROR((INT_GET(info->magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) &&
+				   (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) &&
+				   (INT_GET(info->magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC) &&
+				   (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC) &&
+				   (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) &&
+				   (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) &&
+				   (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) &&
+				   (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
+				mp, XFS_ERRTAG_DA_READ_BUF,
+				XFS_RANDOM_DA_READ_BUF)) {
+#pragma mips_frequency_hint NEVER
+			xfs_buftrace("DA READ ERROR", rbp->bps[0]);
+			error = XFS_ERROR(EFSCORRUPTED);
+			xfs_da_brelse(trans, rbp);
+			nbplist = 0;	/* the brelse above released every buffer */
+			goto exit1;
+		}
+	}
+	if (bplist) {
+#pragma mips_frequency_hint NEVER
+		kmem_free(bplist, sizeof(*bplist) * nmap);
+	}
+	if (mapp != &map) {
+#pragma mips_frequency_hint NEVER
+		kmem_free(mapp, sizeof(*mapp) * nfsb);
+	}
+	if (bpp)
+		*bpp = rbp;
+	return 0;
+exit1:
+	if (bplist) {
+		for (i = 0; i < nbplist; i++)
+			xfs_trans_brelse(trans, bplist[i]);
+		kmem_free(bplist, sizeof(*bplist) * nmap);
+	}
+exit0:
+	if (mapp != &map)
+		kmem_free(mapp, sizeof(*mapp) * nfsb);
+	if (bpp)
+		*bpp = NULL;
+	return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block; unlike xfs_da_read_buf the contents
+ * are not filled in.  mappedbno is the block's disk address if known, or
+ * -1 / -2 to have it looked up (-2: a hole yields success with *bpp set to
+ * NULL).  The dabuf comes back through bpp; returns 0 or an error code.
+ */
+int
+xfs_da_get_buf(xfs_trans_t *trans, xfs_inode_t *dp, xfs_dablk_t bno,
+	       xfs_daddr_t mappedbno, xfs_dabuf_t **bpp, int whichfork)
+{
+	inst_t	*ra = (inst_t *)__return_address;	/* kept in debug dabufs */
+
+	/* Caller code 0 selects the "get" flavour of the common routine. */
+	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, ra);
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ * mappedbno is the block's disk address if known, or -1 / -2 to have it
+ * looked up (-2: a hole yields success with *bpp set to NULL).  Fails with
+ * EFSCORRUPTED if the block read does not carry a known dir/attr magic.
+ */
+int
+xfs_da_read_buf(xfs_trans_t *trans, xfs_inode_t *dp, xfs_dablk_t bno,
+		xfs_daddr_t mappedbno, xfs_dabuf_t **bpp, int whichfork)
+{
+	inst_t	*ra = (inst_t *)__return_address;	/* kept in debug dabufs */
+
+	/* Caller code 1 selects the "read" flavour of the common routine. */
+	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, ra);
+}
+
+/*
+ * Calculate the number of bits needed to hold i different values, i.e. the
+ * smallest rval with 2^rval >= i; NBBY * sizeof(i) if no such shift exists.
+ * The shifted constant is unsigned: a signed 1 << 31 would be undefined.
+ */
+uint
+xfs_da_log2_roundup(uint i)
+{
+	uint rval;
+
+	for (rval = 0; rval < NBBY * sizeof(i) && (1U << rval) < i; rval++)
+		continue;
+	return(rval);
+}
+
+xfs_zone_t *xfs_da_state_zone;	/* zone xfs_da_state_t structs are allocated from */
+xfs_zone_t *xfs_dabuf_zone;		/* zone single-buffer dabufs are allocated from */
+
+/*
+ * Allocate a dir-state structure.  They are large, so they come from
+ * xfs_da_state_zone (KM_SLEEP) instead of living on the stack.
+ */
+xfs_da_state_t *
+xfs_da_state_alloc(void)
+{
+	return (xfs_da_state_t *)kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
+}
+
+/*
+ * Kill the altpath contents of a da-state structure.  Altpath dabufs that are
+ * also held by the same level of the main path are forgotten, not released.
+ */
+void
+xfs_da_state_kill_altpath(xfs_da_state_t *state)
+{
+	xfs_da_state_blk_t *alt = state->altpath.blk;
+	int level;
+
+	for (level = 0; level < state->altpath.active; level++, alt++) {
+		if (alt->bp && alt->bp != state->path.blk[level].bp)
+			xfs_da_buf_done(alt->bp);
+		alt->bp = NULL;
+	}
+	state->altpath.active = 0;
+}
+
+/*
+ * Free a da-state structure: drop the altpath, release the dabufs still held
+ * on the main path and in a valid extrablk, then return it to its zone.
+ */
+void
+xfs_da_state_free(xfs_da_state_t *state)
+{
+	int	level;
+
+	xfs_da_state_kill_altpath(state);
+	for (level = 0; level < state->path.active; level++)
+		if (state->path.blk[level].bp)
+			xfs_da_buf_done(state->path.blk[level].bp);
+	if (state->extravalid && state->extrablk.bp)
+		xfs_da_buf_done(state->extrablk.bp);
+#ifdef DEBUG
+	bzero((char *)state, sizeof(*state));	/* wipe before it is freed */
+#endif /* DEBUG */
+	kmem_zone_free(xfs_da_state_zone, state);
+}
+
+#ifdef XFS_DABUF_DEBUG
+xfs_dabuf_t	*xfs_dabuf_global_list;	/* head of the list of dabufs made and not yet done */
+lock_t		xfs_dabuf_global_lock;	/* spinlock held while the list is walked or changed */
+#endif
+
+/*
+ * Create a dabuf over the nbuf buffers in bps[].  A single buffer is used in
+ * place (data points into it); several buffers get one contiguous copy of
+ * their contents, which xfs_da_buf_clean copies back if the dabuf is dirty.
+ * ra is recorded only in XFS_DABUF_DEBUG builds, which also assert that no
+ * listed dabuf has the same device and block number, then list the new one.
+ */
+/* ARGSUSED */
+STATIC xfs_dabuf_t *
+xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
+{
+	xfs_buf_t	*bp;
+	xfs_dabuf_t	*dabuf;
+	char		*dst;		/* where the next buffer's bytes go */
+	int		n;
+
+	dabuf = (nbuf == 1) ? kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP) :
+			      kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
+	dabuf->dirty = 0;
+	dabuf->nbuf = nbuf;
+#ifdef XFS_DABUF_DEBUG
+	dabuf->ra = ra;
+	dabuf->dev = XFS_BUF_TARGET(bps[0]);
+	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
+#endif
+	if (nbuf == 1) {
+		dabuf->bps[0] = bp = bps[0];
+		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
+		dabuf->data = XFS_BUF_PTR(bp);
+	} else {	/* total up the sizes, then gather the contents */
+		for (n = 0, dabuf->bbcount = 0; n < nbuf; n++) {
+			dabuf->bps[n] = bp = bps[n];
+			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
+		}
+		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
+		for (n = 0, dst = dabuf->data; n < nbuf; n++) {
+			bp = bps[n];
+			bcopy(XFS_BUF_PTR(bp), dst, XFS_BUF_COUNT(bp));
+			dst += XFS_BUF_COUNT(bp);
+		}
+	}
+#ifdef XFS_DABUF_DEBUG
+	{
+		xfs_dabuf_t	*other;
+		int		s;
+
+		s = mutex_spinlock(&xfs_dabuf_global_lock);
+		for (other = xfs_dabuf_global_list; other; other = other->next) {
+			ASSERT(other->blkno != dabuf->blkno ||
+			       other->dev != dabuf->dev);
+		}
+		dabuf->prev = NULL;	/* push onto the head of the list */
+		dabuf->next = xfs_dabuf_global_list;
+		if (xfs_dabuf_global_list)
+			xfs_dabuf_global_list->prev = dabuf;
+		xfs_dabuf_global_list = dabuf;
+		mutex_spinunlock(&xfs_dabuf_global_lock, s);
+	}
+#endif
+	return dabuf;
+}
+
+/*
+ * Un-dirty a dabuf: copy the contiguous data area of a multi-buffer dabuf
+ * back into its underlying buffers and clear the dirty flag.
+ */
+STATIC void
+xfs_da_buf_clean(xfs_dabuf_t *dabuf)
+{
+	xfs_buf_t	*bp;
+	char		*src;		/* next bytes of dabuf->data to copy out */
+	int		n;
+
+	if (!dabuf->dirty)
+		return;
+	ASSERT(dabuf->nbuf > 1);
+	dabuf->dirty = 0;
+	for (n = 0, src = dabuf->data; n < dabuf->nbuf; n++) {
+		bp = dabuf->bps[n];
+		bcopy(src, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
+		src += XFS_BUF_COUNT(bp);
+	}
+}
+
+/*
+ * Release a dabuf: flush it if dirty, then free its data copy and itself.
+ */
+void
+xfs_da_buf_done(xfs_dabuf_t *dabuf)
+{
+	int	nbuf;		/* buffer count, saved before the dabuf is wiped */
+
+	ASSERT(dabuf);
+	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+	nbuf = dabuf->nbuf;
+	xfs_da_buf_clean(dabuf);	/* does nothing unless the dabuf is dirty */
+	if (nbuf > 1)			/* only multi-buffer dabufs own a copy */
+		kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
+#ifdef XFS_DABUF_DEBUG
+	{
+		int	s = mutex_spinlock(&xfs_dabuf_global_lock);
+		if (dabuf->prev)	/* unlink from the global debug list */
+			dabuf->prev->next = dabuf->next;
+		else
+			xfs_dabuf_global_list = dabuf->next;
+		if (dabuf->next)
+			dabuf->next->prev = dabuf->prev;
+		mutex_spinunlock(&xfs_dabuf_global_lock, s);
+	}
+	bzero(dabuf, XFS_DA_BUF_SIZE(nbuf));	/* this clears dabuf->nbuf too */
+#endif
+	if (nbuf == 1)		/* must not re-read dabuf->nbuf: see bzero above */
+		kmem_zone_free(xfs_dabuf_zone, dabuf);
+	else
+		kmem_free(dabuf, XFS_DA_BUF_SIZE(nbuf));
+}
+
+/*
+ * Log the byte range first..last (inclusive) of a dabuf in a transaction.
+ * A multi-buffer dabuf is marked dirty and the range is split across its
+ * buffers, each logged with offsets relative to that buffer's start.
+ */
+void
+xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
+{
+	xfs_buf_t	*bp;
+	uint		lo, hi;		/* part of first..last inside this buffer */
+	int		n, start;	/* start: dabuf offset of this buffer */
+
+	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+	if (dabuf->nbuf == 1) {
+		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
+		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
+		return;
+	}
+	dabuf->dirty = 1;
+	ASSERT(first <= last);
+	for (n = 0, start = 0; n < dabuf->nbuf; n++) {
+		bp = dabuf->bps[n];
+		lo = start;
+		hi = lo + XFS_BUF_COUNT(bp) - 1;
+		if (lo < first)
+			lo = first;
+		if (hi > last)
+			hi = last;
+		/*
+		 * B_DONE is set by xfs_trans_log_buf.  A buffer outside the
+		 * range is not logged, so set it by hand: a new buffer (get,
+		 * not read) left without it is released into the cache at
+		 * commit, and then a read will fail.
+		 */
+		if (lo <= hi)
+			xfs_trans_log_buf(tp, bp, lo - start, hi - start);
+		else if (!(XFS_BUF_ISDONE(bp)))
+			XFS_BUF_DONE(bp);
+		start += XFS_BUF_COUNT(bp);
+	}
+	ASSERT(last < start);
+}
+
+/*
+ * Release dabuf from a transaction.
+ * The dabuf has to be freed before its buffers are released, since the lock
+ * on the buffer is really what synchronizes access to the dabuf; the buffer
+ * pointers are therefore copied out of the dabuf first.
+ */
+void
+xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	xfs_buf_t	*single;	/* holds the pointer when there is one buffer */
+	xfs_buf_t	**bufs = &single;
+	int		count, n;
+
+	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+	count = dabuf->nbuf;
+	if (count == 1) {
+		single = dabuf->bps[0];
+	} else {
+		bufs = kmem_alloc(count * sizeof(*bufs), KM_SLEEP);
+		bcopy(dabuf->bps, bufs, count * sizeof(*bufs));
+	}
+	xfs_da_buf_done(dabuf);
+	for (n = 0; n < count; n++)
+		xfs_trans_brelse(tp, bufs[n]);
+	if (bufs != &single)
+		kmem_free(bufs, count * sizeof(*bufs));
+}
+
+/*
+ * Invalidate dabuf from a transaction.  As in xfs_da_brelse, the dabuf is
+ * freed before its buffers are touched, so their pointers are saved first.
+ */
+void
+xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	xfs_buf_t	*single;	/* holds the pointer when there is one buffer */
+	xfs_buf_t	**bufs = &single;
+	int		count, n;
+
+	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+	count = dabuf->nbuf;
+	if (count == 1) {
+		single = dabuf->bps[0];
+	} else {
+		bufs = kmem_alloc(count * sizeof(*bufs), KM_SLEEP);
+		bcopy(dabuf->bps, bufs, count * sizeof(*bufs));
+	}
+	xfs_da_buf_done(dabuf);
+	for (n = 0; n < count; n++)
+		xfs_trans_binval(tp, bufs[n]);
+	if (bufs != &single)
+		kmem_free(bufs, count * sizeof(*bufs));
+}
diff --git a/libxfs/xfs_dir.c b/libxfs/xfs_dir.c
new file mode 100644
index 000000000..b13d24642
--- /dev/null
+++ b/libxfs/xfs_dir.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_dir.c
+ *
+ * Provide the external interfaces to manage directories.
+ */
+
+
+xfs_dahash_t	xfs_dir_hash_dot, xfs_dir_hash_dotdot;	/* hashes of "." and ".." */
+
+/*
+ * One-time startup routine called from xfs_init(): caches the two name hashes.
+ */
+void
+xfs_dir_startup(void)
+{
+	xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
+	xfs_dir_hash_dot = xfs_da_hashname(".", 1);
+}
+
+/*
+ * Initialize directory-related fields in the mount structure (version 1).
+ */
+STATIC void
+xfs_dir_mount(xfs_mount_t *mp)
+{
+	uint lbsize = XFS_LBSIZE(mp), sf_ents, leaf_ents;
+
+	sf_ents = (mp->m_attroffset - (uint)sizeof(xfs_dir_sf_hdr_t)) /
+		  (uint)sizeof(xfs_dir_sf_entry_t);
+	leaf_ents = (lbsize - (uint)sizeof(xfs_dir_leaf_hdr_t)) /
+		    ((uint)sizeof(xfs_dir_leaf_entry_t) +
+		     (uint)sizeof(xfs_dir_leaf_name_t));
+	if (sf_ents < leaf_ents)	/* keep the larger of the two counts */
+		sf_ents = leaf_ents;
+	mp->m_dirversion = 1;
+	mp->m_dircook_elog = xfs_da_log2_roundup(sf_ents + 1);
+	ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog);
+	mp->m_da_node_ents = (lbsize - (uint)sizeof(xfs_da_node_hdr_t)) /
+			     (uint)sizeof(xfs_da_node_entry_t);
+	mp->m_dir_magicpct = (lbsize * 37) / 100;	/* 37% of a block */
+	mp->m_dirblksize = mp->m_sb.sb_blocksize;
+	mp->m_dirblkfsbs = 1;		/* one fs block per directory block */
+}
+
+/*
+ * Initialize a directory with its "." and ".." entries: validate the parent's
+ * inode number, then create the shortform directory with that inode number.
+ */
+STATIC int
+xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
+{
+	xfs_da_args_t args;
+	int error;
+
+	ASSERT((dir->i_d.di_mode & IFMT) == IFDIR);
+	error = xfs_dir_ino_validate(trans->t_mountp, parent_dir->i_ino);
+	if (error)
+		return error;
+	bzero((char *)&args, sizeof(args));	/* only dp and trans are set */
+	args.dp = dir;
+	args.trans = trans;
+	return(xfs_dir_shortform_create(&args, parent_dir->i_ino));
+}
+
+/*
+ * Generic handler routine to add a name to a directory.
+ * Transitions directory from shortform to Btree as necessary.
+ * inum is validated before anything else is done.  When total is 0, ENOSPC
+ * is returned instead of converting the directory to a larger format.
+ * Returns 0 or an error code.
+ */
+STATIC int						/* error */
+xfs_dir_createname(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+		   int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+		   xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	xfs_da_args_t args;
+	int error, entsize;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	error = xfs_dir_ino_validate(trans->t_mountp, inum);
+	if (error)
+		return (error);
+	XFS_STATS_INC(xs_dir_create);
+
+	/* Describe the request for the format-specific routines. */
+	args.trans = trans;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 1;
+
+	/*
+	 * Shortform: add in place if the new entry fits in the inode fork,
+	 * otherwise convert to leaf form and carry on below.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		entsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
+		if ((dp->i_d.di_size + entsize) <= XFS_IFORK_DSIZE(dp))
+			return(xfs_dir_shortform_addname(&args));
+		if (total == 0)
+			return XFS_ERROR(ENOSPC);
+		error = xfs_dir_shortform_to_leaf(&args);
+		if (error)
+			return(error);
+	}
+	/*
+	 * One block: ENOSPC from the leaf add triggers a conversion to node
+	 * form; any other result of the add is final.
+	 */
+	if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+		error = xfs_dir_leaf_addname(&args);
+		if (error != ENOSPC)
+			return(error);
+		if (total == 0)
+			return XFS_ERROR(ENOSPC);
+		error = xfs_dir_leaf_to_node(&args);
+		if (error)
+			return(error);
+	}
+	return(xfs_dir_node_addname(&args));
+}
+
+/*
+ * Generic handler routine to remove a name from a directory.
+ * Transitions directory from Btree to shortform as necessary: after a
+ * successful removal from a one-block (leaf) directory, the directory is
+ * converted to shortform if XFS_DIR_SF_ALLFIT() of the remaining entry count
+ * and name bytes does not exceed the inode's data fork size.
+ *
+ * Returns 0 or an error code.
+ */
+STATIC int							/* error */
+xfs_dir_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+		   int namelen, xfs_ino_t ino, xfs_fsblock_t *firstblock,
+		   xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	xfs_da_args_t args;
+	int error, nents, namebytes, newsize;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	XFS_STATS_INC(xs_dir_remove);
+
+	/* Describe the request for the format-specific routines. */
+	args.trans = trans;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = ino;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = args.addname = args.oknoent = 0;
+
+	/* Shortform and node directories are handled entirely by one call. */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return(xfs_dir_shortform_removename(&args));
+	if (!xfs_bmap_one_block(dp, XFS_DATA_FORK))
+		return(xfs_dir_node_removename(&args));
+	error = xfs_dir_leaf_removename(&args, &nents, &namebytes);
+	if (error)
+		return(error);
+	/* Shrink back to shortform once everything left fits in the inode. */
+	newsize = XFS_DIR_SF_ALLFIT(nents, namebytes);
+	if (newsize <= XFS_IFORK_DSIZE(dp))
+		error = xfs_dir_leaf_to_shortform(&args);
+	return(error);
+}
+
+/*
+ * Generic handler routine to look up a name in a directory.
+ * The inode number found is returned through inum.  The format-specific
+ * lookups report a hit as EEXIST, which is turned into 0 here; any other
+ * result is returned as is.  Names of MAXNAMELEN or more bytes get EINVAL
+ * without *inum being touched; once a lookup has run, *inum is stored
+ * whatever the result.
+ */
+STATIC int							/* error */
+xfs_dir_lookup(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
+				   xfs_ino_t *inum)
+{
+	xfs_da_args_t args;
+	int error;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN)
+		return(XFS_ERROR(EINVAL));
+	XFS_STATS_INC(xs_dir_lookup);
+
+	/* Describe the request; no block allocation context is passed. */
+	args.trans = trans;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = 0;
+	args.firstblock = NULL;
+	args.flist = NULL;
+	args.total = 0;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+
+	/* Pick the work routine that matches the directory's format. */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		error = xfs_dir_shortform_lookup(&args);
+	else if (xfs_bmap_one_block(dp, XFS_DATA_FORK))
+		error = xfs_dir_leaf_lookup(&args);
+	else
+		error = xfs_dir_node_lookup(&args);
+	*inum = args.inumber;
+	return(error == EEXIST ? 0 : error);
+}
+
+/*
+ * Generic handler routine to replace the inode number of an existing
+ * directory entry with inum.
+ *
+ * Names of MAXNAMELEN or more bytes get EINVAL, and inum is validated
+ * before the directory is looked at.  The result of the format-specific
+ * replace routine is returned unchanged.
+ * Returns 0 or an error code.
+ */
+STATIC int							/* error */
+xfs_dir_replace(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
+				    xfs_ino_t inum, xfs_fsblock_t *firstblock,
+				    xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	xfs_da_args_t args;
+	int error;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN)
+		return(XFS_ERROR(EINVAL));
+	error = xfs_dir_ino_validate(trans->t_mountp, inum);
+	if (error)
+		return error;
+
+	/* Describe the request for the format-specific routines. */
+	args.trans = trans;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = args.addname = args.oknoent = 0;
+
+	/* Pick the work routine that matches the directory's format. */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return(xfs_dir_shortform_replace(&args));
+	if (xfs_bmap_one_block(dp, XFS_DATA_FORK))
+		return(xfs_dir_leaf_replace(&args));
+	return(xfs_dir_node_replace(&args));
+}
+
+
+/*========================================================================
+ * External routines when dirsize == XFS_LBSIZE(dp->i_mount).
+ *========================================================================*/
+
+/*
+ * Add a name to the leaf directory structure (external routine).  Reads
+ * block 0 of the data fork and adds the entry only if the lookup reports
+ * ENOENT; any other lookup result is returned unchanged.
+ */
+int
+xfs_dir_leaf_addname(xfs_da_args_t *args)
+{
+	xfs_dabuf_t *leafbp;
+	int error, slot;
+
+	error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbp,
+					     XFS_DATA_FORK);
+	if (error)
+		return(error);
+	ASSERT(leafbp != NULL);
+	error = xfs_dir_leaf_lookup_int(leafbp, args, &slot);
+	if (error == ENOENT)
+		error = xfs_dir_leaf_add(leafbp, args, slot);
+	xfs_da_buf_done(leafbp);
+	return(error);
+}
+
+/*
+ * Remove a name from the leaf directory structure (external routine).  On
+ * success *count / *totallen get the leaf header's entry count and name bytes.
+ */
+STATIC int
+xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dabuf_t *leafbp;
+	int error, slot;
+
+	error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbp,
+					     XFS_DATA_FORK);
+	if (error)
+		return(error);
+	ASSERT(leafbp != NULL);
+	leaf = leafbp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	error = xfs_dir_leaf_lookup_int(leafbp, args, &slot);
+	if (error == EEXIST) {	/* found: remove it and report what is left */
+		(void)xfs_dir_leaf_remove(args->trans, leafbp, slot);
+		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		*totallen = INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+		error = 0;
+	}
+	xfs_da_buf_done(leafbp);
+	return(error);
+}
+
+/*
+ * Look up a name in a leaf directory structure (external routine).
+ * Returns the read error, or else whatever xfs_dir_leaf_lookup_int reports.
+ */
+STATIC int
+xfs_dir_leaf_lookup(xfs_da_args_t *args)
+{
+	xfs_dabuf_t *leafbp;
+	int error, slot;
+
+	error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbp,
+					     XFS_DATA_FORK);
+	if (error)
+		return(error);
+	ASSERT(leafbp != NULL);
+	error = xfs_dir_leaf_lookup_int(leafbp, args, &slot);
+	xfs_da_brelse(args->trans, leafbp);
+	return(error);
+}
+
+/*
+ * Look up a name in a leaf directory structure and replace its inode number
+ * with the args->inumber passed in (external routine).  The changed inumber
+ * field is logged.  Returns 0 on success, else the read or lookup result.
+ */
+STATIC int
+xfs_dir_leaf_replace(xfs_da_args_t *args)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_name_t *namest;
+	xfs_dabuf_t *leafbp;
+	xfs_ino_t newino = args->inumber;	/* captured before the lookup */
+	int error, slot;
+
+	error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbp,
+					     XFS_DATA_FORK);
+	if (error)
+		return(error);
+	ASSERT(leafbp != NULL);
+	error = xfs_dir_leaf_lookup_int(leafbp, args, &slot);
+	if (error != EEXIST) {
+		/* Nothing was modified, so hand the buffers back. */
+		xfs_da_brelse(args->trans, leafbp);
+		return(error);
+	}
+	leaf = leafbp->data;
+	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+			INT_GET(leaf->entries[slot].nameidx, ARCH_CONVERT));
+	/* XXX - replace assert? */
+	XFS_DIR_SF_PUT_DIRINO_ARCH(&newino, &namest->inumber, ARCH_CONVERT);
+	xfs_da_log_buf(args->trans, leafbp,
+	    XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
+	xfs_da_buf_done(leafbp);
+	return 0;
+}
+
+
+/*========================================================================
+ * External routines when dirsize > XFS_LBSIZE(mp).
+ *========================================================================*/
+
+/*
+ * Insert a new name into a Btree-format (node) directory.
+ *
+ * The Btree is walked down to the target leaf.  If the leaf has no room,
+ * leaf blocks and intermediate nodes -- possibly all the way up to the
+ * root, itself just a special intermediate node -- may be split.
+ */
+STATIC int
+xfs_dir_node_addname(xfs_da_args_t *args)
+{
+	xfs_da_state_blk_t *blk;
+	xfs_da_state_t *state;
+	int lookup_err, retval;
+
+	/*
+	 * Set up the state that carries args/results/context around.
+	 */
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Look the name up: anything other than "not found" (a real error,
+	 * or the name already existing) ends the operation here.
+	 */
+	lookup_err = xfs_da_node_lookup_int(state, &retval);
+	if (lookup_err)
+		retval = lookup_err;
+	if (retval != ENOENT)
+		goto out;
+	blk = &state->path.blk[state->path.active - 1];
+	ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+	retval = xfs_dir_leaf_add(blk->bp, args, blk->index);
+	if (retval != 0) {
+		/*
+		 * The leaf add failed.  Without a block reservation we
+		 * cannot split, so hand the failure back as it is.
+		 */
+		if (args->total == 0) {
+			ASSERT(retval == ENOSPC);
+			goto out;
+		}
+		retval = xfs_da_split(state);
+	} else if (!args->justcheck) {
+		/*
+		 * The add went in: bring the Btree hashvals up to date.
+		 */
+		xfs_da_fixhashpath(state, &state->path);
+	}
+out:
+	xfs_da_state_free(state);
+
+	return retval;
+}
+
+/*
+ * Delete a name from a Btree-format (node) directory.
+ *
+ * The Btree is walked down to the leaf holding the name.  Afterwards leaf
+ * blocks and intermediate nodes -- possibly up to and including the root,
+ * itself just a special intermediate node -- may be joined together.
+ */
+STATIC int
+xfs_dir_node_removename(xfs_da_args_t *args)
+{
+	xfs_da_state_blk_t *blk;
+	xfs_da_state_t *state;
+	int lookup_err, retval, join_err = 0;
+
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Locate the entry.  A lookup error, or any result other than
+	 * "found", is returned to the caller unchanged.
+	 */
+	lookup_err = xfs_da_node_lookup_int(state, &retval);
+	if (lookup_err)
+		retval = lookup_err;
+	if (retval != EEXIST)
+		goto out;
+
+	/*
+	 * Take the entry out of its leaf, then repair the hashvals
+	 * recorded along the path above it.
+	 */
+	blk = &state->path.blk[state->path.active - 1];
+	ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+	retval = xfs_dir_leaf_remove(args->trans, blk->bp, blk->index);
+	xfs_da_fixhashpath(state, &state->path);
+
+	/*
+	 * A nonzero result from the leaf removal asks for a join pass;
+	 * only the status of that join is passed back to the caller.
+	 */
+	if (retval)
+		join_err = xfs_da_join(state);
+	retval = join_err;
+
+out:
+	xfs_da_state_free(state);
+
+	return retval;
+}
+
+/*
+ * Look up a filename in a Btree-format (node) directory.
+ * xfs_da_node_lookup_int() does the actual search.
+ */
+STATIC int
+xfs_dir_node_lookup(xfs_da_args_t *args)
+{
+	xfs_da_state_blk_t *blk;
+	xfs_da_state_t *state;
+	int lookup_err, rc, level;
+
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Search; a lookup error overrides the found/not-found result.
+	 */
+	lookup_err = xfs_da_node_lookup_int(state, &rc);
+	if (lookup_err)
+		rc = lookup_err;
+
+	/*
+	 * Outside a transaction all the path buffers must be released here.
+	 */
+	for (level = 0; level < state->path.active; level++) {
+		blk = &state->path.blk[level];
+		xfs_da_brelse(args->trans, blk->bp);
+		blk->bp = NULL;
+	}
+
+	xfs_da_state_free(state);
+	return rc;
+}
+
+/*
+ * Find a filename in a Btree-format (node) directory and overwrite its
+ * inode number with args->inumber (xfs_da_node_lookup_int searches).
+ */
+STATIC int
+xfs_dir_node_replace(xfs_da_args_t *args)
+{
+	xfs_dir_leaf_name_t *namest;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leafblock_t *leaf;
+	xfs_da_state_blk_t *blk;
+	xfs_da_state_t *state;
+	xfs_ino_t inum = args->inumber;
+	int lookup_err, rc, level;
+	xfs_dabuf_t *bp;
+
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Run the search; a lookup error overrides found/not-found.
+	 */
+	lookup_err = xfs_da_node_lookup_int(state, &rc);
+	if (lookup_err)
+		rc = lookup_err;
+
+	/* Handle the last block on the lookup path first. */
+	blk = &state->path.blk[state->path.active - 1];
+	bp = blk->bp;
+	if (rc != EEXIST) {
+		xfs_da_brelse(args->trans, bp);
+	} else {
+		ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+		leaf = bp->data;
+		entry = &leaf->entries[blk->index];
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		/* XXX - replace assert ? */
+		XFS_DIR_SF_PUT_DIRINO_ARCH(&inum, &namest->inumber, ARCH_CONVERT);
+		xfs_da_log_buf(args->trans, bp,
+		    XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
+		xfs_da_buf_done(bp);
+		rc = 0;
+	}
+	blk->bp = NULL;
+
+	/*
+	 * Then let go of the buffers for the blocks earlier on the path.
+	 */
+	for (level = 0; level < state->path.active - 1; level++) {
+		xfs_da_brelse(args->trans, state->path.blk[level].bp);
+		state->path.blk[level].bp = NULL;
+	}
+
+	xfs_da_state_free(state);
+	return rc;
+}
diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c
new file mode 100644
index 000000000..72acbb35d
--- /dev/null
+++ b/libxfs/xfs_dir2.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS v2 directory implementation.
+ * Top-level and utility routines.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Initialize the version 2 directory fields in the mount structure.
+ */
+void
+xfs_dir2_mount(
+	xfs_mount_t	*mp)		/* filesystem mount point */
+{
+	mp->m_dirversion = 2;
+	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
+	       XFS_MAX_BLOCKSIZE);
+	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
+	mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;	/* fsbs per dir block */
+	mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+	mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+	mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+	mp->m_da_node_ents =		/* node entries that fit in a dir block */
+		(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
+		(uint)sizeof(xfs_da_node_entry_t);
+	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;	/* 37% of block */
+}
+
+/*
+ * Set up a new directory: give it its initial "." and ".." entries.
+ */
+int				/* error */
+xfs_dir2_init(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	xfs_inode_t	*pdp)		/* incore parent directory inode */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		error;		/* error return value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	bzero((char *)&args, sizeof(args));
+	args.trans = tp;
+	args.dp = dp;
+	return xfs_dir2_sf_create(&args, pdp->i_ino);
+}
+
+/*
+ * Enter a name in a directory.
+ * The inode number is validated first, then the add is handed to the
+ * routine for the directory's current format (sf, block, leaf or node).
+ */
+STATIC int					/* error */
+xfs_dir2_createname(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_inode_t		*dp,		/* incore directory inode */
+	char			*name,		/* new entry name */
+	int			namelen,	/* new entry name length */
+	xfs_ino_t		inum,		/* new entry inode number */
+	xfs_fsblock_t		*first,		/* bmap's firstblock */
+	xfs_bmap_free_t		*flist,		/* bmap's freeblock list */
+	xfs_extlen_t		total)		/* bmap's total block count */
+{
+	xfs_da_args_t		args;		/* operation arguments */
+	int			rval;		/* return value */
+	int			v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	XFS_STATS_INC(xs_dir_create);
+	/* Describe the request in the da_args structure. */
+	args.trans = tp;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = 0;
+	args.oknoent = args.addname = 1;
+	/*
+	 * Hand off to the add routine matching the directory's format.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return xfs_dir2_sf_addname(&args);
+	if ((rval = xfs_dir2_isblock(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	if (v)
+		return xfs_dir2_block_addname(&args);
+	if ((rval = xfs_dir2_isleaf(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	if (v)
+		return xfs_dir2_leaf_addname(&args);
+	return xfs_dir2_node_addname(&args);
+}
+
+/*
+ * Look a name up in a directory.
+ * When 0 is returned, the inode number found has been stored in *inum.
+ */
+STATIC int				/* error */
+xfs_dir2_lookup(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* lookup name */
+	int		namelen,	/* lookup name length */
+	xfs_ino_t	*inum)		/* out: inode number */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+#pragma mips_frequency_hint NEVER
+		return XFS_ERROR(EINVAL);
+	}
+	XFS_STATS_INC(xs_dir_lookup);
+	/*
+	 * Describe the request in the da_args structure.
+	 */
+	args.trans = tp;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = 0;
+	args.firstblock = NULL;
+	args.flist = NULL;
+	args.total = 0;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+	/*
+	 * Hand off to the lookup routine matching the directory's format.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_lookup(&args);
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_block_lookup(&args);
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_leaf_lookup(&args);
+	else
+		rval = xfs_dir2_node_lookup(&args);
+	if (rval != 0 && rval != EEXIST)
+		return rval;
+	*inum = args.inumber;
+	return 0;
+}
+
+/*
+ * Remove an entry from a directory, using the removal routine that
+ * matches the directory's current format (sf, block, leaf or node).
+ */
+STATIC int				/* error */
+xfs_dir2_removename(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* name of entry to remove */
+	int		namelen,	/* name length of entry to remove */
+	xfs_ino_t	ino,		/* inode number of entry to remove */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total)		/* bmap's total block count */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	XFS_STATS_INC(xs_dir_remove);
+	/* Describe the request in the da_args structure. */
+	args.trans = tp;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = ino;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 0;
+	/*
+	 * Hand off to the remove routine matching the directory's format.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return xfs_dir2_sf_removename(&args);
+	if ((rval = xfs_dir2_isblock(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	if (v)
+		return xfs_dir2_block_removename(&args);
+	if ((rval = xfs_dir2_isleaf(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	if (v)
+		return xfs_dir2_leaf_removename(&args);
+	return xfs_dir2_node_removename(&args);
+}
+
+/*
+ * Replace the inode number of a directory entry, using the routine that
+ * matches the directory's current format (sf, block, leaf or node).
+ */
+STATIC int				/* error */
+xfs_dir2_replace(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* name of entry to replace */
+	int		namelen,	/* name length of entry to replace */
+	xfs_ino_t	inum,		/* new inode number */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total)		/* bmap's total block count */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+#pragma mips_frequency_hint NEVER
+		return XFS_ERROR(EINVAL);
+	}
+	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	/* Describe the request in the da_args structure. */
+	args.trans = tp;
+	args.dp = dp;
+	args.whichfork = XFS_DATA_FORK;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 0;
+	/*
+	 * Hand off to the replace routine matching the directory's format.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return xfs_dir2_sf_replace(&args);
+	if ((rval = xfs_dir2_isblock(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	if (v)
+		return xfs_dir2_block_replace(&args);
+	if ((rval = xfs_dir2_isleaf(tp, dp, &v)) != 0) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	if (v)
+		return xfs_dir2_leaf_replace(&args);
+	return xfs_dir2_node_replace(&args);
+}
+
+/*
+ * Utility routines.
+ */
+
+/*
+ * Add a directory block to the given space, returning its number in *dbp
+ * (ENOSPC if it could not be fully mapped).  This routine is for data and
+ * free blocks; leaf/node blocks are handled by xfs_da_grow_inode.
+ */
+int					/* error */
+xfs_dir2_grow_inode(
+	xfs_da_args_t	*args,		/* operation arguments */
+	int		space,		/* v2 dir's space XFS_DIR2_xxx_SPACE */
+	xfs_dir2_db_t	*dbp)		/* out: block number added */
+{
+	xfs_fileoff_t	bno;		/* directory offset of new block */
+	int		count;		/* count of filesystem blocks */
+	xfs_inode_t	*dp;		/* incore directory inode */
+	int		error;		/* error return value */
+	int		got;		/* blocks actually mapped */
+	int		i;		/* temp mapping index */
+	xfs_bmbt_irec_t	map;		/* single structure for bmap */
+	int		mapi;		/* mapping index */
+	xfs_bmbt_irec_t	*mapp;		/* bmap mapping structure(s) */
+	xfs_mount_t	*mp;		/* filesystem mount point */
+	int		nmap;		/* number of bmap entries */
+	xfs_trans_t	*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_s("grow_inode", args, space);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	/*
+	 * Set lowest possible block in the space requested.
+	 */
+	bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
+	count = mp->m_dirblkfsbs;
+	/*
+	 * Find the first hole for our block.
+	 */
+	if (error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	nmap = 1;
+	ASSERT(args->firstblock != NULL);
+	/*
+	 * Try mapping the new block contiguously (one extent).
+	 */
+	if (error = xfs_bmapi(tp, dp, bno, count,
+			XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
+			args->firstblock, args->total, &map, &nmap,
+			args->flist)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(nmap <= 1);
+	/*
+	 * Got it in one extent: use the on-stack mapping.
+	 */
+	if (nmap == 1) {
+		mapp = &map;
+		mapi = 1;
+	}
+	/*
+	 * Didn't work and this is a multiple-fsb directory block.
+	 * Try again with the contiguous flag turned off.
+	 */
+	else if (nmap == 0 && count > 1) {
+#pragma mips_frequency_hint NEVER
+		xfs_fileoff_t	b;	/* current file offset */
+
+		/*
+		 * Space for maximum number of mappings.
+		 */
+		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+		/*
+		 * Iterate until we get to the end of our block.
+		 */
+		for (b = bno, mapi = 0; b < bno + count; ) {
+			int	c;	/* current fsb count */
+
+			/*
+			 * Can't map more than MAX_NMAP at once.
+			 */
+			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
+			c = (int)(bno + count - b);
+			if (error = xfs_bmapi(tp, dp, b, c,
+					XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
+					args->firstblock, args->total,
+					&mapp[mapi], &nmap, args->flist)) {
+				kmem_free(mapp, sizeof(*mapp) * count);
+				return error;
+			}
+			if (nmap < 1)
+				break;
+			/*
+			 * Add this bunch into our table, go to the next offset.
+			 */
+			mapi += nmap;
+			b = mapp[mapi - 1].br_startoff +
+			    mapp[mapi - 1].br_blockcount;
+		}
+	}
+	/*
+	 * Didn't work, and there is no piecewise retry for this case.
+	 */
+	else {
+#pragma mips_frequency_hint NEVER
+		mapi = 0;
+		mapp = NULL;
+	}
+	/*
+	 * See how many fsb's we got.
+	 */
+	for (i = 0, got = 0; i < mapi; i++)
+		got += mapp[i].br_blockcount;
+	/*
+	 * Not all fsb's mapped (tested first, mapp may be NULL), or bad ends.
+	 */
+	if (got != count || mapp[0].br_startoff != bno ||
+	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
+	    bno + count) {
+#pragma mips_frequency_hint NEVER
+		if (mapp != &map)
+			kmem_free(mapp, sizeof(*mapp) * count);
+		return XFS_ERROR(ENOSPC);
+	}
+	/*
+	 * Done with the temporary mapping table.
+	 */
+	if (mapp != &map)
+		kmem_free(mapp, sizeof(*mapp) * count);
+	*dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno);
+	/*
+	 * Update file's size if this is the data space and it grew.
+	 */
+	if (space == XFS_DIR2_DATA_SPACE) {
+		xfs_fsize_t	size;		/* directory file (data) size */
+
+		size = XFS_FSB_TO_B(mp, bno + count);
+		if (size > dp->i_d.di_size) {
+			dp->i_d.di_size = size;
+			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Report via *vp whether this directory is in single-block form.
+ */
+int					/* error */
+xfs_dir2_isblock(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	int		*vp)		/* out: 1 is block, 0 is not block */
+{
+	xfs_mount_t	*mp = dp->i_mount;	/* filesystem mount point */
+	xfs_fileoff_t	last;		/* last file offset */
+	int		rval;		/* return value */
+
+	rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK);
+	if (rval) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	rval = (XFS_FSB_TO_B(mp, last) == mp->m_dirblksize);
+	ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
+	*vp = rval;
+	return 0;
+}
+
+/*
+ * Report via *vp whether this directory is in single-leaf form.
+ */
+int					/* error */
+xfs_dir2_isleaf(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	int		*vp)		/* out: 1 is leaf, 0 is not leaf */
+{
+	xfs_mount_t	*mp = dp->i_mount;	/* filesystem mount point */
+	xfs_fileoff_t	last;		/* last file offset */
+	int		error;		/* error return value */
+
+	error = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	*vp = (last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog));
+	return 0;
+}
+
+/*
+ * Take one block out of the directory file.
+ * Only data and free blocks come through here; leaf and node blocks
+ * are removed by xfs_da_shrink_inode instead.
+ */
+int
+xfs_dir2_shrink_inode(
+	xfs_da_args_t	*args,		/* operation arguments */
+	xfs_dir2_db_t	db,		/* directory block number */
+	xfs_dabuf_t	*bp)		/* block's buffer */
+{
+	xfs_fileoff_t	bno;		/* directory file offset */
+	xfs_dablk_t	da;		/* directory file offset */
+	int		done;		/* bunmap is finished */
+	xfs_inode_t	*dp;		/* incore directory inode */
+	int		error;		/* error return value */
+	xfs_mount_t	*mp;		/* filesystem mount point */
+	xfs_trans_t	*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_db("shrink_inode", args, db, bp);
+	tp = args->trans;
+	dp = args->dp;
+	mp = dp->i_mount;
+	da = XFS_DIR2_DB_TO_DA(mp, db);
+	/*
+	 * Punch the block's fsblock(s) out of the data fork.
+	 *
+	 * This can legitimately fail with ENOSPC: a removename running
+	 * without a space reservation may need a bmap btree split, or an
+	 * extents-to-btree conversion, to unmap the block.  That only
+	 * arises for un-fragmented directory blocks, where the unmap
+	 * punches a hole in the middle of an extent.
+	 *
+	 * When it happens the block must stay in the file and must not
+	 * be binval'd, so it has to be left in a consistent, empty and
+	 * properly logged state.  The buffer is not released here; the
+	 * error return tells the caller that nothing was removed.
+	 */
+	error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
+			XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
+			&done);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(done);
+	/*
+	 * The mapping is gone; invalidate the buffer in the transaction.
+	 */
+	xfs_da_binval(tp, bp);
+	/*
+	 * If it wasn't a data block there is nothing more to do.
+	 */
+	if (db >= XFS_DIR2_LEAF_FIRSTDB(mp))
+		return 0;
+	/*
+	 * Likewise if the block wasn't the last one in the directory.
+	 */
+	if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0))
+		return 0;
+	/*
+	 * Find the last mapped offset before the removed block; failure
+	 * here can't really happen unless there's kernel corruption.
+	 */
+	bno = da;
+	error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	if (db != mp->m_dirdatablk)
+		ASSERT(bno > 0);
+	else
+		ASSERT(bno == 0);
+	/*
+	 * Shrink the directory size to end at the new last block.
+	 */
+	dp->i_d.di_size = XFS_FSB_TO_B(mp, bno);
+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+	return 0;
+}
diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c
new file mode 100644
index 000000000..9cf677314
--- /dev/null
+++ b/libxfs/xfs_dir2_block.c
@@ -0,0 +1,1094 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_block.c
+ * XFS V2 directory implementation, single-block form.
+ * See xfs_dir2_block.h for the format.
+ */
+
+#include <xfs.h>
+
+/*
+ * Add an entry to a block directory.
+ */
+int						/* error */
+xfs_dir2_block_addname(
+	xfs_da_args_t		*args)		/* directory op arguments */
+{
+	xfs_dir2_data_free_t	*bf;		/* bestfree table in block */
+	xfs_dir2_block_t	*block;		/* directory block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
+	xfs_dabuf_t		*bp;		/* buffer for block */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	int			compact;	/* need to compact leaf ents */
+	xfs_dir2_data_entry_t	*dep;		/* block data entry */
+	xfs_inode_t		*dp;		/* directory inode */
+	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
+	int			error;		/* error return value */
+	xfs_dir2_data_unused_t	*enddup;	/* unused at end of data */
+	xfs_dahash_t		hash;		/* hash value of found entry */
+	int			high;		/* high index for binary srch */
+	int			highstale;	/* high stale index */
+	int			lfloghigh;	/* last final leaf to log */
+	int			lfloglow;	/* first final leaf to log */
+	int			len;		/* length of the new entry */
+	int			low;		/* low index for binary srch */
+	int			lowstale;	/* low stale index */
+	int			mid;		/* midpoint for binary srch */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log header */
+	int			needscan;	/* need to rescan freespace */
+	xfs_dir2_data_off_t	*tagp;		/* pointer to tag value */
+	xfs_trans_t		*tp;		/* transaction structure */
+
+	xfs_dir2_trace_args("block_addname", args);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	/*
+	 * Read the (one and only) directory block into dabuf bp.
+	 */
+	if (error =
+	    xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(bp != NULL);
+	block = bp->data;
+	/*
+	 * Check the magic number, corrupted if wrong.
+	 */
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_brelse(tp, bp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	len = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	/*
+	 * Set up pointers to parts of the block.
+	 */
+	bf = block->hdr.bestfree;
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	/*
+	 * No stale entries?  Need space for entry and new leaf.
+	 */
+	if (INT_GET(btp->stale, ARCH_CONVERT) == 0) {
+		/*
+		 * Tag just before the first leaf entry.
+		 */
+		tagp = (xfs_dir2_data_off_t *)blp - 1;
+		/*
+		 * Data object just before the first leaf entry.
+		 */
+		enddup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+		/*
+		 * If it's not free then can't do this add without cleaning up:
+		 * the space before the first leaf entry needs to be free so it
+		 * can be expanded to hold the pointer to the new entry.
+		 */
+		if (INT_GET(enddup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+			dup = enddup = NULL;
+		/*
+		 * Check out the biggest freespace and see if it's the same one.
+		 */
+		else {
+			dup = (xfs_dir2_data_unused_t *)
+			      ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT));
+			if (dup == enddup) {
+				/*
+				 * It is the biggest freespace, is it too small
+				 * to hold the new leaf too?
+				 */
+				if (INT_GET(dup->length, ARCH_CONVERT) < len + (uint)sizeof(*blp)) {
+#pragma mips_frequency_hint NEVER
+					/*
+					 * Yes, we use the second-largest
+					 * entry instead if it works.
+					 */
+					if (INT_GET(bf[1].length, ARCH_CONVERT) >= len)
+						dup = (xfs_dir2_data_unused_t *)
+						      ((char *)block +
+						       INT_GET(bf[1].offset, ARCH_CONVERT));
+					else
+						dup = NULL;
+				}
+			} else {
+				/*
+				 * Not the same free entry,
+				 * just check its length.
+				 */
+				if (INT_GET(dup->length, ARCH_CONVERT) < len) {
+#pragma mips_frequency_hint NEVER
+					dup = NULL;
+				}
+			}
+		}
+		compact = 0;
+	}
+	/*
+	 * If there are stale entries we'll use one for the leaf.
+	 * Is the biggest entry enough to avoid compaction?
+	 */
+	else if (INT_GET(bf[0].length, ARCH_CONVERT) >= len) {
+		dup = (xfs_dir2_data_unused_t *)
+		      ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT));
+		compact = 0;
+	}
+	/*
+	 * Will need to compact to make this work.
+	 */
+	else {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Tag just before the first leaf entry.
+		 */
+		tagp = (xfs_dir2_data_off_t *)blp - 1;
+		/*
+		 * Data object just before the first leaf entry.
+		 */
+		dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+		/*
+		 * If it's not free then the data will go where the
+		 * leaf data starts now, if it works at all.
+		 */
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			if (INT_GET(dup->length, ARCH_CONVERT) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) *
+			    (uint)sizeof(*blp) < len)
+				dup = NULL;
+		} else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len)
+			dup = NULL;
+		else
+			dup = (xfs_dir2_data_unused_t *)blp;
+		compact = 1;
+	}
+	/*
+	 * If this isn't a real add, we're done with the buffer.
+	 */
+	if (args->justcheck)
+		xfs_da_brelse(tp, bp);
+	/*
+	 * If we don't have space for the new entry & leaf ...
+	 */
+	if (!dup) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Not trying to actually do anything, or don't have
+		 * a space reservation: return no-space.
+		 */
+		if (args->justcheck || args->total == 0)
+			return XFS_ERROR(ENOSPC);
+		/*
+		 * Convert to the next larger format.
+		 * Then add the new entry in that format.
+		 */
+		error = xfs_dir2_block_to_leaf(args, bp);
+		xfs_da_buf_done(bp);
+		if (error)
+			return error;
+		return xfs_dir2_leaf_addname(args);
+	}
+	/*
+	 * Just checking, and it would work, so say so.
+	 */
+	if (args->justcheck)
+		return 0;
+	needlog = needscan = 0;
+	/*
+	 * If need to compact the leaf entries, do it now.
+	 * Leave the highest-numbered stale entry stale.
+	 * XXX should be the one closest to mid but mid is not yet computed.
+	 */
+	if (compact) {
+#pragma mips_frequency_hint NEVER
+		int	fromidx;		/* source leaf index */
+		int	toidx;			/* target leaf index */
+
+		for (fromidx = toidx = INT_GET(btp->count, ARCH_CONVERT) - 1,
+			highstale = lfloghigh = -1;
+		     fromidx >= 0;
+		     fromidx--) {
+			if (INT_GET(blp[fromidx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
+				if (highstale == -1)
+					highstale = toidx;
+				else {
+					if (lfloghigh == -1)
+						lfloghigh = toidx;
+					continue;
+				}
+			}
+			if (fromidx < toidx)
+				blp[toidx] = blp[fromidx];
+			toidx--;
+		}
+		lfloglow = toidx + 1 - (INT_GET(btp->stale, ARCH_CONVERT) - 1);
+		lfloghigh -= INT_GET(btp->stale, ARCH_CONVERT) - 1;
+		INT_MOD(btp->count, ARCH_CONVERT, -(INT_GET(btp->stale, ARCH_CONVERT) - 1));
+		xfs_dir2_data_make_free(tp, bp,
+			(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+			(xfs_dir2_data_aoff_t)((INT_GET(btp->stale, ARCH_CONVERT) - 1) * sizeof(*blp)),
+			&needlog, &needscan);
+		blp += INT_GET(btp->stale, ARCH_CONVERT) - 1;
+		INT_SET(btp->stale, ARCH_CONVERT, 1);
+		/*
+		 * If we now need to rebuild the bestfree map, do so.
+		 * This needs to happen before the next call to use_free.
+		 */
+		if (needscan) {
+			xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
+				&needlog, NULL);
+			needscan = 0;
+		}
+	}
+	/*
+	 * Set leaf logging boundaries to impossible state.
+	 * For the no-stale case they're set explicitly.
+	 */
+	else if (INT_GET(btp->stale, ARCH_CONVERT)) {
+		lfloglow = INT_GET(btp->count, ARCH_CONVERT);
+		lfloghigh = -1;
+	}
+	/*
+	 * Find the slot that's first lower than our hash value, -1 if none.
+	 */
+	for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; low <= high; ) {
+		mid = (low + high) >> 1;
+		if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
+			break;
+		if (hash < args->hashval)
+			low = mid + 1;
+		else
+			high = mid - 1;
+	}
+	while (mid >= 0 && INT_GET(blp[mid].hashval, ARCH_CONVERT) >= args->hashval) {
+#pragma mips_frequency_hint NEVER
+		mid--;
+	}
+	/*
+	 * No stale entries, will use enddup space to hold new leaf.
+	 */
+	if (INT_GET(btp->stale, ARCH_CONVERT) == 0) {
+		/*
+		 * Mark the space needed for the new leaf entry, now in use.
+		 */
+		xfs_dir2_data_use_free(tp, bp, enddup,
+			(xfs_dir2_data_aoff_t)
+			((char *)enddup - (char *)block + INT_GET(enddup->length, ARCH_CONVERT) -
+			 sizeof(*blp)),
+			(xfs_dir2_data_aoff_t)sizeof(*blp),
+			&needlog, &needscan);
+		/*
+		 * Update the tail (entry count).
+		 */
+		INT_MOD(btp->count, ARCH_CONVERT, +1);
+		/*
+		 * If we now need to rebuild the bestfree map, do so.
+		 * This needs to happen before the next call to use_free.
+		 */
+		if (needscan) {
+			xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
+				&needlog, NULL);
+			needscan = 0;
+		}
+		/*
+		 * Adjust pointer to the first leaf entry, we're about to move
+		 * the table up one to open up space for the new leaf entry.
+		 * Then adjust our index to match.
+		 */
+		blp--;
+		mid++;
+		if (mid)
+			ovbcopy(&blp[1], blp, mid * sizeof(*blp));
+		lfloglow = 0;
+		lfloghigh = mid;
+	}
+	/*
+	 * Use a stale leaf for our new entry.
+	 */
+	else {
+		for (lowstale = mid;
+		     lowstale >= 0 &&
+			INT_GET(blp[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
+		     lowstale--)
+			continue;
+		for (highstale = mid + 1;
+		     highstale < INT_GET(btp->count, ARCH_CONVERT) &&
+			INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
+			(lowstale < 0 || mid - lowstale > highstale - mid);
+		     highstale++)
+			continue;
+		/*
+		 * Move entries toward the low-numbered stale entry.
+		 */
+		if (lowstale >= 0 &&
+		    (highstale == INT_GET(btp->count, ARCH_CONVERT) ||
+		     mid - lowstale <= highstale - mid)) {
+			if (mid - lowstale)
+				ovbcopy(&blp[lowstale + 1], &blp[lowstale],
+					(mid - lowstale) * sizeof(*blp));
+			lfloglow = MIN(lowstale, lfloglow);
+			lfloghigh = MAX(mid, lfloghigh);
+		}
+		/*
+		 * Move entries toward the high-numbered stale entry.
+		 */
+		else {
+			ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT));
+			mid++;
+			if (highstale - mid)
+				ovbcopy(&blp[mid], &blp[mid + 1],
+					(highstale - mid) * sizeof(*blp));
+			lfloglow = MIN(mid, lfloglow);
+			lfloghigh = MAX(highstale, lfloghigh);
+		}
+		INT_MOD(btp->stale, ARCH_CONVERT, -1);
+	}
+	/*
+	 * Point to the new data entry.
+	 */
+	dep = (xfs_dir2_data_entry_t *)dup;
+	/*
+	 * Fill in the leaf entry.
+	 */
+	INT_SET(blp[mid].hashval, ARCH_CONVERT, args->hashval);
+	INT_SET(blp[mid].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
+	/*
+	 * Mark space for the data entry used.
+	 */
+	xfs_dir2_data_use_free(tp, bp, dup,
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+	/*
+	 * Create the new data entry.
+	 */
+	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->namelen = args->namelen;
+	bcopy(args->name, dep->name, args->namelen);
+	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+	/*
+	 * Clean up the bestfree array and log the header, tail, and entry.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+			NULL);
+	if (needlog)
+		xfs_dir2_data_log_header(tp, bp);
+	xfs_dir2_block_log_tail(tp, bp);
+	xfs_dir2_data_log_entry(tp, bp, dep);
+	xfs_dir2_data_check(dp, bp);
+	xfs_da_buf_done(bp);
+	return 0;
+}
+
+/*
+ * Log the leaf entries with indices first through last (inclusive).
+ */
+STATIC void
+xfs_dir2_block_log_leaf(
+	xfs_trans_t		*tp,		/* transaction structure */
+	xfs_dabuf_t		*bp,		/* block buffer */
+	int			first,		/* index of first logged leaf */
+	int			last)		/* index of last logged leaf */
+{
+	xfs_dir2_block_t	*blk = bp->data;	/* start of the block */
+	xfs_dir2_block_tail_t	*tail;		/* block tail */
+	xfs_dir2_leaf_entry_t	*ents;		/* leaf entry table */
+	uint			lo;		/* offset of first byte to log */
+	uint			hi;		/* offset of last byte to log */
+
+	tail = XFS_DIR2_BLOCK_TAIL_P(tp->t_mountp, blk);
+	ents = XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+	lo = (uint)((char *)&ents[first] - (char *)blk);
+	hi = (uint)((char *)&ents[last + 1] - (char *)blk - 1);
+	xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log the bytes of the buffer occupied by the block tail structure.
+ */
+STATIC void
+xfs_dir2_block_log_tail(
+	xfs_trans_t		*tp,		/* transaction structure */
+	xfs_dabuf_t		*bp)		/* block buffer */
+{
+	xfs_dir2_block_t	*blk = bp->data;	/* start of the block */
+	xfs_dir2_block_tail_t	*tail;		/* block tail */
+	uint			lo;		/* offset of first tail byte */
+	uint			hi;		/* offset of last tail byte */
+
+	tail = XFS_DIR2_BLOCK_TAIL_P(tp->t_mountp, blk);
+	lo = (uint)((char *)tail - (char *)blk);
+	hi = (uint)((char *)(tail + 1) - (char *)blk - 1);
+	xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Look up a name in a block-format directory.  External entry point;
+ * the search itself is done by xfs_dir2_block_lookup_int.
+ */
+int						/* error */
+xfs_dir2_block_lookup(
+	xfs_da_args_t		*args)		/* dir lookup arguments */
+{
+	xfs_dir2_dataptr_t	addr;		/* data address from the leaf */
+	xfs_dir2_block_t	*blk;		/* block structure */
+	xfs_dabuf_t		*bp;		/* block buffer */
+	xfs_inode_t		*dp;		/* incore inode */
+	xfs_dir2_data_entry_t	*entry;		/* block data entry */
+	int			err;		/* error return value */
+	int			idx;		/* leaf entry index */
+	xfs_dir2_leaf_entry_t	*lents;		/* block leaf entries */
+	xfs_dir2_block_tail_t	*tail;		/* block tail */
+
+	xfs_dir2_trace_args("block_lookup", args);
+	/*
+	 * Find the entry.  On failure (e.g. ENOENT) no buffer is held,
+	 * so the error is simply passed back.
+	 */
+	err = xfs_dir2_block_lookup_int(args, &bp, &idx);
+	if (err)
+		return err;
+	dp = args->dp;
+	blk = bp->data;
+	xfs_dir2_data_check(dp, bp);
+	/*
+	 * Follow the leaf entry's address to the data entry.
+	 */
+	tail = XFS_DIR2_BLOCK_TAIL_P(dp->i_mount, blk);
+	lents = XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+	addr = INT_GET(lents[idx].address, ARCH_CONVERT);
+	entry = (xfs_dir2_data_entry_t *)((char *)blk + XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, addr));
+	/*
+	 * Hand back the inode number, then release the block.
+	 */
+	args->inumber = INT_GET(entry->inumber, ARCH_CONVERT);
+	xfs_da_brelse(args->trans, bp);
+	return XFS_ERROR(EEXIST);
+}
+
+/*
+ * Internal block lookup: on success returns the held buffer and the leaf index.
+ */
+STATIC int					/* error */
+xfs_dir2_block_lookup_int(
+	xfs_da_args_t		*args,		/* dir lookup arguments */
+	xfs_dabuf_t		**bpp,		/* returned block buffer */
+	int			*entno)		/* returned entry number */
+{
+	xfs_dir2_dataptr_t	addr;		/* data entry address */
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
+	xfs_dabuf_t		*bp;		/* block buffer */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_dir2_data_entry_t	*dep;		/* block data entry */
+	xfs_inode_t		*dp;		/* incore inode */
+	int			error;		/* error return value */
+	xfs_dahash_t		hash;		/* found hash value */
+	int			high;		/* binary search high index */
+	int			low;		/* binary search low index */
+	int			mid;		/* binary search current idx */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	/*
+	 * Read the block at mp->m_dirdatablk, return error if we can't get it.
+	 */
+	if (error =
+	    xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(bp != NULL);
+	block = bp->data;
+	xfs_dir2_data_check(dp, bp);
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	/*
+	 * Loop doing a binary search of the leaf entries for our hash value.
+	 * If it's not there, release the buffer and return ENOENT.
+	 */
+	for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; ; ) {
+		ASSERT(low <= high);
+		mid = (low + high) >> 1;
+		if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
+			break;
+		if (hash < args->hashval)
+			low = mid + 1;
+		else
+			high = mid - 1;
+		if (low > high) {
+			ASSERT(args->oknoent);
+			xfs_da_brelse(tp, bp);
+			return XFS_ERROR(ENOENT);
+		}
+	}
+	/*
+	 * Several entries may share the hash; back up to the first of them.
+	 */
+	while (mid > 0 && INT_GET(blp[mid - 1].hashval, ARCH_CONVERT) == args->hashval) {
+#pragma mips_frequency_hint NEVER
+		mid--;
+	}
+	/*
+	 * Now loop forward through all the entries with the right hash
+	 * value looking for our name, skipping stale (null address) entries.
+	 */
+	do {
+		if ((addr = INT_GET(blp[mid].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		/*
+		 * Convert the leaf's data address to a pointer to the entry.
+		 */
+		dep = (xfs_dir2_data_entry_t *)
+			((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+		/*
+		 * Compare names; on a match give back the (still held) buffer & entry number.
+		 */
+		if (dep->namelen == args->namelen &&
+		    dep->name[0] == args->name[0] &&
+		    bcmp(dep->name, args->name, args->namelen) == 0) {
+			*bpp = bp;
+			*entno = mid;
+			return 0;
+		}
+	} while (++mid < INT_GET(btp->count, ARCH_CONVERT) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash);
+	/*
+	 * No match, release the buffer and return ENOENT.
+	 */
+	ASSERT(args->oknoent);
+	xfs_da_brelse(tp, bp);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Remove an entry from a block format directory.
+ * If that makes the block small enough to fit in shortform, transform it.
+ */
+int						/* error */
+xfs_dir2_block_removename(
+	xfs_da_args_t		*args)		/* directory operation args */
+{
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block leaf pointer */
+	xfs_dabuf_t		*bp;		/* block buffer */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_dir2_data_entry_t	*dep;		/* block data entry */
+	xfs_inode_t		*dp;		/* incore inode */
+	int			ent;		/* block leaf entry index */
+	int			error;		/* error return value */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log block header */
+	int			needscan;	/* need to fixup bestfree */
+	xfs_dir2_sf_hdr_t	sfh;		/* shortform header */
+	int			size;		/* shortform size */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args("block_removename", args);
+	/*
+	 * Look up the entry in the block.  Gets the buffer and entry index.
+	 * It will always be there, the vnodeops level does a lookup first.
+	 */
+	if (error = xfs_dir2_block_lookup_int(args, &bp, &ent)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	block = bp->data;
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	/*
+	 * Point to the data entry using the leaf entry.
+	 */
+	dep = (xfs_dir2_data_entry_t *)
+	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+	/*
+	 * Mark the data entry's space free.
+	 */
+	needlog = needscan = 0;
+	xfs_dir2_data_make_free(tp, bp,
+		(xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
+		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+	/*
+	 * Fix up the block tail: one more stale leaf entry, then log the tail.
+	 */
+	INT_MOD(btp->stale, ARCH_CONVERT, +1);
+	xfs_dir2_block_log_tail(tp, bp);
+	/*
+	 * Remove the leaf entry by marking it stale (null address); log just that entry.
+	 */
+	INT_SET(blp[ent].address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+	xfs_dir2_block_log_leaf(tp, bp, ent, ent);
+	/*
+	 * Fix up bestfree, log the header if necessary.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+			NULL);
+	if (needlog)
+		xfs_dir2_data_log_header(tp, bp);
+	xfs_dir2_data_check(dp, bp);
+	/*
+	 * If the shortform size exceeds XFS_IFORK_DSIZE(dp), stay in block form.
+	 */
+	if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
+	    XFS_IFORK_DSIZE(dp)) {
+		xfs_da_buf_done(bp);
+		return 0;
+	}
+	/*
+	 * It fits, do the conversion; the buffer is handed to xfs_dir2_block_to_sf.
+	 */
+	return xfs_dir2_block_to_sf(args, bp, size, &sfh);
+}
+
+/*
+ * Replace an entry in a V2 block directory: the name stays,
+ * only its inode number is changed to args->inumber.
+ */
+int						/* error */
+xfs_dir2_block_replace(
+	xfs_da_args_t		*args)		/* directory operation args */
+{
+	xfs_dir2_dataptr_t	addr;		/* data address from the leaf */
+	xfs_dir2_block_t	*blk;		/* block structure */
+	xfs_dabuf_t		*bp;		/* block buffer */
+	xfs_inode_t		*dp;		/* incore inode */
+	xfs_dir2_data_entry_t	*entry;		/* block data entry */
+	int			err;		/* error return value */
+	int			idx;		/* leaf entry index */
+	xfs_dir2_leaf_entry_t	*lents;		/* block leaf entries */
+	xfs_dir2_block_tail_t	*tail;		/* block tail */
+
+	xfs_dir2_trace_args("block_replace", args);
+	/*
+	 * Find the entry, getting the buffer and leaf index.  The caller
+	 * has already done a lookup, so a failure here is not expected.
+	 */
+	err = xfs_dir2_block_lookup_int(args, &bp, &idx);
+	if (err) {
+#pragma mips_frequency_hint NEVER
+		return err;
+	}
+	dp = args->dp;
+	blk = bp->data;
+	tail = XFS_DIR2_BLOCK_TAIL_P(dp->i_mount, blk);
+	lents = XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+	/*
+	 * Locate the data entry through its leaf entry.
+	 */
+	addr = INT_GET(lents[idx].address, ARCH_CONVERT);
+	entry = (xfs_dir2_data_entry_t *)((char *)blk + XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, addr));
+	ASSERT(INT_GET(entry->inumber, ARCH_CONVERT) != args->inumber);
+	/*
+	 * Store the new inode number and log the modified entry.
+	 */
+	INT_SET(entry->inumber, ARCH_CONVERT, args->inumber);
+	xfs_dir2_data_log_entry(args->trans, bp, entry);
+	xfs_dir2_data_check(dp, bp);
+	xfs_da_buf_done(bp);
+	return 0;
+}
+
+/*
+ * qsort comparator: orders block leaf entries by ascending hash value.
+ */
+static int					/* sort order */
+xfs_dir2_block_sort(
+	const void			*a,	/* first leaf entry */
+	const void			*b)	/* second leaf entry */
+{
+	const xfs_dir2_leaf_entry_t	*la = a;	/* a as a leaf entry */
+	const xfs_dir2_leaf_entry_t	*lb = b;	/* b as a leaf entry */
+
+	if (INT_GET(la->hashval, ARCH_CONVERT) < INT_GET(lb->hashval, ARCH_CONVERT))
+		return -1;
+	return INT_GET(la->hashval, ARCH_CONVERT) >
+	       INT_GET(lb->hashval, ARCH_CONVERT);
+}
+
+/*
+ * Convert a V2 leaf directory to a V2 block directory if possible; returns 0 if it can't.
+ */
+int						/* error */
+xfs_dir2_leaf_to_block(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*lbp,		/* leaf buffer */
+	xfs_dabuf_t		*dbp)		/* data buffer */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* leaf bests table */
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_unused_t	*dup;		/* unused data entry */
+	int			error;		/* error return value */
+	int			from;		/* leaf from index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* file system mount point */
+	int			needlog;	/* need to log data header */
+	int			needscan;	/* need to scan for bestfree */
+	xfs_dir2_sf_hdr_t	sfh;		/* shortform header */
+	int			size;		/* bytes used */
+	xfs_dir2_data_off_t	*tagp;		/* end of entry (tag) */
+	int			to;		/* block/leaf to index */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = lbp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	/*
+	 * If there are data blocks other than the first one, take this
+	 * opportunity to remove trailing empty data blocks that may have
+	 * been left behind during no-space-reservation operations; these show
+	 * up in the leaf bests table.  A non-empty last block ends the attempt.
+	 */
+	while (dp->i_d.di_size > mp->m_dirblksize) {
+		bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+		if (INT_GET(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1], ARCH_CONVERT) ==
+		    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
+#pragma mips_frequency_hint NEVER
+			if (error =
+			    xfs_dir2_leaf_trim_data(args, lbp,
+				    (xfs_dir2_db_t)(INT_GET(ltp->bestcount, ARCH_CONVERT) - 1)))
+				goto out;
+		} else {
+			error = 0;
+			goto out;
+		}
+	}
+	/*
+	 * Read the data block if we don't already have it, give up if it fails.
+	 */
+	if (dbp == NULL &&
+	    (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp,
+		    XFS_DATA_FORK))) {
+#pragma mips_frequency_hint NEVER
+		goto out;
+	}
+	block = dbp->data;
+	ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+	/*
+	 * Size of the "leaf" area in the block: tail plus one entry per non-stale leaf.
+	 */
+	size = (uint)sizeof(block->tail) +
+	       (uint)sizeof(*lep) * (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+	/*
+	 * Look at the last data entry, found via the tag ending the block.
+	 */
+	tagp = (xfs_dir2_data_off_t *)((char *)block + mp->m_dirblksize) - 1;
+	dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+	/*
+	 * If it's not free or is too short we can't do it (not an error).
+	 */
+	if (INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG || INT_GET(dup->length, ARCH_CONVERT) < size) {
+		error = 0;
+		goto out;
+	}
+	/*
+	 * Start converting it to block form.
+	 */
+	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC);
+	needlog = 1;
+	needscan = 0;
+	/*
+	 * Use up the space at the end of the block (blp/btp).
+	 */
+	xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size,
+		&needlog, &needscan);
+	/*
+	 * Initialize the block tail: count of live entries, no stale ones.
+	 */
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	INT_SET(btp->count, ARCH_CONVERT, INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+	INT_SET(btp->stale, ARCH_CONVERT, 0);
+	xfs_dir2_block_log_tail(tp, dbp);
+	/*
+	 * Initialize the block leaf area.  We compact out stale entries.
+	 */
+	lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+		if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		lep[to++] = leaf->ents[from];
+	}
+	ASSERT(to == INT_GET(btp->count, ARCH_CONVERT));
+	xfs_dir2_block_log_leaf(tp, dbp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1);
+	/*
+	 * Scan the bestfree if we need it and log the data block header.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+			NULL);
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	/*
+	 * Pitch the old leaf block; lbp is cleared so the exit path skips it.
+	 */
+	error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
+	lbp = NULL;
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		goto out;
+	}
+	/*
+	 * Now see if the resulting block can be shrunk to shortform.
+	 */
+	if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
+	    XFS_IFORK_DSIZE(dp)) {
+		error = 0;
+		goto out;
+	}
+	return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
+out:
+	if (lbp)
+		xfs_da_buf_done(lbp);
+	if (dbp)
+		xfs_da_buf_done(dbp);
+	return error;
+}
+
+/*
+ * Convert the shortform directory to block form, keeping each entry's offset.
+ */
+int						/* error */
+xfs_dir2_sf_to_block(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_dir2_db_t		blkno;		/* dir-relative block # (0) */
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
+	xfs_dabuf_t		*bp;		/* block buffer */
+	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
+	char			buf[XFS_DIR2_SF_MAX_SIZE];	/* sf buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			dummy;		/* trash */
+	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
+	int			endoffset;	/* end of data objects */
+	int			error;		/* error return value */
+	int			i;		/* index */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log block header */
+	int			needscan;	/* need to scan block freespc */
+	int			newoffset;	/* offset from current entry */
+	int			offset;		/* target block offset */
+	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+	xfs_dir2_data_off_t	*tagp;		/* end of data entry */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args("sf_to_block", args);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Bomb out if the shortform directory is way too short.
+	 */
+	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	/*
+	 * Copy the directory into the stack buffer.
+	 * Then pitch the incore inode data so we can make extents.
+	 */
+	bcopy(sfp, buf, dp->i_df.if_bytes);
+	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
+	dp->i_d.di_size = 0;
+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+	/*
+	 * Reset pointer - old sfp is gone, use the stack copy from here on.
+	 */
+	sfp = (xfs_dir2_sf_t *)buf;
+	/*
+	 * Add block 0 to the inode.
+	 */
+	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	/*
+	 * Initialize the data block.
+	 */
+	error = xfs_dir2_data_init(args, blkno, &bp);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	block = bp->data;
+	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC);
+	/*
+	 * Compute size of block "tail" area: tail plus a leaf per name, incl. . and ..
+	 */
+	i = (uint)sizeof(*btp) +
+	    (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+	/*
+	 * The whole thing is initialized to free by the init routine.
+	 * Say we're using the leaf and tail area.
+	 */
+	dup = (xfs_dir2_data_unused_t *)block->u;
+	needlog = needscan = 0;
+	xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
+		&needscan);
+	ASSERT(needscan == 0);
+	/*
+	 * Fill in the tail.
+	 */
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	INT_SET(btp->count, ARCH_CONVERT, INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2);	/* ., .. */
+	INT_ZERO(btp->stale, ARCH_CONVERT);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	endoffset = (uint)((char *)blp - (char *)block);
+	/*
+	 * Remove the freespace, we'll manage it.
+	 */
+	xfs_dir2_data_use_free(tp, bp, dup,
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+		INT_GET(dup->length, ARCH_CONVERT), &needlog, &needscan);
+	/*
+	 * Create entry for . (data entry, trailing tag, and leaf slot 0)
+	 */
+	dep = (xfs_dir2_data_entry_t *)
+	      ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
+	INT_SET(dep->inumber, ARCH_CONVERT, dp->i_ino);
+	dep->namelen = 1;
+	dep->name[0] = '.';
+	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+	xfs_dir2_data_log_entry(tp, bp, dep);
+	INT_SET(blp[0].hashval, ARCH_CONVERT, xfs_dir_hash_dot);
+	INT_SET(blp[0].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+	/*
+	 * Create entry for .. (inode number is the shortform header's parent)
+	 */
+	dep = (xfs_dir2_data_entry_t *)
+		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
+	INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT));
+	dep->namelen = 2;
+	dep->name[0] = dep->name[1] = '.';
+	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+	xfs_dir2_data_log_entry(tp, bp, dep);
+	INT_SET(blp[1].hashval, ARCH_CONVERT, xfs_dir_hash_dotdot);
+	INT_SET(blp[1].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+	offset = XFS_DIR2_DATA_FIRST_OFFSET;
+	/*
+	 * Loop over existing entries, stuff them in.  i counts entries copied.
+	 */
+	if ((i = 0) == INT_GET(sfp->hdr.count, ARCH_CONVERT))
+		sfep = NULL;
+	else
+		sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	/*
+	 * Need to preserve the existing offset values in the sf directory.
+	 * Insert holes (unused entries) where necessary.
+	 */
+	while (offset < endoffset) {
+		/*
+		 * sfep is null when we reach the end of the list.
+		 */
+		if (sfep == NULL)
+			newoffset = endoffset;
+		else
+			newoffset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT);
+		/*
+		 * There should be a hole here, make one.
+		 */
+		if (offset < newoffset) {
+			dup = (xfs_dir2_data_unused_t *)
+			      ((char *)block + offset);
+			INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+			INT_SET(dup->length, ARCH_CONVERT, newoffset - offset);
+			INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT,
+				(xfs_dir2_data_off_t)
+				((char *)dup - (char *)block));
+			xfs_dir2_data_log_unused(tp, bp, dup);
+			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
+				dup, &dummy);
+			offset += INT_GET(dup->length, ARCH_CONVERT);
+			continue;
+		}
+		/*
+		 * Copy a real entry; its leaf slot follows those of . and ..
+		 */
+		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
+		INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT));
+		dep->namelen = sfep->namelen;
+		bcopy(sfep->name, dep->name, dep->namelen);
+		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+		INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+		xfs_dir2_data_log_entry(tp, bp, dep);
+		INT_SET(blp[2 + i].hashval, ARCH_CONVERT, xfs_da_hashname((char *)sfep->name, sfep->namelen));
+		INT_SET(blp[2 + i].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp,
+						 (char *)dep - (char *)block));
+		offset = (int)((char *)(tagp + 1) - (char *)block);
+		if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT))
+			sfep = NULL;
+		else
+			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+	}
+	/*
+	 * Sort the leaf entries by hash value.
+	 */
+	qsort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort);
+	/*
+	 * Log the leaf entry area and tail.
+	 * Already logged the header in data_init, ignore needlog.
+	 */
+	ASSERT(needscan == 0);
+	xfs_dir2_block_log_leaf(tp, bp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1);
+	xfs_dir2_block_log_tail(tp, bp);
+	xfs_dir2_data_check(dp, bp);
+	xfs_da_buf_done(bp);
+	return 0;
+}
diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c
new file mode 100644
index 000000000..d921a21cc
--- /dev/null
+++ b/libxfs/xfs_dir2_data.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_data.c
+ * Core data block handling routines for XFS V2 directories.
+ * See xfs_dir2_data.h for data structures.
+ */
+#include <xfs.h>
+
+#ifdef DEBUG
+/*
+ * Debug-only consistency check of a directory data block (header + entries).
+ * The input can also be a block-format directory (XFS_DIR2_BLOCK_MAGIC).
+ * Pop an assert if we find anything bad; the block itself is only read.
+ */
+void
+xfs_dir2_data_check(
+	xfs_inode_t		*dp,		/* incore inode pointer */
+	xfs_dabuf_t		*bp)		/* data block's buffer */
+{
+	xfs_dir2_dataptr_t	addr;		/* addr for leaf lookup */
+	xfs_dir2_data_free_t	*bf;		/* bestfree table */
+	xfs_dir2_block_tail_t	*btp;		/* block tail (block format only) */
+	int			count;		/* count of entries found */
+	xfs_dir2_data_t		*d;		/* data block pointer */
+	xfs_dir2_data_entry_t	*dep;		/* data entry */
+	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
+	xfs_dir2_data_unused_t	*dup;		/* unused entry */
+	char			*endp;		/* end of useful data */
+	int			freeseen;	/* mask of bestfrees seen */
+	xfs_dahash_t		hash;		/* hash of current name */
+	int			i;		/* leaf index */
+	int			lastfree;	/* last entry was unused */
+	xfs_dir2_leaf_entry_t	*lep;		/* block leaf entries (block format only) */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	char			*p;		/* current data position */
+	int			stale;		/* count of stale leaves */
+
+	mp = dp->i_mount;
+	d = bp->data;
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	bf = d->hdr.bestfree;
+	p = (char *)d->u;
+	if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+		lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+		endp = (char *)lep;	/* entries stop where the leaf array begins */
+	} else
+		endp = (char *)d + mp->m_dirblksize;
+	count = lastfree = freeseen = 0;
+	/*
+	 * An empty bestfree slot (length 0) must also have offset 0; mark it seen.
+	 */
+	if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {
+		ASSERT(INT_GET(bf[0].offset, ARCH_CONVERT) == 0);
+		freeseen |= 1 << 0;
+	}
+	if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+		ASSERT(INT_GET(bf[1].offset, ARCH_CONVERT) == 0);
+		freeseen |= 1 << 1;
+	}
+	if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+		ASSERT(INT_GET(bf[2].offset, ARCH_CONVERT) == 0);
+		freeseen |= 1 << 2;
+	}
+	ASSERT(INT_GET(bf[0].length, ARCH_CONVERT) >= INT_GET(bf[1].length, ARCH_CONVERT));	/* sorted, longest first */
+	ASSERT(INT_GET(bf[1].length, ARCH_CONVERT) >= INT_GET(bf[2].length, ARCH_CONVERT));
+	/*
+	 * Walk every data/unused entry from d->u up to endp.
+	 */
+	while (p < endp) {
+		dup = (xfs_dir2_data_unused_t *)p;
+		/*
+		 * If it's unused, look for the space in the bestfree table.
+		 * If we find it, account for that, else make sure it is no
+		 * longer than the smallest slot, so it doesn't need to be there.
+		 */
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			ASSERT(lastfree == 0);	/* no two unused entries in a row */
+			ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) ==
+			       (char *)dup - (char *)d);
+			dfp = xfs_dir2_data_freefind(d, dup);
+			if (dfp) {
+				i = (int)(dfp - bf);
+				ASSERT((freeseen & (1 << i)) == 0);	/* each slot matches once */
+				freeseen |= 1 << i;
+			} else
+				ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(bf[2].length, ARCH_CONVERT));
+			p += INT_GET(dup->length, ARCH_CONVERT);
+			lastfree = 1;
+			continue;
+		}
+		/*
+		 * It's a real entry.  Validate the fields.
+		 * If this is a block directory then make sure it's referenced
+		 * (same address and name hash) in the leaf section of the block.
+		 * The linear search is crude but this is DEBUG code.
+		 */
+		dep = (xfs_dir2_data_entry_t *)p;
+		ASSERT(dep->namelen != 0);
+		ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0);
+		ASSERT(INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) ==
+		       (char *)dep - (char *)d);
+		count++;
+		lastfree = 0;
+		if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+			addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				(xfs_dir2_data_aoff_t)
+				((char *)dep - (char *)d));
+			hash = xfs_da_hashname((char *)dep->name, dep->namelen);
+			for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+				if (INT_GET(lep[i].address, ARCH_CONVERT) == addr &&
+				    INT_GET(lep[i].hashval, ARCH_CONVERT) == hash)
+					break;
+			}
+			ASSERT(i < INT_GET(btp->count, ARCH_CONVERT));	/* found a matching leaf entry */
+		}
+		p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+	/*
+	 * Need to have seen all the entries and all the bestfree slots.
+	 */
+	ASSERT(freeseen == 7);	/* bits 0-2: all three bestfree slots */
+	if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+			if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+				stale++;
+			if (i > 0)	/* leaf entries must be in ascending hash order */
+				ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT));
+		}
+		ASSERT(count == INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT));
+		ASSERT(stale == INT_GET(btp->stale, ARCH_CONVERT));
+	}
+}
+#endif
+
+/*
+ * Given a data block and an unused entry from that block,
+ * return the bestfree entry whose offset matches it, or NULL if none does.
+ */
+xfs_dir2_data_free_t *
+xfs_dir2_data_freefind(
+	xfs_dir2_data_t		*d,		/* data block */
+	xfs_dir2_data_unused_t	*dup)		/* data unused entry */
+{
+	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
+	xfs_dir2_data_aoff_t	off;		/* offset value needed */
+#if defined(DEBUG) && defined(__KERNEL__)
+	int			matched;	/* matched the value */
+	int			seenzero;	/* saw a 0 bestfree entry */
+#endif
+
+	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d);
+#if defined(DEBUG) && defined(__KERNEL__)
+	/*
+	 * Validate some consistency in the bestfree table.
+	 * Check order, non-overlapping entries, and if we find the
+	 * one we're looking for it has to be exact.
+	 */
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
+	     dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+	     dfp++) {
+		if (INT_GET(dfp->offset, ARCH_CONVERT) == 0) {
+			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == 0);
+			seenzero = 1;
+			continue;
+		}
+		ASSERT(seenzero == 0);	/* no used slot may follow an empty one */
+		if (INT_GET(dfp->offset, ARCH_CONVERT) == off) {
+			matched = 1;
+			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(dup->length, ARCH_CONVERT));
+		} else if (off < INT_GET(dfp->offset, ARCH_CONVERT))
+			ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(dfp->offset, ARCH_CONVERT));
+		else
+			ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) + INT_GET(dfp->length, ARCH_CONVERT) <= off);
+		ASSERT(matched || INT_GET(dfp->length, ARCH_CONVERT) >= INT_GET(dup->length, ARCH_CONVERT));
+		if (dfp > &d->hdr.bestfree[0])
+			ASSERT(INT_GET(dfp[-1].length, ARCH_CONVERT) >= INT_GET(dfp[0].length, ARCH_CONVERT));
+	}
+#endif
+	/*
+	 * If this is smaller than the smallest bestfree entry,
+	 * it can't be there since they're sorted.
+	 */
+	if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT))
+		return NULL;
+	/*
+	 * Look at the three bestfree entries for our guy; an empty slot (offset 0) ends the search.
+	 */
+	for (dfp = &d->hdr.bestfree[0];
+	     dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+	     dfp++) {
+		if (INT_GET(dfp->offset, ARCH_CONVERT) == 0)
+			return NULL;
+		if (INT_GET(dfp->offset, ARCH_CONVERT) == off)
+			return dfp;
+	}
+	/*
+	 * Didn't find it.  This only happens if there are duplicate lengths.
+	 */
+	return NULL;
+}
+
+/*
+ * Insert an unused-space entry into the bestfree table (kept sorted, longest first).
+ */
+xfs_dir2_data_free_t *				/* entry inserted, or NULL */
+xfs_dir2_data_freeinsert(
+	xfs_dir2_data_t		*d,		/* data block pointer */
+	xfs_dir2_data_unused_t	*dup,		/* unused space */
+	int			*loghead)	/* out: set to 1 if table changed */
+{
+	xfs_dir2_data_free_t	*dfp;		/* bestfree table pointer */
+	xfs_dir2_data_free_t	new;		/* new bestfree entry */
+
+#ifdef __KERNEL__
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+#endif
+	dfp = d->hdr.bestfree;
+	INT_COPY(new.length, dup->length, ARCH_CONVERT); 
+	INT_SET(new.offset, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d));
+	/*
+	 * Insert at position 0, 1, or 2, pushing shorter entries down; or not at all.
+	 */
+	if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[0].length, ARCH_CONVERT)) {
+		dfp[2] = dfp[1];	/* old slot 2 falls off the end */
+		dfp[1] = dfp[0];
+		dfp[0] = new;
+		*loghead = 1;
+		return &dfp[0];
+	}
+	if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[1].length, ARCH_CONVERT)) {
+		dfp[2] = dfp[1];
+		dfp[1] = new;
+		*loghead = 1;
+		return &dfp[1];
+	}
+	if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[2].length, ARCH_CONVERT)) {
+		dfp[2] = new;
+		*loghead = 1;
+		return &dfp[2];
+	}
+	return NULL;	/* not longer than slot 2: table and *loghead untouched */
+}
+
+/*
+ * Remove a bestfree entry from the table, sliding later entries up and zeroing slot 2.
+ */
+void
+xfs_dir2_data_freeremove(
+	xfs_dir2_data_t		*d,		/* data block pointer */
+	xfs_dir2_data_free_t	*dfp,		/* bestfree entry pointer */
+	int			*loghead)	/* out: log data header */
+{
+#ifdef __KERNEL__
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+#endif
+	/*
+	 * It's the first entry, slide the next 2 up.
+	 */
+	if (dfp == &d->hdr.bestfree[0]) {
+		d->hdr.bestfree[0] = d->hdr.bestfree[1];
+		d->hdr.bestfree[1] = d->hdr.bestfree[2];
+	}
+	/*
+	 * It's the second entry, slide the 3rd entry up.
+	 */
+	else if (dfp == &d->hdr.bestfree[1])
+		d->hdr.bestfree[1] = d->hdr.bestfree[2];
+	/*
+	 * Must be the last entry; dfp has to point into this block's table.
+	 */
+	else
+		ASSERT(dfp == &d->hdr.bestfree[2]);
+	/*
+	 * Clear the 3rd entry, must be zero now.
+	 */
+        INT_ZERO(d->hdr.bestfree[2].length, ARCH_CONVERT);
+	INT_ZERO(d->hdr.bestfree[2].offset, ARCH_CONVERT);
+	*loghead = 1;	/* set unconditionally: the table always changes here */
+}
+
+/*
+ * Given a data block, rebuild its bestfree table from scratch by walking every entry.
+ */
+void
+xfs_dir2_data_freescan(
+	xfs_mount_t		*mp,		/* filesystem mount point */
+	xfs_dir2_data_t		*d,		/* data block pointer */
+	int			*loghead,	/* out: log data header */
+	char			*aendp)		/* in: end of data area, or NULL to compute it */
+{
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_dir2_data_entry_t	*dep;		/* active data entry */
+	xfs_dir2_data_unused_t	*dup;		/* unused data entry */
+	char			*endp;		/* end of block's data */
+	char			*p;		/* current entry pointer */
+
+#ifdef __KERNEL__
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+#endif
+	/*
+	 * Start by clearing the table; the header always needs logging after that.
+	 */
+	bzero(d->hdr.bestfree, sizeof(d->hdr.bestfree));
+	*loghead = 1;
+	/*
+	 * Set up pointers.  A caller-supplied end pointer takes precedence.
+	 */
+	p = (char *)d->u;
+	if (aendp)
+		endp = aendp;
+	else if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+		endp = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	} else
+		endp = (char *)d + mp->m_dirblksize;
+	/*
+	 * Loop over the block's entries.
+	 */
+	while (p < endp) {
+		dup = (xfs_dir2_data_unused_t *)p;
+		/*
+		 * If it's a free entry, offer it to the table (freeinsert keeps only the 3 longest).
+		 */
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			ASSERT((char *)dup - (char *)d ==
+			       INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT));
+			xfs_dir2_data_freeinsert(d, dup, loghead);
+			p += INT_GET(dup->length, ARCH_CONVERT);
+		}
+		/*
+		 * For active entries, check their tags and skip them.
+		 */
+		else {
+			dep = (xfs_dir2_data_entry_t *)p;
+			ASSERT((char *)dep - (char *)d ==
+			       INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT));
+			p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		}
+	}
+}
+
+/*
+ * Initialize a data block at the given block number in the directory.
+ * Give back the buffer for the created block via *bpp (untouched on error).
+ */
+int						/* error */
+xfs_dir2_data_init(
+	xfs_da_args_t		*args,		/* directory operation args */
+	xfs_dir2_db_t		blkno,		/* logical dir block number */
+	xfs_dabuf_t		**bpp)		/* output block buffer */
+{
+	xfs_dabuf_t		*bp;		/* block buffer */
+	xfs_dir2_data_t		*d;		/* pointer to block */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
+	int			error;		/* error return value */
+	int			i;		/* bestfree index */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+        int                     t;              /* bytes in the block after the header */
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Get the buffer set up for the block.
+	 */
+	error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp,
+		XFS_DATA_FORK);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(bp != NULL);
+	/*
+	 * Initialize the header: bestfree[0] starts right after the header, the other slots are empty.
+	 */
+	d = bp->data;
+	INT_SET(d->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+	INT_SET(d->hdr.bestfree[0].offset, ARCH_CONVERT, (xfs_dir2_data_off_t)sizeof(d->hdr));
+	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
+                INT_ZERO(d->hdr.bestfree[i].length, ARCH_CONVERT);
+		INT_ZERO(d->hdr.bestfree[i].offset, ARCH_CONVERT);
+        }
+	/*
+	 * Set up a single unused entry covering the block's whole body.
+	 */
+	dup = &d->u[0].unused;
+	INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+        
+        t=mp->m_dirblksize - (uint)sizeof(d->hdr);
+        INT_SET(d->hdr.bestfree[0].length, ARCH_CONVERT, t);
+	INT_SET(dup->length, ARCH_CONVERT, t);
+	INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT,
+		(xfs_dir2_data_off_t)((char *)dup - (char *)d));
+	/*
+	 * Log it and return it.
+	 */
+	xfs_dir2_data_log_header(tp, bp);
+	xfs_dir2_data_log_unused(tp, bp, dup);
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Log an active data entry, from its first byte through the last byte of its trailing tag.
+ */
+void
+xfs_dir2_data_log_entry(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* block buffer */
+	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
+{
+	xfs_dir2_data_t		*d;		/* data block pointer */
+
+	d = bp->data;
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),	/* first byte offset */
+		(uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) -
+		       (char *)d - 1));	/* last byte offset (inclusive) */
+}
+
+/*
+ * Log the whole data block header structure (d->hdr).
+ */
+void
+xfs_dir2_data_log_header(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp)		/* block buffer */
+{
+	xfs_dir2_data_t		*d;		/* data block pointer */
+
+	d = bp->data;
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), 
+		(uint)(sizeof(d->hdr) - 1));	/* last byte offset (inclusive) */
+}
+
+/*
+ * Log a data unused entry: its leading fields and its trailing tag, not the space between.
+ */
+void
+xfs_dir2_data_log_unused(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* block buffer */
+	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
+{
+	xfs_dir2_data_t		*d;		/* data block pointer */
+
+	d = bp->data;
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	/*
+	 * Log the first part of the unused entry, up to the last byte of its length field.
+	 */
+	xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d),
+		(uint)((char *)&dup->length + sizeof(dup->length) -
+		       1 - (char *)d));
+	/*
+	 * Log the end (tag) of the unused entry.
+	 */
+	xfs_da_log_buf(tp, bp,
+		(uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT) - (char *)d),
+		(uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT) - (char *)d +
+		       sizeof(xfs_dir2_data_off_t) - 1));
+}
+
+/*
+ * Make a byte range in the data block unused, merging it with any unused
+ * neighbor.  Its current contents are unimportant.  *needscanp must be 0 on entry.
+ */
+void
+xfs_dir2_data_make_free(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* block buffer */
+	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */
+	xfs_dir2_data_aoff_t	len,		/* length in bytes */
+	int			*needlogp,	/* out: log header */
+	int			*needscanp)	/* out: regen bestfree */
+{
+	xfs_dir2_data_t		*d;		/* data block pointer */
+	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
+	char			*endptr;	/* end of data area */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needscan;	/* need to regen bestfree */
+	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
+	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */
+	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
+
+	mp = tp->t_mountp;
+	d = bp->data;
+	/*
+	 * Figure out where the end of the data area is (block format stops at the leaf entries).
+	 */
+	if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC)
+		endptr = (char *)d + mp->m_dirblksize;
+	else {
+		xfs_dir2_block_tail_t	*btp;	/* block tail */
+
+		ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+		endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	}
+	/*
+	 * If this isn't the start of the block, use the tag just before us
+	 * to find the previous entry and see if it's free.
+	 */
+	if (offset > sizeof(d->hdr)) {
+		xfs_dir2_data_off_t	*tagp;	/* tag just before us */
+
+		tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1;
+		prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT));
+		if (INT_GET(prevdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+			prevdup = NULL;
+	} else
+		prevdup = NULL;
+	/*
+	 * If this isn't the end of the block, see if the entry after
+	 * us is free.
+	 */
+	if ((char *)d + offset + len < endptr) {
+		postdup =
+			(xfs_dir2_data_unused_t *)((char *)d + offset + len);
+		if (INT_GET(postdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+			postdup = NULL;
+	} else
+		postdup = NULL;
+	ASSERT(*needscanp == 0);	/* caller must pass it in cleared */
+	needscan = 0;
+	/*
+	 * Previous and following entries are both free,
+	 * merge everything into a single free entry.
+	 */
+	if (prevdup && postdup) {
+		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */
+
+		/*
+		 * See if prevdup and/or postdup are in bestfree table.
+		 */
+		dfp = xfs_dir2_data_freefind(d, prevdup);
+		dfp2 = xfs_dir2_data_freefind(d, postdup);
+		/*
+		 * We need a rescan unless there are exactly 2 free entries
+		 * namely our two.  Then we know what's happening, otherwise
+		 * since the third bestfree is there, there might be more
+		 * entries.
+		 */
+		needscan = INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT) != 0;
+		/*
+		 * Fix up the new big freespace: prevdup grows over our range and postdup.
+		 */
+		INT_MOD(prevdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT));
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(prevdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, prevdup);
+		if (!needscan) {
+			/*
+			 * Has to be the case that entries 0 and 1 are
+			 * dfp and dfp2 (don't know which is which), and
+			 * entry 2 is empty.
+			 * Remove entry 1 first then entry 0.
+			 */
+			ASSERT(dfp && dfp2);
+			if (dfp == &d->hdr.bestfree[1]) {	/* normalize: dfp = slot 0, dfp2 = slot 1 */
+				dfp = &d->hdr.bestfree[0];
+				ASSERT(dfp2 == dfp);
+				dfp2 = &d->hdr.bestfree[1];
+			}
+			xfs_dir2_data_freeremove(d, dfp2, needlogp);
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			/*
+			 * Now insert the new entry.
+			 */
+			dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+			ASSERT(dfp == &d->hdr.bestfree[0]);
+			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(prevdup->length, ARCH_CONVERT));
+			ASSERT(INT_GET(dfp[1].length, ARCH_CONVERT) == 0);
+			ASSERT(INT_GET(dfp[2].length, ARCH_CONVERT) == 0);
+		}
+	}
+	/*
+	 * The entry before us is free, merge with it.
+	 */
+	else if (prevdup) {
+		dfp = xfs_dir2_data_freefind(d, prevdup);	/* look up before the length changes */
+		INT_MOD(prevdup->length, ARCH_CONVERT, len);
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(prevdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, prevdup);
+		/*
+		 * If the previous entry was in the table, the new entry
+		 * is longer, so it will be in the table too.  Remove
+		 * the old one and add the new one.
+		 */
+		if (dfp) {
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			(void)xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+		}
+		/*
+		 * Otherwise we need a scan if the new entry is big enough.
+		 */
+		else
+			needscan = INT_GET(prevdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT);
+	}
+	/*
+	 * The following entry is free, merge with it.
+	 */
+	else if (postdup) {
+		dfp = xfs_dir2_data_freefind(d, postdup);
+		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);	/* merged entry starts at our offset */
+		INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+		INT_SET(newdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT));
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, newdup);
+		/*
+		 * If the following entry was in the table, the new entry
+		 * is longer, so it will be in the table too.  Remove
+		 * the old one and add the new one.
+		 */
+		if (dfp) {
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+		}
+		/*
+		 * Otherwise we need a scan if the new entry is big enough.
+		 */
+		else
+			needscan = INT_GET(newdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT);
+	}
+	/*
+	 * Neither neighbor is free.  Make a new entry and offer it to the bestfree table.
+	 */
+	else {
+		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+		INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+		INT_SET(newdup->length, ARCH_CONVERT, len);
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, newdup);
+		(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+	}
+	*needscanp = needscan;
+}
+
+/*
+ * Take a byte range out of an existing unused space and make it un-free; leftovers stay unused entries.
+ */
+void
+xfs_dir2_data_use_free(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* data block buffer */
+	xfs_dir2_data_unused_t	*dup,		/* unused entry */
+	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */
+	xfs_dir2_data_aoff_t	len,		/* length to use */
+	int			*needlogp,	/* out: need to log header */
+	int			*needscanp)	/* out: need regen bestfree */
+{
+	xfs_dir2_data_t		*d;		/* data block */
+	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
+	int			matchback;	/* matches end of freespace */
+	int			matchfront;	/* matches start of freespace */
+	int			needscan;	/* need to regen bestfree */
+	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
+	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
+	int			oldlen;		/* old unused entry's length */
+
+	d = bp->data;
+	ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+	       INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+	ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG);
+	ASSERT(offset >= (char *)dup - (char *)d);	/* range must lie inside dup */
+	ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d);
+	ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT));
+	/*
+	 * Look up the entry in the bestfree table (before dup's length is modified below).
+	 */
+	dfp = xfs_dir2_data_freefind(d, dup);
+	oldlen = INT_GET(dup->length, ARCH_CONVERT);
+	ASSERT(dfp || oldlen <= INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT));
+	/*
+	 * Check for alignment with front and back of the entry.
+	 */
+	matchfront = (char *)dup - (char *)d == offset;
+	matchback = (char *)dup + oldlen - (char *)d == offset + len;
+	ASSERT(*needscanp == 0);	/* caller must pass it in cleared */
+	needscan = 0;
+	/*
+	 * If we matched it exactly we just need to get rid of it from the
+	 * bestfree table (or rescan if slot 2 is occupied: others may qualify).
+	 */
+	if (matchfront && matchback) {
+		if (dfp) {
+			needscan = INT_GET(d->hdr.bestfree[2].offset, ARCH_CONVERT) != 0;
+			if (!needscan)
+				xfs_dir2_data_freeremove(d, dfp, needlogp);
+		}
+	}
+	/*
+	 * We match the first part of the entry.
+	 * Make a new entry with the remaining freespace.
+	 */
+	else if (matchfront) {
+		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+		INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+		INT_SET(newdup->length, ARCH_CONVERT, oldlen - len);
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, newdup);
+		/*
+		 * If it was in the table, remove it and add the new one.
+		 */
+		if (dfp) {
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+			ASSERT(dfp != NULL);
+			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT));
+			ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d);
+			/*
+			 * If we got inserted at the last slot,
+			 * that means we don't know if there was a better
+			 * choice for the last slot, or not.  Rescan.
+			 */
+			needscan = dfp == &d->hdr.bestfree[2];
+		}
+	}
+	/*
+	 * We match the last part of the entry.
+	 * Trim the allocated space off the tail of the entry.
+	 */
+	else if (matchback) {
+		newdup = dup;	/* entry keeps its start; only length and tag change */
+		INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t)
+			(((char *)d + offset) - (char *)newdup));
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, newdup);
+		/*
+		 * If it was in the table, remove it and add the new one.
+		 */
+		if (dfp) {
+			xfs_dir2_data_freeremove(d, dfp, needlogp);
+			dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+			ASSERT(dfp != NULL);
+			ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT));
+			ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d);
+			/*
+			 * If we got inserted at the last slot,
+			 * that means we don't know if there was a better
+			 * choice for the last slot, or not.  Rescan.
+			 */
+			needscan = dfp == &d->hdr.bestfree[2];
+		}
+	}
+	/*
+	 * Poking out the middle of an entry.
+	 * Make two new entries: newdup in front of the range, newdup2 behind it.
+	 */
+	else {
+		newdup = dup;
+		INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t)
+			(((char *)d + offset) - (char *)newdup));
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, newdup);
+		newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+		INT_SET(newdup2->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+		INT_SET(newdup2->length, ARCH_CONVERT, oldlen - len - INT_GET(newdup->length, ARCH_CONVERT));
+		INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup2, ARCH_CONVERT), ARCH_CONVERT,
+			(xfs_dir2_data_off_t)((char *)newdup2 - (char *)d));
+		xfs_dir2_data_log_unused(tp, bp, newdup2);
+		/*
+		 * If the old entry was in the table, we need to scan
+		 * if the 3rd entry was valid, since these entries
+		 * are smaller than the old one.
+		 * If we don't need to scan that means there were 1 or 2
+		 * entries in the table, and removing the old and adding
+		 * the 2 new will work.
+		 */
+		if (dfp) {
+			needscan = INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT) != 0;
+			if (!needscan) {
+				xfs_dir2_data_freeremove(d, dfp, needlogp);
+				(void)xfs_dir2_data_freeinsert(d, newdup,
+					needlogp);
+				(void)xfs_dir2_data_freeinsert(d, newdup2,
+					needlogp);
+			}
+		}
+	}
+	*needscanp = needscan;
+}
diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c
new file mode 100644
index 000000000..89761db5e
--- /dev/null
+++ b/libxfs/xfs_dir2_leaf.c
@@ -0,0 +1,1496 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_leaf.c
+ * XFS directory version 2 implementation - single leaf form
+ * see xfs_dir2_leaf.h for data structures.
+ * These directories have multiple XFS_DIR2_DATA blocks and one
+ * XFS_DIR2_LEAF1 block containing the hash table and freespace map.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Convert a block form directory to leaf form: add a leaf1 block, copy the hash entries into it, and turn dbp into a data block.
+ */
+int						/* error */
+xfs_dir2_block_to_leaf(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*dbp)		/* input block's buffer */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* leaf's bestsp entries */
+	xfs_dablk_t		blkno;		/* leaf block's bno */
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
+	xfs_dir2_block_tail_t	*btp;		/* block's tail */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dabuf_t		*lbp;		/* leaf block's buffer */
+	xfs_dir2_db_t		ldb;		/* leaf block's bno */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log block header */
+	int			needscan;	/* need to rescan bestfree */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_b("block_to_leaf", args, dbp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Add the leaf block to the inode.
+	 * This interface will only put blocks in the leaf/node range.
+	 * Since that's empty now, we'll get the root (block 0 in range).
+	 */
+	if (error = xfs_da_grow_inode(args, &blkno)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
+	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
+	/*
+	 * Initialize the leaf block as a leaf1 block, get a buffer for it.
+	 */
+	if (error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(lbp != NULL);
+	leaf = lbp->data;
+	block = dbp->data;
+	xfs_dir2_data_check(dp, dbp);
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	/*
+	 * Set the counts in the leaf header from the block's tail.
+	 */
+	INT_COPY(leaf->hdr.count, btp->count, ARCH_CONVERT); /* INT_: type change */
+	INT_COPY(leaf->hdr.stale, btp->stale, ARCH_CONVERT); /* INT_: type change */
+	/*
+	 * Copy all leaf entries, stale ones included.  Could compact these
+	 * but I think we always do the conversion after squeezing out stale entries.
+	 */
+	bcopy(blp, leaf->ents, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
+	xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
+	needscan = 0;
+	needlog = 1;	/* the block header is rewritten below (magic) */
+	/*
+	 * Make the space formerly occupied by the leaf entries and block
+	 * tail be free: everything from blp to the end of the block.
+	 */
+	xfs_dir2_data_make_free(tp, dbp,
+		(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+		(xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
+				       (char *)blp),
+		&needlog, &needscan);
+	/*
+	 * Fix up the block header, make it a data block.
+	 */
+	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+	if (needscan)
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+			NULL);
+	/*
+	 * Set up leaf tail and a one-entry bests table for the data block.
+	 */
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	INT_SET(ltp->bestcount, ARCH_CONVERT, 1);
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	INT_COPY(bestsp[0], block->hdr.bestfree[0].length, ARCH_CONVERT);
+	/*
+	 * Log the data header and leaf bests table; dbp is not released here.
+	 */
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	xfs_dir2_leaf_check(dp, lbp);
+	xfs_dir2_data_check(dp, dbp);
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
+	xfs_da_buf_done(lbp);
+	return 0;
+}
+
+/*
+ * Add the entry described by args to a leaf form directory (converting to node form when the leaf block is full).
+ */
+int						/* error */
+xfs_dir2_leaf_addname(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* freespace table in leaf */
+	int			compact;	/* need to compact leaves */
+	xfs_dir2_data_t		*data;		/* data block structure */
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
+	int			error;		/* error return value */
+	int			grown;		/* allocated new data block */
+	int			highstale;	/* index of next stale leaf */
+	int			i;		/* temporary, index */
+	int			index;		/* leaf table position */
+	xfs_dabuf_t		*lbp;		/* leaf's buffer */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	int			length;		/* length of new entry */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
+	int			lfloglow;	/* low leaf logging index */
+	int			lfloghigh;	/* high leaf logging index */
+	int			lowstale;	/* index of prev stale leaf */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needbytes;	/* leaf block bytes needed */
+	int			needlog;	/* need to log data header */
+	int			needscan;	/* need to rescan data free */
+	xfs_dir2_data_off_t	*tagp;		/* end of data entry */
+	xfs_trans_t		*tp;		/* transaction pointer */
+	xfs_dir2_db_t		use_block;	/* data block number */
+
+	xfs_dir2_trace_args("leaf_addname", args);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	/*
+	 * Read the leaf block.
+	 */
+	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+		XFS_DATA_FORK);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(lbp != NULL);
+	/*
+	 * Look up the entry by hash value and name.
+	 * We know it's not there, our caller has already done a lookup.
+	 * So the index is of the entry to insert in front of.
+	 * But if there are dup hash values the index is of the first of those.
+	 */
+	index = xfs_dir2_leaf_search_hash(args, lbp);
+	leaf = lbp->data;
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	/*
+	 * See if there are any entries with the same hash value
+	 * and space in their block for the new entry; sharing a data
+	 * block among same-hash entries improves the lookup of those entries.
+	 * Note that index and lep advance here and are used again below.
+	 */
+	for (use_block = -1, lep = &leaf->ents[index];
+	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     index++, lep++) {
+		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT));
+		ASSERT(INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF);
+		if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
+			use_block = i;
+			break;
+		}
+	}
+	/*
+	 * Didn't find a block yet, linear search all the data blocks.
+	 */
+	if (use_block == -1) {
+		for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) {
+			/*
+			 * Remember the first missing block (bests slot is NULLDATAOFF).
+			 */
+			if (INT_GET(bestsp[i], ARCH_CONVERT) == NULLDATAOFF && use_block == -1)
+				use_block = i;
+			else if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
+				use_block = i;
+				break;
+			}
+		}
+	}
+	/*
+	 * How many bytes do we need in the leaf block?
+	 */
+	needbytes =
+		(INT_GET(leaf->hdr.stale, ARCH_CONVERT) != 0 ? 0 : (uint)sizeof(leaf->ents[0])) +
+		(use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
+	/*
+	 * Now kill use_block if it refers to a missing block, so we
+	 * can use it as an indication of allocation needed.
+	 */
+	if (use_block != -1 && INT_GET(bestsp[use_block], ARCH_CONVERT) == NULLDATAOFF)
+		use_block = -1;
+	/*
+	 * If we don't have enough free bytes but we can make enough
+	 * by compacting out stale entries, we'll do that.
+	 */
+	if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < needbytes &&
+	    INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1) {
+#pragma mips_frequency_hint NEVER
+		compact = 1;
+	}
+	/*
+	 * Otherwise if we don't have enough free bytes we need to
+	 * convert to node form.
+	 */
+	else if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <
+		 needbytes) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Just checking or no space reservation, give up.
+		 */
+		if (args->justcheck || args->total == 0) {
+			xfs_da_brelse(tp, lbp);
+			return XFS_ERROR(ENOSPC);
+		}
+		/*
+		 * Convert to node form.
+		 */
+		error = xfs_dir2_leaf_to_node(args, lbp);
+		xfs_da_buf_done(lbp);
+		if (error)
+			return error;
+		/*
+		 * Then add the new entry using the node form code.
+		 */
+		return xfs_dir2_node_addname(args);
+	}
+	/*
+	 * Otherwise it will fit without compaction.
+	 */
+	else
+		compact = 0;
+	/*
+	 * If just checking, then it will fit unless we needed to allocate
+	 * a new data block.
+	 */
+	if (args->justcheck) {
+		xfs_da_brelse(tp, lbp);
+		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
+	}
+	/*
+	 * If no allocations are allowed, return now before we've
+	 * changed anything.
+	 */
+	if (args->total == 0 && use_block == -1) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_brelse(tp, lbp);
+		return XFS_ERROR(ENOSPC);
+	}
+	/*
+	 * Need to compact the leaf entries, removing stale ones.
+	 * Leave one stale entry behind - the one closest to our
+	 * insertion index - and we'll shift that one to our insertion
+	 * point later.
+	 */
+	if (compact) {
+#pragma mips_frequency_hint NEVER
+		xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
+			&lfloglow, &lfloghigh);
+	}
+	/*
+	 * There are stale entries: start the logging range out empty
+	 * (low past the end, high at -1) so MIN/MAX below can widen it.
+	 */
+	else if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
+		lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		lfloghigh = -1;
+	}
+	/*
+	 * If there was no data block space found, we need to allocate
+	 * a new one.
+	 */
+	if (use_block == -1) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Add the new data block.
+		 */
+		if (error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
+				&use_block)) {
+			xfs_da_brelse(tp, lbp);
+			return error;
+		}
+		/*
+		 * Initialize the block.
+		 */
+		if (error = xfs_dir2_data_init(args, use_block, &dbp)) {
+			xfs_da_brelse(tp, lbp);
+			return error;
+		}
+		/*
+		 * If we're adding a new data block on the end we need to
+		 * extend the bests table.  Copy it up one entry.
+		 */
+		if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) {
+			bestsp--;
+			ovbcopy(&bestsp[1], &bestsp[0],
+				INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0]));
+			INT_MOD(ltp->bestcount, ARCH_CONVERT, +1);
+			xfs_dir2_leaf_log_tail(tp, lbp);
+			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+		}
+		/*
+		 * If we're filling in a previously empty block just log it.
+		 */
+		else
+			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
+		data = dbp->data;
+		INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
+		grown = 1;
+	}
+	/*
+	 * Already had space in some data block.
+	 * Just read that one in.
+	 */
+	else {
+		if (error =
+		    xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block),
+			    -1, &dbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+			xfs_da_brelse(tp, lbp);
+			return error;
+		}
+		data = dbp->data;
+		grown = 0;
+	}
+	xfs_dir2_data_check(dp, dbp);
+	/*
+	 * Point to the biggest freespace in our data block.
+	 */
+	dup = (xfs_dir2_data_unused_t *)
+	      ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
+	ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length);
+	needscan = needlog = 0;
+	/*
+	 * Mark the initial part of our freespace in use for the new entry.
+	 */
+	xfs_dir2_data_use_free(tp, dbp, dup,
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+		&needlog, &needscan);
+	/*
+	 * Initialize our new entry (at last); its tag holds its own offset.
+	 */
+	dep = (xfs_dir2_data_entry_t *)dup;
+	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->namelen = args->namelen;
+	bcopy(args->name, dep->name, dep->namelen);
+	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
+	/*
+	 * Need to rescan to fix up the bestfree table.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+	/*
+	 * Need to log the data block's header.
+	 */
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	xfs_dir2_data_log_entry(tp, dbp, dep);
+	/*
+	 * If the bests table needs to be changed, do it.
+	 * Log the change unless we've already done that (grown case above).
+	 */
+	if (INT_GET(bestsp[use_block], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+		INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
+		if (!grown)
+			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
+	}
+	/*
+	 * Now we need to make room to insert the leaf entry.
+	 * If there are no stale entries, we just insert a hole at index.
+	 */
+	if (INT_GET(leaf->hdr.stale, ARCH_CONVERT) == 0) {
+		/*
+		 * lep is still good as the index leaf entry.
+		 */
+		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+			ovbcopy(lep, lep + 1,
+				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+		/*
+		 * Record low and high logging indices for the leaf.
+		 */
+		lfloglow = index;
+		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+	}
+	/*
+	 * There are stale entries.
+	 * We will use one of them for the new entry.
+	 * It's probably not at the right location, so we'll have to
+	 * shift some up or down first.
+	 */
+	else {
+		/*
+		 * If we didn't compact before, we need to find the nearest
+		 * stale entries before and after our insertion point.
+		 */
+		if (compact == 0) {
+			/*
+			 * Find the first stale entry before the insertion
+			 * point, if any (lowstale ends at -1 if none).
+			 */
+			for (lowstale = index - 1;
+			     lowstale >= 0 &&
+				INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+				XFS_DIR2_NULL_DATAPTR;
+			     lowstale--)
+				continue;
+			/*
+			 * Find the next stale entry at or after the insertion
+			 * point, if any.   Stop if we go so far that the
+			 * lowstale entry would be better.
+			 */
+			for (highstale = index;
+			     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+				XFS_DIR2_NULL_DATAPTR &&
+				(lowstale < 0 ||
+				 index - lowstale - 1 >= highstale - index);
+			     highstale++)
+				continue;
+		}
+		/*
+		 * If the low one is better, use it.
+		 */
+		if (lowstale >= 0 &&
+		    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+		     index - lowstale - 1 < highstale - index)) {
+			ASSERT(index - lowstale - 1 >= 0);
+			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			/*
+			 * Shift the entries between lowstale and index over the
+			 * stale entry; the new entry then goes in slot index - 1.
+			 */
+			if (index - lowstale - 1 > 0)
+				ovbcopy(&leaf->ents[lowstale + 1],
+					&leaf->ents[lowstale],
+					(index - lowstale - 1) * sizeof(*lep));
+			lep = &leaf->ents[index - 1];
+			lfloglow = MIN(lowstale, lfloglow);
+			lfloghigh = MAX(index - 1, lfloghigh);
+		}
+		/*
+		 * The high one is better, so use that one.
+		 */
+		else {
+			ASSERT(highstale - index >= 0);
+			ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			/*
+			 * Shift the entries from index up to highstale over the
+			 * stale entry; the new entry then goes in slot index.
+			 */
+			if (highstale - index > 0)
+				ovbcopy(&leaf->ents[index],
+					&leaf->ents[index + 1],
+					(highstale - index) * sizeof(*lep));
+			lep = &leaf->ents[index];
+			lfloglow = MIN(index, lfloglow);
+			lfloghigh = MAX(highstale, lfloghigh);
+		}
+		INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+	}
+	/*
+	 * Fill in the new leaf entry.
+	 */
+	INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
+	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, INT_GET(*tagp, ARCH_CONVERT)));
+	/*
+	 * Log the leaf fields and give up the buffers.
+	 */
+	xfs_dir2_leaf_log_header(tp, lbp);
+	xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
+	xfs_dir2_leaf_check(dp, lbp);
+	xfs_da_buf_done(lbp);
+	xfs_dir2_data_check(dp, dbp);
+	xfs_da_buf_done(dbp);
+	return 0;
+}
+
+
+#ifdef DEBUG
+/*
+ * Debug-only sanity check of a leaf1 block.
+ * An ASSERT fires on the first inconsistency found.
+ */
+void
+xfs_dir2_leaf_check(
+	xfs_inode_t		*dp,		/* incore directory inode */
+	xfs_dabuf_t		*bp)		/* leaf's buffer */
+{
+	int			count;		/* entries in the leaf */
+	int			idx;		/* leaf index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			nstale;		/* stale entries seen */
+
+	mp = dp->i_mount;
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	/*
+	 * Loose bound only; should also factor in the bests table size (deducible from di_size).
+	 */
+	ASSERT(count <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+	/*
+	 * The entry array must end at or before the bests table.
+	 */
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ASSERT((char *)&leaf->ents[count] <=
+	       (char *)XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT));
+	/*
+	 * Hash values must be non-decreasing; tally the stale entries.
+	 */
+	for (idx = 0, nstale = 0; idx < count; idx++) {
+		if (idx + 1 < count)
+			ASSERT(INT_GET(leaf->ents[idx].hashval, ARCH_CONVERT) <=
+			       INT_GET(leaf->ents[idx + 1].hashval, ARCH_CONVERT));
+		if (INT_GET(leaf->ents[idx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			nstale++;
+	}
+	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == nstale);
+}
+#endif	/* DEBUG */
+
+/*
+ * Squeeze every stale entry out of the leaf, in place.
+ * The header and any leaf entries that moved are logged.
+ */
+void
+xfs_dir2_leaf_compact(
+	xfs_da_args_t	*args,		/* operation arguments */
+	xfs_dabuf_t	*bp)		/* leaf buffer */
+{
+	int		dst;		/* next slot to fill */
+	int		firstmoved;	/* lowest slot rewritten, or -1 */
+	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
+	int		src;		/* slot being examined */
+
+	leaf = bp->data;
+	if (INT_GET(leaf->hdr.stale, ARCH_CONVERT) == 0) {
+#pragma mips_frequency_hint NEVER
+		return;
+	}
+	/*
+	 * Walk the table once, sliding live entries down over stale ones.
+	 */
+	dst = 0;
+	firstmoved = -1;
+	for (src = 0; src < INT_GET(leaf->hdr.count, ARCH_CONVERT); src++) {
+		if (INT_GET(leaf->ents[src].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR) {
+			/* Entries already in their final slot need no copy. */
+			if (src != dst) {
+				if (firstmoved < 0)
+					firstmoved = dst;
+				leaf->ents[dst] = leaf->ents[src];
+			}
+			dst++;
+		}
+	}
+	/*
+	 * Fix up the counts, then log the header and the moved entries.
+	 */
+	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == src - dst);
+	INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(INT_GET(leaf->hdr.stale, ARCH_CONVERT)));
+	INT_SET(leaf->hdr.stale, ARCH_CONVERT, 0);
+	xfs_dir2_leaf_log_header(args->trans, bp);
+	if (firstmoved >= 0)
+		xfs_dir2_leaf_log_ents(args->trans, bp, firstmoved, dst - 1);
+}
+
+/*
+ * Compact the leaf entries, removing stale ones.
+ * Leave one stale entry behind - the one closest to our
+ * insertion index - and the caller will shift that one to our insertion
+ * point later.
+ * Return new insertion index, where the remaining stale entry is,
+ * and leaf logging indices.  Nothing is logged here; that is left to the caller.
+ */
+void
+xfs_dir2_leaf_compact_x1(
+	xfs_dabuf_t	*bp,		/* leaf buffer */
+	int		*indexp,	/* in/out: insertion index */
+	int		*lowstalep,	/* out: stale entry before us */
+	int		*highstalep,	/* out: stale entry after us */
+	int		*lowlogp,	/* out: low log index */
+	int		*highlogp)	/* out: high log index */
+{
+	int		from;		/* source copy index */
+	int		highstale;	/* stale entry at/after index */
+	int		index;		/* insertion index */
+	int		keepstale;	/* source index of kept stale */
+	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
+	int		lowstale;	/* stale entry before index */
+	int		newindex;	/* new insertion index */
+	int		to;		/* destination copy index */
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1);
+	index = *indexp;
+	/*
+	 * Find the first stale entry before our index, if any (-1 if none).
+	 */
+	for (lowstale = index - 1;
+	     lowstale >= 0 &&
+		INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
+	     lowstale--)
+		continue;
+	/*
+	 * Find the first stale entry at or after our index, if any.
+	 * Stop if the answer would be worse than lowstale.
+	 */
+	for (highstale = index;
+	     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+		INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
+		(lowstale < 0 || index - lowstale > highstale - index);
+	     highstale++)
+		continue;
+	/*
+	 * Pick the better of lowstale and highstale (lowstale wins a tie).
+	 */
+	if (lowstale >= 0 &&
+	    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+	     index - lowstale <= highstale - index))
+		keepstale = lowstale;
+	else
+		keepstale = highstale;
+	/*
+	 * Copy the entries in place, removing all the stale entries
+	 * except keepstale.
+	 */
+	for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+		/*
+		 * Notice the new value of index.
+		 */
+		if (index == from)
+			newindex = to;
+		if (from != keepstale &&
+		    INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
+			if (from == to)	/* first entry dropped: low log index */
+				*lowlogp = to;
+			continue;
+		}
+		/*
+		 * Record the new keepstale value for the insertion.
+		 */
+		if (from == keepstale)
+			lowstale = highstale = to;
+		/*
+		 * Copy only the entries that have moved.
+		 */
+		if (from > to)
+			leaf->ents[to] = leaf->ents[from];
+		to++;
+	}
+	ASSERT(from > to);
+	/*
+	 * If the insertion point was past the last entry,
+	 * set the new insertion point accordingly.
+	 */
+	if (index == from)
+		newindex = to;
+	*indexp = newindex;
+	/*
+	 * Adjust the leaf header values: one stale entry remains.
+	 */
+	INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(from - to));
+	INT_SET(leaf->hdr.stale, ARCH_CONVERT, 1);
+	/*
+	 * Remember the low/high stale value only in the "right"
+	 * direction: the other one becomes -1 (low) or count (high).
+	 */
+	if (lowstale >= newindex)
+		lowstale = -1;
+	else
+		highstale = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	*highlogp = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1;
+	*lowstalep = lowstale;
+	*highstalep = highstale;
+}
+
+/*
+ * Set up a new, empty leaf block; leaf1 and leafn magic are both accepted.
+ * The header is initialized and logged, and for leaf1 so is the tail.
+ * On success the block's buffer is handed back through bpp.
+ */
+int
+xfs_dir2_leaf_init(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dir2_db_t		bno,		/* directory block number */
+	xfs_dabuf_t		**bpp,		/* out: leaf buffer */
+	int			magic)		/* magic number for block */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dabuf_t		*lbp;		/* new leaf's buffer */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+
+	dp = args->dp;
+	ASSERT(dp != NULL);
+	mp = dp->i_mount;
+	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
+	       bno < XFS_DIR2_FREE_FIRSTDB(mp));
+	/*
+	 * Obtain a buffer covering the block.
+	 */
+	error = xfs_da_get_buf(args->trans, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1,
+		&lbp, XFS_DATA_FORK);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(lbp != NULL);
+	leaf = lbp->data;
+	/*
+	 * Fill in the header: magic, zeroed forw/back and zeroed counts.
+	 */
+	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, magic);
+	INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT);
+	INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT);
+	INT_ZERO(leaf->hdr.count, ARCH_CONVERT);
+	INT_ZERO(leaf->hdr.stale, ARCH_CONVERT);
+	xfs_dir2_leaf_log_header(args->trans, lbp);
+	/*
+	 * A leaf-format (leaf1) block also has a tail.
+	 * Start it with an empty bests table; our caller has the
+	 * real bests values to copy into the block.
+	 */
+	if (magic == XFS_DIR2_LEAF1_MAGIC) {
+		ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+		INT_SET(ltp->bestcount, ARCH_CONVERT, 0);
+		xfs_dir2_leaf_log_tail(args->trans, lbp);
+	}
+	*bpp = lbp;
+	return 0;
+}
+
+/*
+ * Log a range of bests table entries in a leaf1 block.
+ * Entries first through last, inclusive, are logged; the byte range
+ * passed on is relative to the start of the block.
+ */
+void
+xfs_dir2_leaf_log_bests(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	int			first,		/* first entry to log */
+	int			last)		/* last entry to log */
+{
+	xfs_dir2_data_off_t	*bests;		/* start of bests table */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
+	bests = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	xfs_da_log_buf(tp, bp, (uint)((char *)&bests[first] - (char *)leaf),
+		(uint)((char *)&bests[last] - (char *)leaf + sizeof(bests[0]) - 1));
+}
+
+/*
+ * Log leaf entries first through last (inclusive) of a leaf1 or leafn block.
+ */
+void
+xfs_dir2_leaf_log_ents(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	int			first,		/* first entry to log */
+	int			last)		/* last entry to log */
+{
+	uint			hioff;		/* offset of last byte to log */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	uint			looff;		/* offset of first byte to log */
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+	       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	looff = (uint)((char *)&leaf->ents[first] - (char *)leaf);
+	hioff = (uint)((char *)&leaf->ents[last] - (char *)leaf +
+		       sizeof(leaf->ents[0]) - 1);
+	xfs_da_log_buf(tp, bp, looff, hioff);
+}
+
+/*
+ * Log the whole header structure of a leaf1 or leafn block.
+ */
+void
+xfs_dir2_leaf_log_header(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp)		/* leaf buffer */
+{
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC ||
+	       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	xfs_da_log_buf(tp, bp,
+		(uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1));
+}
+
+/*
+ * Log the tail of the leaf1 block.
+ * The range runs from the tail structure to the last byte of the block.
+ */
+void
+xfs_dir2_leaf_log_tail(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp)		/* leaf buffer */
+{
+	uint			first;		/* offset of the tail */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	mp = tp->t_mountp;
+	first = (uint)((char *)XFS_DIR2_LEAF_TAIL_P(mp, leaf) - (char *)leaf);
+	xfs_da_log_buf(tp, bp, first, (uint)(mp->m_dirblksize - 1));
+}
+
+/*
+ * Look up the name described by args in a leaf format directory.
+ * The search itself is done by xfs_dir2_leaf_lookup_int.  When it
+ * finds the name, the entry's inode number is stored in args->inumber,
+ * both buffers are released and EEXIST is returned; when it fails,
+ * its error is returned unchanged.
+ */
+int
+xfs_dir2_leaf_lookup(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	int			index;		/* found entry index */
+	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_dir2_data_aoff_t	off;		/* entry offset in data block */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args("leaf_lookup", args);
+	/*
+	 * Search the leaf; success hands back both buffers and the index.
+	 */
+	error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	tp = args->trans;
+	dp = args->dp;
+	xfs_dir2_leaf_check(dp, lbp);
+	/*
+	 * Follow the matching leaf entry to its data entry.
+	 */
+	leaf = lbp->data;
+	lep = &leaf->ents[index];
+	off = XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT));
+	dep = (xfs_dir2_data_entry_t *)((char *)dbp->data + off);
+	/*
+	 * Hand the inode number back and drop both buffers.
+	 */
+	args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+	xfs_da_brelse(tp, dbp);
+	xfs_da_brelse(tp, lbp);
+	return XFS_ERROR(EEXIST);
+}
+
+/*
+ * Look up name/hash in the leaf block.
+ * On a match return 0 with lbpp = leaf buffer, dbpp = data buffer (both still held) and indexp = leaf index.
+ * On ENOENT or a read error every buffer read here has been released, and dbpp and indexp are not written;
+ * lbpp may already have been stored by then, so callers must not use it after a failure.
+ */
+STATIC int					/* error */
+xfs_dir2_leaf_lookup_int(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		**lbpp,		/* out: leaf buffer */
+	int			*indexp,	/* out: index in leaf block */
+	xfs_dabuf_t		**dbpp)		/* out: data buffer */
+{
+	xfs_dir2_db_t		curdb;		/* current data block number */
+	xfs_dabuf_t		*dbp;		/* data buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	int			index;		/* index in leaf block */
+	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_dir2_db_t		newdb;		/* new data block number */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	/*
+	 * Read the leaf block into the buffer.
+	 */
+	if (error =
+	    xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+		    XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	*lbpp = lbp;
+	leaf = lbp->data;
+	xfs_dir2_leaf_check(dp, lbp);
+	/*
+	 * Look for the first leaf entry with our hash value.
+	 */
+	index = xfs_dir2_leaf_search_hash(args, lbp);
+	/*
+	 * Loop over all the entries with the right hash value
+	 * looking to match the name; at most one data buffer is held at a time.
+	 */
+	for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
+	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     lep++, index++) {
+		/*
+		 * Skip over stale leaf entries.
+		 */
+		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		/*
+		 * Get the new data block number.
+		 */
+		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		/*
+		 * If it's not the same as the old data block number,
+		 * need to pitch the old one and read the new one.
+		 */
+		if (newdb != curdb) {
+			if (dbp)
+				xfs_da_brelse(tp, dbp);
+			if (error =
+			    xfs_da_read_buf(tp, dp,
+				    XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp,
+				    XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+				xfs_da_brelse(tp, lbp);
+				return error;
+			}
+			xfs_dir2_data_check(dp, dbp);
+			curdb = newdb;
+		}
+		/*
+		 * Point to the data entry.
+		 */
+		dep = (xfs_dir2_data_entry_t *)
+		      ((char *)dbp->data +
+		       XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+		/*
+		 * If it matches then return it; both buffers stay held for the caller.
+		 */
+		if (dep->namelen == args->namelen &&
+		    dep->name[0] == args->name[0] &&
+		    bcmp(dep->name, args->name, args->namelen) == 0) {
+			*dbpp = dbp;
+			*indexp = index;
+			return 0;
+		}
+	}
+	/*
+	 * No match found: release whatever we hold and return ENOENT.
+	 */
+	ASSERT(args->oknoent);
+	if (dbp)
+		xfs_da_brelse(tp, dbp);
+	xfs_da_brelse(tp, lbp);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Remove the entry named by args from a leaf format directory.
+ */
+int						/* error */
+xfs_dir2_leaf_removename(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* leaf block best freespace */
+	xfs_dir2_data_t		*data;		/* data block structure */
+	xfs_dir2_db_t		db;		/* data block number */
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dir2_db_t		i;		/* temporary data block # */
+	int			index;		/* index into leaf entries */
+	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log data header */
+	int			needscan;	/* need to rescan data frees */
+	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args("leaf_removename", args);
+	/*
+	 * Look up the entry; on success lbp, dbp and index are all set.
+	 */
+	if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = lbp->data;
+	data = dbp->data;
+	xfs_dir2_data_check(dp, dbp);
+	/*
+	 * Point to the leaf entry; its address gives data block and offset.
+	 */
+	lep = &leaf->ents[index];
+	db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+	dep = (xfs_dir2_data_entry_t *)
+	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+	needscan = needlog = 0;
+	oldbest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest);
+	/*
+	 * Mark the former data entry unused (needlog/needscan are outputs).
+	 */
+	xfs_dir2_data_make_free(tp, dbp,
+		(xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
+		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+	/*
+	 * The leaf entry stays in place: count it stale and null its address.
+	 */
+	INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
+	xfs_dir2_leaf_log_header(tp, lbp);
+	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
+	/*
+	 * Rescan the freespace in the data block if make_free asked for it,
+	 * then log the data block header if either step asked for that.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	/*
+	 * If the longest freespace in the data block has changed,
+	 * copy the new value into bestsp[db] in the leaf and log that.
+	 */
+	if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) != oldbest) {
+		INT_COPY(bestsp[db], data->hdr.bestfree[0].length, ARCH_CONVERT);
+		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
+	}
+	xfs_dir2_data_check(dp, dbp);
+	/*
+	 * Data block is now empty (best free == block size - header): free it.
+	 */
+	if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+	    mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(db != mp->m_dirdatablk);
+		if (error = xfs_dir2_shrink_inode(args, db, dbp)) {
+			/*
+			 * Couldn't free it.  ENOSPC with args->total == 0
+			 * (freeing would have needed a bmap btree block) is
+			 * tolerated: leave the empty block in place and return
+			 * success.  Other errors are returned unchanged.
+			 */
+			if (error == ENOSPC && args->total == 0) {
+				xfs_da_buf_done(dbp);
+				error = 0;
+			}
+			xfs_dir2_leaf_check(dp, lbp);
+			xfs_da_buf_done(lbp);
+			return error;
+		}
+		dbp = NULL;
+		/*
+		 * If this was the last data block in the bests table,
+		 * compact the table by dropping trailing unused entries.
+		 */
+		if (db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1) {
+			/*
+			 * Find the last active entry (i); entry 0 is never tested.
+			 */
+			for (i = db - 1; i > 0; i--) {
+				if (INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF)
+					break;
+			}
+			/*
+			 * Copy the table down so inactive entries at the
+			 * end are removed; bestcount shrinks by (db - i).
+			 */
+			ovbcopy(bestsp, &bestsp[db - i],
+				(INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp));
+			INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i));
+			xfs_dir2_leaf_log_tail(tp, lbp);
+			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+		} else
+			INT_SET(bestsp[db], ARCH_CONVERT, NULLDATAOFF);
+	}
+	/*
+	 * Block not empty: unless it is the first data block, drop its buffer.
+	 */
+	else if (db != mp->m_dirdatablk && dbp != NULL) {
+		xfs_da_buf_done(dbp);
+		dbp = NULL;
+	}
+	xfs_dir2_leaf_check(dp, lbp);
+	/*
+	 * See if we can convert to block form; dbp may be NULL here.
+	 */
+	return xfs_dir2_leaf_to_block(args, lbp, dbp);
+}
+
+/*
+ * Replace the inode number in a leaf format directory entry with args->inumber.
+ */
+int						/* error */
+xfs_dir2_leaf_replace(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	int			index;		/* index of leaf entry */
+	xfs_dabuf_t		*lbp;		/* leaf buffer */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args("leaf_replace", args);
+	/*
+	 * Look up the entry; any lookup error is returned as is.
+	 */
+	if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	dp = args->dp;
+	leaf = lbp->data;
+	/*
+	 * Point to the leaf entry; its address locates the data entry.
+	 */
+	lep = &leaf->ents[index];
+	/*
+	 * Point to the data entry at that offset in the data block.
+	 */
+	dep = (xfs_dir2_data_entry_t *)
+	      ((char *)dbp->data +
+	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
+	ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT));
+	/*
+	 * Put the new inode number in and log the data entry.
+	 */
+	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	tp = args->trans;
+	xfs_dir2_data_log_entry(tp, dbp, dep);
+	xfs_da_buf_done(dbp);
+	xfs_dir2_leaf_check(dp, lbp);
+	xfs_da_brelse(tp, lbp);		/* leaf block was not modified here */
+	return 0;
+}
+
+/*
+ * Return the index in the leaf block (lbp) of the first entry whose
+ * hash value is args->hashval or, if there is none, the insert point
+ * for that hash value.  Stale entries are not treated specially.
+ */
+int						/* index value */
+xfs_dir2_leaf_search_hash(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*lbp)		/* leaf buffer */
+{
+	xfs_dahash_t		hash;		/* hash from this entry */
+	xfs_dahash_t		hashwant;	/* hash value looking for */
+	int			high;		/* high leaf index */
+	int			low;		/* low leaf index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	int			mid;		/* current leaf index */
+
+	leaf = lbp->data;
+#ifndef __KERNEL__
+	if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0)
+		return 0;
+#endif
+	/*
+	 * The table must not be empty here: hash and mid are assigned only
+	 * inside the loop.  Binary search the leaf entries for hashwant.
+	 */
+	for (lep = leaf->ents, low = 0, high = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1,
+		hashwant = args->hashval;
+	     low <= high; ) {
+		mid = (low + high) >> 1;
+		if ((hash = INT_GET(lep[mid].hashval, ARCH_CONVERT)) == hashwant)
+			break;
+		if (hash < hashwant)
+			low = mid + 1;
+		else
+			high = mid - 1;
+	}
+	/*
+	 * Found one: back up to the first entry with an equal hash value.
+	 */
+	if (hash == hashwant) {
+		while (mid > 0 && INT_GET(lep[mid - 1].hashval, ARCH_CONVERT) == hashwant) {
+#pragma mips_frequency_hint NEVER
+			mid--;
+		}
+	}
+	/*
+	 * Last probe was below hashwant: the insert point is one entry up.
+	 */
+	else if (hash < hashwant)
+		mid++;
+	return mid;
+}
+
+/*
+ * Trim off a trailing data block (db).  We know it's empty since the leaf
+ * freespace table says so.  The last bests entry is removed with it.
+ */
+int						/* error */
+xfs_dir2_leaf_trim_data(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*lbp,		/* leaf buffer */
+	xfs_dir2_db_t		db)		/* data block number */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* leaf bests table */
+#ifdef DEBUG
+	xfs_dir2_data_t		*data;		/* data block structure */
+#endif
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Read the offending data block.  We need its buffer to free it.
+	 */
+	if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
+			XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+#ifdef DEBUG
+	data = dbp->data;
+	ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+#endif
+	/* NOTE(review): data is declared and assigned only under DEBUG, yet
+	 * the ASSERT below reads it outside the #ifdef; presumably ASSERT
+	 * expands to nothing without DEBUG -- confirm.  (RMC 09/08/1999)
+	 */
+
+	leaf = lbp->data;
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+	       mp->m_dirblksize - (uint)sizeof(data->hdr));
+	ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	/*
+	 * Get rid of the data block; on failure release its buffer.
+	 */
+	if (error = xfs_dir2_shrink_inode(args, db, dbp)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(error != ENOSPC);
+		xfs_da_brelse(tp, dbp);
+		return error;
+	}
+	/*
+	 * Eliminate the last bests entry from the table and log the result.
+	 */
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
+	ovbcopy(&bestsp[0], &bestsp[1], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
+	xfs_dir2_leaf_log_tail(tp, lbp);
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	return 0;
+}
+
+/*
+ * Convert node form directory to leaf form directory.
+ * The root of the node form dir needs to already be a LEAFN block.
+ * Just return 0 if the conversion can't be done.
+ */
+int						/* error */
+xfs_dir2_node_to_leaf(
+	xfs_da_state_t		*state)		/* directory operation state */
+{
+	xfs_da_args_t		*args;		/* operation arguments */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dabuf_t		*fbp;		/* buffer for freespace block */
+	xfs_fileoff_t		fo;		/* freespace file offset */
+	xfs_dir2_free_t		*free;		/* freespace structure */
+	xfs_dabuf_t		*lbp;		/* buffer for leaf block */
+	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			rval;		/* successful free trim? */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	/*
+	 * More than one active level in the path means there must
+	 * be multiple leafn blocks.  Give up, returning success.
+	 */
+	if (state->path.active > 1)
+		return 0;
+	args = state->args;
+	xfs_dir2_trace_args("node_to_leaf", args);
+	mp = state->mp;
+	dp = args->dp;
+	tp = args->trans;
+	/*
+	 * Get the last offset in the data fork, then back up by m_dirblkfsbs.
+	 */
+	if (error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	fo -= mp->m_dirblkfsbs;
+	/*
+	 * While fo is beyond the first freespace block, trim trailing
+	 * empty freespace blocks that may have been left behind during
+	 * no-space-reservation operations.  If one can't be trimmed
+	 * (rval == 0), give up and return success.
+	 */
+	while (fo > mp->m_dirfreeblk) {
+		if (error = xfs_dir2_node_trim_free(args, fo, &rval)) {
+#pragma mips_frequency_hint NEVER
+			return error;
+		}
+		if (rval)
+			fo -= mp->m_dirblkfsbs;
+		else
+			return 0;
+	}
+	/*
+	 * Now find the last block before the freespace block (updates fo).
+	 */
+	if (error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	/*
+	 * If it lies beyond the single leaf block, give up.
+	 */
+	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
+		return 0;
+	lbp = state->path.blk[0].bp;
+	leaf = lbp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Read the first freespace block (at mp->m_dirfreeblk).
+	 */
+	if (error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
+			XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	free = fbp->data;
+	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	ASSERT(INT_ISZERO(free->hdr.firstdb, ARCH_CONVERT));
+	/*
+	 * See if header + live leaf entries + bests + tail fit in one
+	 * block.  If not, release the freespace buffer and give up.
+	 */
+	if ((uint)sizeof(leaf->hdr) +
+	    (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) +
+	    INT_GET(free->hdr.nvalid, ARCH_CONVERT) * (uint)sizeof(leaf->bests[0]) +
+	    (uint)sizeof(leaf->tail) >
+	    mp->m_dirblksize) {
+		xfs_da_brelse(tp, fbp);
+		return 0;
+	}
+	/*
+	 * If the leaf has any stale entries in it, compress them out.
+	 * The compact routine will log the header; otherwise log it here.
+	 */
+	if (INT_GET(leaf->hdr.stale, ARCH_CONVERT))
+		xfs_dir2_leaf_compact(args, lbp);
+	else
+		xfs_dir2_leaf_log_header(tp, lbp);
+	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAF1_MAGIC);
+	/*
+	 * Set up the leaf tail: bestcount comes from the free block's nvalid.
+	 */
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	INT_COPY(ltp->bestcount, free->hdr.nvalid, ARCH_CONVERT);
+	/*
+	 * Set up the leaf bests table from the free block's bests, log both.
+	 */
+	bcopy(free->bests, XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT),
+		INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	xfs_dir2_leaf_log_tail(tp, lbp);
+	xfs_dir2_leaf_check(dp, lbp);
+	/*
+	 * Get rid of the freespace block, its contents now live in the leaf.
+	 */
+	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * ENOSPC can't happen here because it can only happen when
+		 * punching out the middle of an extent, and this is an
+		 * isolated block.
+		 */
+		ASSERT(error != ENOSPC);
+		return error;
+	}
+	fbp = NULL;
+	/*
+	 * Now see if we can convert the single-leaf directory
+	 * down to a block form directory.
+	 * This routine always kills the dabuf for the leaf, so
+	 * eliminate it from the path afterwards.
+	 */
+	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
+	state->path.blk[0].bp = NULL;
+	return error;
+}
diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c
new file mode 100644
index 000000000..b6050ba6a
--- /dev/null
+++ b/libxfs/xfs_dir2_node.c
@@ -0,0 +1,1988 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_node.c
+ * XFS directory implementation, version 2, node form files
+ * See data structures in xfs_dir2_node.h and xfs_da_btree.h.
+ */
+
+#include <xfs.h>
+
+/*
+ * Log bests[first..last] (inclusive) of a freespace block.
+ */
+void
+xfs_dir2_free_log_bests(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* freespace buffer */
+	int			first,		/* first entry to log */
+	int			last)		/* last entry to log */
+{
+	xfs_dir2_free_t		*free;		/* freespace structure */
+
+	free = bp->data;
+	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	xfs_da_log_buf(tp, bp,
+		(uint)((char *)&free->bests[first] - (char *)free),
+		(uint)((char *)&free->bests[last] - (char *)free +
+		       sizeof(free->bests[0]) - 1));
+}
+
+/*
+ * Log the header (xfs_dir2_free_hdr_t) of a freespace block.
+ */
+static void
+xfs_dir2_free_log_header(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp)		/* freespace buffer */
+{
+	xfs_dir2_free_t		*free;		/* freespace structure */
+
+	free = bp->data;
+	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
+		(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
+}
+
+/*
+ * Convert a leaf-format directory to a node-format directory.
+ * We change the magic number of the leaf block to LEAFN, and copy
+ * the freespace (bests) table out of the leaf block into its own block.
+ */
+int						/* error */
+xfs_dir2_leaf_to_node(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*lbp)		/* leaf buffer */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	xfs_dabuf_t		*fbp;		/* freespace buffer */
+	xfs_dir2_db_t		fdb;		/* freespace block number */
+	xfs_dir2_free_t		*free;		/* freespace structure */
+	xfs_dir2_data_off_t	*from;		/* pointer to freespace entry */
+	int			i;		/* leaf freespace index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			n;		/* count of live freespc ents */
+	xfs_dir2_data_off_t	off;		/* freespace entry value */
+	xfs_dir2_data_off_t	*to;		/* pointer to freespace entry */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_b("leaf_to_node", args, lbp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Add a freespace block to the directory; fdb gets its block number.
+	 */
+	if (error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp));
+	/*
+	 * Get a buffer for the new freespace block.
+	 */
+	if (error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+			XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(fbp != NULL);
+	free = fbp->data;
+	leaf = lbp->data;
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	/*
+	 * Initialize the freespace block header: firstdb 0, nvalid = bestcount.
+	 */
+	INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+	INT_ZERO(free->hdr.firstdb, ARCH_CONVERT);
+	ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
+	INT_COPY(free->hdr.nvalid, ltp->bestcount, ARCH_CONVERT);
+	/*
+	 * Copy freespace entries from the leaf block to the new block.
+	 * Count active entries (those not NULLDATAOFF) in n for nused.
+	 */
+	for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT), to = free->bests;
+	     i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) {
+		if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF)
+			n++;
+		INT_SET(*to, ARCH_CONVERT, off);
+	}
+	INT_SET(free->hdr.nused, ARCH_CONVERT, n);
+	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Log the leaf header plus the new free block's header and bests.
+	 */
+	xfs_dir2_leaf_log_header(tp, lbp);
+	xfs_dir2_free_log_header(tp, fbp);
+	xfs_dir2_free_log_bests(tp, fbp, 0, INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1);
+	xfs_da_buf_done(fbp);
+	xfs_dir2_leafn_check(dp, lbp);
+	return 0;
+}
+
+/*
+ * Add a leaf entry (args->hashval -> args->blkno/args->index) to a leaf
+ * block in a node-form directory.  Other work is done by the caller.
+ */
+static int					/* error */
+xfs_dir2_leafn_add(
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			index)		/* insertion pt for new entry */
+{
+	int			compact;	/* compacting stale leaves */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			highstale;	/* next stale entry */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	int			lfloghigh;	/* high leaf entry logging */
+	int			lfloglow;	/* low leaf entry logging */
+	int			lowstale;	/* previous stale entry */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_sb("leafn_add", args, index, bp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	leaf = bp->data;
+	/*
+	 * If the block already holds the maximum number of leaf entries
+	 * and none are stale, it won't fit: return ENOSPC and the caller
+	 * will do a split.  With exactly one stale entry it is reused
+	 * directly; with more than one we compact first.
+	 */
+	if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+#pragma mips_frequency_hint NEVER
+		if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT))
+			return XFS_ERROR(ENOSPC);
+		compact = INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1;
+	} else
+		compact = 0;
+	ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval);
+	ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+	       INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval);
+	
+	if (args->justcheck)
+		return 0;		/* nothing has been modified yet */
+
+	/*
+	 * Compact out all but one stale leaf entry.  Leaves behind
+	 * the entry closest to index; index and the log range may change.
+	 */
+	if (compact) {
+#pragma mips_frequency_hint NEVER
+		xfs_dir2_leaf_compact_x1(bp, &index, &lowstale, &highstale,
+			&lfloglow, &lfloghigh);
+	}
+	/*
+	 * Stale entries, no compaction: start from an impossible log range.
+	 */
+	else if (!INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
+		lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		lfloghigh = -1;
+	}
+	/*
+	 * No stale entries: open a slot at index by shifting the tail up one.
+	 */
+	if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
+		lep = &leaf->ents[index];
+		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+			ovbcopy(lep, lep + 1,
+				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+		lfloglow = index;
+		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+	}
+	/*
+	 * There are stale entries.  We'll reuse one for the new entry.
+	 */
+	else {
+		/*
+		 * If we didn't do a compact then we need to figure out
+		 * which stale entry will be used (lowstale or highstale).
+		 */
+		if (compact == 0) {
+			/*
+			 * Find first stale entry before our insertion point (-1 if none).
+			 */
+			for (lowstale = index - 1;
+			     lowstale >= 0 &&
+				INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+				XFS_DIR2_NULL_DATAPTR;
+			     lowstale--)
+				continue;
+			/*
+			 * Find next stale entry at or after insertion point.
+			 * Stop looking if the answer would be worse than
+			 * lowstale already found.
+			 */
+			for (highstale = index;
+			     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+				XFS_DIR2_NULL_DATAPTR &&
+				(lowstale < 0 ||
+				 index - lowstale - 1 >= highstale - index);
+			     highstale++)
+				continue;
+		}
+		/*
+		 * Using the low stale entry: shift entries lowstale+1..index-1
+		 * one slot toward it; the new entry goes in at index - 1.
+		 */
+		if (lowstale >= 0 &&
+		    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+		     index - lowstale - 1 < highstale - index)) {
+			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			ASSERT(index - lowstale - 1 >= 0);
+			if (index - lowstale - 1 > 0)
+				ovbcopy(&leaf->ents[lowstale + 1],
+					&leaf->ents[lowstale],
+					(index - lowstale - 1) * sizeof(*lep));
+			lep = &leaf->ents[index - 1];
+			lfloglow = MIN(lowstale, lfloglow);
+			lfloghigh = MAX(index - 1, lfloghigh);
+		}
+		/*
+		 * Using the high stale entry: shift entries index..highstale-1
+		 * one slot toward it; the new entry goes in at index.
+		 */
+		else {
+			ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			ASSERT(highstale - index >= 0);
+			if (highstale - index > 0)
+				ovbcopy(&leaf->ents[index],
+					&leaf->ents[index + 1],
+					(highstale - index) * sizeof(*lep));
+			lep = &leaf->ents[index];
+			lfloglow = MIN(index, lfloglow);
+			lfloghigh = MAX(highstale, lfloghigh);
+		}
+		INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+	}
+	/*
+	 * Fill in the new entry, then log the header and the touched entries.
+	 */
+	INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
+	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, args->blkno, args->index));
+	xfs_dir2_leaf_log_header(tp, bp);
+	xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
+	xfs_dir2_leafn_check(dp, bp);
+	return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check a leafn block: magic, count limit, hash order, stale count.
+ */
+void
+xfs_dir2_leafn_check(
+	xfs_inode_t	*dp,			/* incore directory inode */
+	xfs_dabuf_t	*bp)			/* leaf buffer */
+{
+	int		i;			/* leaf index */
+	xfs_dir2_leaf_t	*leaf;			/* leaf structure */
+	xfs_mount_t	*mp;			/* filesystem mount point */
+	int		stale;			/* count of stale leaves */
+
+	leaf = bp->data;
+	mp = dp->i_mount;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+	for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
+		if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
+			ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
+			       INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
+                }
+		if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			stale++;
+	}
+	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale);
+}
+#endif	/* DEBUG */
+
+/*
+ * Return the hash of the last leaf entry (stale ones count), or 0 if the
+ * leaf is empty.  If count is non-NULL it receives the number of entries.
+ */
+xfs_dahash_t					/* hash value */
+xfs_dir2_leafn_lasthash(
+	xfs_dabuf_t	*bp,			/* leaf buffer */
+	int		*count)			/* count of entries in leaf */
+{
+	xfs_dir2_leaf_t	*leaf;			/* leaf structure */
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	if (count)
+		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	if (INT_ISZERO(leaf->hdr.count, ARCH_CONVERT))
+		return 0;
+	return INT_GET(leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			*indexp,	/* out: leaf entry index */
+	xfs_da_state_t		*state)		/* state to fill in */
+{
+	xfs_dabuf_t		*curbp;		/* current data/free buffer */
+	xfs_dir2_db_t		curdb;		/* current data block number */
+	xfs_dir2_db_t		curfdb;		/* current free block number */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	int			fi;		/* free entry index */
+	xfs_dir2_free_t		*free;		/* free block structure */
+	int			index;		/* leaf entry index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	int			length;		/* length of new data entry */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_dir2_db_t		newdb;		/* new data block number */
+	xfs_dir2_db_t		newfdb;		/* new free block number */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > 0);
+#endif
+	xfs_dir2_leafn_check(dp, bp);
+	/*
+	 * Look up the hash value in the leaf entries.
+	 */
+	index = xfs_dir2_leaf_search_hash(args, bp);
+	/*
+	 * Do we have a buffer coming in?
+	 */
+	if (state->extravalid)
+		curbp = state->extrablk.bp;
+	else
+		curbp = NULL;
+	/*
+	 * For addname, it's a free block buffer, get the block number.
+	 */
+	if (args->addname) {
+		curfdb = curbp ? state->extrablk.blkno : -1;
+		curdb = -1;
+		length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+		if (free = (curbp ? curbp->data : NULL))
+			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	}
+	/*
+	 * For others, it's a data block buffer, get the block number.
+	 */
+	else {
+		curfdb = -1;
+		curdb = curbp ? state->extrablk.blkno : -1;
+	}
+	/*
+	 * Loop over leaf entries with the right hash value.
+	 */
+	for (lep = &leaf->ents[index];
+	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     lep++, index++) {
+		/*
+		 * Skip stale leaf entries.
+		 */
+		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		/*
+		 * Pull the data block number from the entry.
+		 */
+		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		/*
+		 * For addname, we're looking for a place to put the new entry.
+		 * We want to use a data block with an entry of equal
+		 * hash value to ours if there is one with room.
+		 */
+		if (args->addname) {
+			/*
+			 * If this block isn't the data block we already have
+			 * in hand, take a look at it.
+			 */
+			if (newdb != curdb) {
+				curdb = newdb;
+				/*
+				 * Convert the data block to the free block
+				 * holding its freespace information.
+				 */
+				newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb);
+				/*
+				 * If it's not the one we have in hand,
+				 * read it in.
+				 */
+				if (newfdb != curfdb) {
+					/*
+					 * If we had one before, drop it.
+					 */
+					if (curbp)
+						xfs_da_brelse(tp, curbp);
+					/*
+					 * Read the free block.
+					 */
+					if (error = xfs_da_read_buf(tp, dp,
+							XFS_DIR2_DB_TO_DA(mp,
+								newfdb),
+							-1, &curbp,
+							XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+						return error;
+					}
+					curfdb = newfdb;
+					free = curbp->data;
+					ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) ==
+					       XFS_DIR2_FREE_MAGIC);
+					ASSERT((INT_GET(free->hdr.firstdb, ARCH_CONVERT) %
+						XFS_DIR2_MAX_FREE_BESTS(mp)) ==
+					       0);
+					ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) <= curdb);
+					ASSERT(curdb <
+					       INT_GET(free->hdr.firstdb, ARCH_CONVERT) +
+					       INT_GET(free->hdr.nvalid, ARCH_CONVERT));
+				}
+				/*
+				 * Get the index for our entry.
+				 */
+				fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb);
+				/*
+				 * If it has room, return it.
+				 */
+				if (INT_GET(free->bests[fi], ARCH_CONVERT) == NULLDATAOFF) {
+#pragma mips_frequency_hint NEVER
+					return XFS_ERROR(EFSCORRUPTED);
+				}
+				if (INT_GET(free->bests[fi], ARCH_CONVERT) >= length) {
+					*indexp = index;
+					state->extravalid = 1;
+					state->extrablk.bp = curbp;
+					state->extrablk.blkno = curfdb;
+					state->extrablk.index = fi;
+					state->extrablk.magic =
+						XFS_DIR2_FREE_MAGIC;
+					ASSERT(args->oknoent);
+					return XFS_ERROR(ENOENT);
+				}
+			}
+		}
+		/*
+		 * Not adding a new entry, so we really want to find
+		 * the name given to us.
+		 */
+		else {
+			/*
+			 * If it's a different data block, go get it.
+			 */
+			if (newdb != curdb) {
+				/*
+				 * If we had a block before, drop it.
+				 */
+				if (curbp)
+					xfs_da_brelse(tp, curbp);
+				/*
+				 * Read the data block.
+				 */
+				if (error =
+				    xfs_da_read_buf(tp, dp,
+					    XFS_DIR2_DB_TO_DA(mp, newdb), -1,
+					    &curbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+					return error;
+				}
+				xfs_dir2_data_check(dp, curbp);
+				curdb = newdb;
+			}
+			/*
+			 * Point to the data entry.
+			 */
+			dep = (xfs_dir2_data_entry_t *)
+			      ((char *)curbp->data +
+			       XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+			/*
+			 * Compare the entry, return it if it matches.
+			 */
+			if (dep->namelen == args->namelen &&
+			    dep->name[0] == args->name[0] &&
+			    bcmp(dep->name, args->name, args->namelen) == 0) {
+				args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+				*indexp = index;
+				state->extravalid = 1;
+				state->extrablk.bp = curbp;
+				state->extrablk.blkno = curdb;
+				state->extrablk.index =
+					(int)((char *)dep -
+					      (char *)curbp->data);
+				state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+				return XFS_ERROR(EEXIST);
+			}
+		}
+	}
+	/*
+	 * Didn't find a match.
+	 * If we are holding a buffer, give it back in case our caller
+	 * finds it useful.
+	 */
+	if (state->extravalid = (curbp != NULL)) {
+		state->extrablk.bp = curbp;
+		state->extrablk.index = -1;
+		/*
+		 * For addname, giving back a free block.
+		 */
+		if (args->addname) {
+			state->extrablk.blkno = curfdb;
+			state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+		}
+		/*
+		 * For other callers, giving back a data block.
+		 */
+		else {
+			state->extrablk.blkno = curdb;
+			state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+		}
+	}
+	/*
+	 * Return the final index, that will be the insertion point.
+	 */
+	*indexp = index;
+	ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Move count leaf entries from source to destination leaf, opening a
+ * hole at start_d and closing the gap left at start_s as needed.
+ * Log entries and headers.  Stale entries are preserved.
+ */
+static void
+xfs_dir2_leafn_moveents(
+	xfs_da_args_t	*args,			/* operation arguments */
+	xfs_dabuf_t	*bp_s,			/* source leaf buffer */
+	int		start_s,		/* source leaf index */
+	xfs_dabuf_t	*bp_d,			/* destination leaf buffer */
+	int		start_d,		/* destination leaf index */
+	int		count)			/* count of leaves to copy */
+{
+	xfs_dir2_leaf_t	*leaf_d;		/* destination leaf structure */
+	xfs_dir2_leaf_t	*leaf_s;		/* source leaf structure */
+	int		stale;			/* count stale leaves copied */
+	xfs_trans_t	*tp;			/* transaction pointer */
+
+	xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d,
+		start_d, count);
+	/* Silently return if nothing to do. */
+	if (count == 0) {
+#pragma mips_frequency_hint NEVER
+		return;
+	}
+	tp = args->trans;
+	leaf_s = bp_s->data;
+	leaf_d = bp_d->data;
+	/*
+	 * If the destination index is not the end of the current
+	 * destination leaf entries, open up a hole in the destination
+	 * to hold the new entries.
+	 */
+	if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) {
+		ovbcopy(&leaf_d->ents[start_d], &leaf_d->ents[start_d + count],
+			(INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) *
+			sizeof(xfs_dir2_leaf_entry_t));
+		xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
+			count + INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - 1);
+	}
+	/*
+	 * If the source has stale leaves, count the ones in the copy range
+	 * so we can update the header correctly.
+	 */
+	if (!INT_ISZERO(leaf_s->hdr.stale, ARCH_CONVERT)) {
+		int	i;			/* temp leaf index */
+
+		for (i = start_s, stale = 0; i < start_s + count; i++) {
+			if (INT_GET(leaf_s->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+				stale++;
+		}
+	} else
+		stale = 0;
+	/* Copy the leaf entries from source to destination. */
+	bcopy(&leaf_s->ents[start_s], &leaf_d->ents[start_d],
+		count * sizeof(xfs_dir2_leaf_entry_t));
+	xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
+	/*
+	 * If there are source entries after the ones we copied, close the
+	 * gap by sliding all of them (not just count of them) down.
+	 */
+	if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) {
+		int	left;			/* entries after moved range */
+
+		left = INT_GET(leaf_s->hdr.count, ARCH_CONVERT) - (start_s + count);
+		ovbcopy(&leaf_s->ents[start_s + count], &leaf_s->ents[start_s],
+			left * sizeof(xfs_dir2_leaf_entry_t));
+		xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + left - 1);
+	}
+	/*
+	 * Update the headers and log them.
+	 */
+	INT_MOD(leaf_s->hdr.count, ARCH_CONVERT, -(count));
+	INT_MOD(leaf_s->hdr.stale, ARCH_CONVERT, -(stale));
+	INT_MOD(leaf_d->hdr.count, ARCH_CONVERT, count);
+	INT_MOD(leaf_d->hdr.stale, ARCH_CONVERT, stale);
+	xfs_dir2_leaf_log_header(tp, bp_s);
+	xfs_dir2_leaf_log_header(tp, bp_d);
+	xfs_dir2_leafn_check(args->dp, bp_s);
+	xfs_dir2_leafn_check(args->dp, bp_d);
+}
+
+/*
+ * Determine the sort order of two leaf blocks.
+ * Gives 1 when both blocks hold entries and leaf2 sorts ahead of leaf1
+ * (its first or its last hash value is the lower one), otherwise 0.
+ */
+int						/* sort order */
+xfs_dir2_leafn_order(
+	xfs_dabuf_t	*leaf1_bp,		/* leaf1 buffer */
+	xfs_dabuf_t	*leaf2_bp)		/* leaf2 buffer */
+{
+	xfs_dir2_leaf_t	*leaf1 = leaf1_bp->data;	/* leaf1 structure */
+	xfs_dir2_leaf_t	*leaf2 = leaf2_bp->data;	/* leaf2 structure */
+	int		n1;			/* leaf1 entry count */
+	int		n2;			/* leaf2 entry count */
+
+	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	n1 = INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+	n2 = INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+	if (n1 <= 0 || n2 <= 0)
+		return 0;
+	return INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[0].hashval, ARCH_CONVERT) ||
+	       INT_GET(leaf2->ents[n2 - 1].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[n1 - 1].hashval, ARCH_CONVERT);
+}
+
+/*
+ * Rebalance leaf entries between two leaf blocks.
+ * This is actually only called when the second block is new,
+ * though the code deals with the general case.
+ * A new entry will be inserted in one of the blocks, and that
+ * entry is taken into account when balancing.
+ * On return state->inleaf says which block gets the new entry.
+ */
+static void
+xfs_dir2_leafn_rebalance(
+	xfs_da_state_t		*state,		/* btree cursor */
+	xfs_da_state_blk_t	*blk1,		/* first btree block */
+	xfs_da_state_blk_t	*blk2)		/* second btree block */
+{
+	xfs_da_args_t		*args;		/* operation arguments */
+	int			count;		/* count (& direction) leaves */
+	int			isleft;		/* new goes in left leaf */
+	xfs_dir2_leaf_t		*leaf1;		/* first leaf structure */
+	xfs_dir2_leaf_t		*leaf2;		/* second leaf structure */
+	int			mid;		/* midpoint leaf index */
+#ifdef DEBUG
+	int			oldstale;	/* old count of stale leaves */
+#endif
+	int			oldsum;		/* old total leaf count */
+	int			swap;		/* swapped leaf blocks */
+
+	args = state->args;
+	/*
+	 * If the block order is wrong, swap the arguments.
+	 */
+	if (swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp)) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_state_blk_t	*tmp;	/* temp for block swap */
+
+		tmp = blk1;
+		blk1 = blk2;
+		blk2 = tmp;
+	}
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	oldsum = INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+#ifdef DEBUG
+	oldstale = INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT);
+#endif
+	mid = oldsum >> 1;
+	/*
+	 * If the old leaf count was odd then the new one will be even,
+	 * so we need to divide the new count evenly.
+	 */
+	if (oldsum & 1) {
+		xfs_dahash_t	midhash;	/* middle entry hash value */
+
+		if (mid >= INT_GET(leaf1->hdr.count, ARCH_CONVERT))
+			midhash = INT_GET(leaf2->ents[mid - INT_GET(leaf1->hdr.count, ARCH_CONVERT)].hashval, ARCH_CONVERT);
+		else
+			midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT);
+		isleft = args->hashval <= midhash;
+	}
+	/*
+	 * If the old count is even then the new count is odd, so there's
+	 * no preferred side for the new entry.  Pick the left one.
+	 */
+	else
+		isleft = 1;
+	/*
+	 * Calculate moved entry count.  Positive means left-to-right,
+	 * negative means right-to-left (moveents wants the magnitude).
+	 */
+	count = INT_GET(leaf1->hdr.count, ARCH_CONVERT) - mid + (isleft == 0);
+	if (count > 0)
+		xfs_dir2_leafn_moveents(args, blk1->bp,
+			INT_GET(leaf1->hdr.count, ARCH_CONVERT) - count, blk2->bp, 0, count);
+	else if (count < 0)
+		xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp,
+			INT_GET(leaf1->hdr.count, ARCH_CONVERT), -count);
+	ASSERT(INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT) == oldsum);
+	ASSERT(INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT) == oldstale);
+	/*
+	 * Mark whether we're inserting into the old or new leaf.
+	 */
+	if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) < INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+		state->inleaf = swap;
+	else if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+		state->inleaf = !swap;
+	else
+		state->inleaf =
+			swap ^ (args->hashval < INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT));
+	/*
+	 * Adjust the expected index for insertion.
+	 */
+	if (!state->inleaf)
+		blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+}
+
+/*
+ * Remove an entry from a node directory.
+ * This removes the leaf entry and the data entry,
+ * and updates the free block if necessary.
+ * On success *rval says whether the leaf is now small enough to try a
+ * join, and 0 is returned; otherwise an error code is returned.
+ */
+STATIC int					/* error */
+xfs_dir2_leafn_remove(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	int			index,		/* leaf entry index */
+	xfs_da_state_blk_t	*dblk,		/* data block */
+	int			*rval)		/* resulting block needs join */
+{
+	xfs_dir2_data_t		*data;		/* data block structure */
+	xfs_dir2_db_t		db;		/* data block number */
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	int			longest;	/* longest data free entry */
+	int			off;		/* data block entry offset */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log data header */
+	int			needscan;	/* need to rescan data frees */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_sb("leafn_remove", args, index, bp);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	/* Point to the entry we're removing. */
+	lep = &leaf->ents[index];
+	/*
+	 * Extract the data block and offset from the entry.
+	 */
+	db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+	ASSERT(dblk->blkno == db);
+	off = XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT));
+	ASSERT(dblk->index == off);
+	/*
+	 * Kill the leaf entry by marking it stale.
+	 * Log the leaf block changes.
+	 */
+	INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
+	xfs_dir2_leaf_log_header(tp, bp);
+	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+	xfs_dir2_leaf_log_ents(tp, bp, index, index);
+	/*
+	 * Make the data entry free.  Keep track of the longest freespace
+	 * in the data block in case it changes.
+	 */
+	dbp = dblk->bp;
+	data = dbp->data;
+	dep = (xfs_dir2_data_entry_t *)((char *)data + off);
+	longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+	needlog = needscan = 0;
+	xfs_dir2_data_make_free(tp, dbp, off,
+		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+	/*
+	 * Rescan the data block freespaces for bestfree.
+	 * Log the data block header if needed.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	xfs_dir2_data_check(dp, dbp);
+	/*
+	 * If the longest data block freespace changes, need to update
+	 * the corresponding freeblock entry.
+	 */
+	if (longest < INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+		int		error;		/* error return value */
+		xfs_dabuf_t	*fbp;		/* freeblock buffer */
+		xfs_dir2_db_t	fdb;		/* freeblock block number */
+		int		findex;		/* index in freeblock entries */
+		xfs_dir2_free_t	*free;		/* freeblock structure */
+		int		logfree;	/* need to log free entry */
+
+		/*
+		 * Convert the data block number to a free block,
+		 * read in the free block.
+		 */
+		fdb = XFS_DIR2_DB_TO_FDB(mp, db);
+		if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb),
+				-1, &fbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+			return error;
+		}
+		free = fbp->data;
+		ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+		ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) ==
+		       XFS_DIR2_MAX_FREE_BESTS(mp) *
+		       (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
+		/*
+		 * Calculate which entry we need to fix.
+		 */
+		findex = XFS_DIR2_DB_TO_FDINDEX(mp, db);
+		longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+		/*
+		 * If the data block is now empty we can get rid of it
+		 * (usually).
+		 */
+		if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+#pragma mips_frequency_hint NEVER
+			/*
+			 * Try to punch out the data block.
+			 */
+			error = xfs_dir2_shrink_inode(args, db, dbp);
+			if (error == 0) {
+				dblk->bp = NULL;
+				data = NULL;
+			}
+			/*
+			 * We can get ENOSPC if there's no space reservation.
+			 * In this case just drop the buffer and some one else
+			 * will eventually get rid of the empty block.
+			 */
+			else if (error == ENOSPC && args->total == 0)
+				xfs_da_buf_done(dbp);
+			else
+				return error;
+		}
+		/*
+		 * If we got rid of the data block, we can eliminate that entry
+		 * in the free block.
+		 */
+		if (data == NULL) {
+#pragma mips_frequency_hint NEVER
+			/*
+			 * One less used entry in the free table.
+			 */
+			INT_MOD(free->hdr.nused, ARCH_CONVERT, -1);
+			xfs_dir2_free_log_header(tp, fbp);
+			/*
+			 * If this was the last entry in the table, we can
+			 * trim the table size back.  There might be other
+			 * entries at the end referring to non-existent
+			 * data blocks, get those too.
+			 */
+			if (findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1) {
+				int	i;		/* free entry index */
+
+				for (i = findex - 1;
+				     i >= 0 && INT_GET(free->bests[i], ARCH_CONVERT) == NULLDATAOFF;
+				     i--)
+					continue;
+				INT_SET(free->hdr.nvalid, ARCH_CONVERT, i + 1);
+				logfree = 0;
+			}
+			/*
+			 * Not the last entry, just punch it out.
+			 */
+			else {
+				INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF);
+				logfree = 1;
+			}
+			/*
+			 * If there are no useful entries left in the block,
+			 * get rid of the block if we can.
+			 */
+			if (INT_GET(free->hdr.nused, ARCH_CONVERT) == 0) {
+				error = xfs_dir2_shrink_inode(args, fdb, fbp);
+				if (error == 0) {
+					fbp = NULL;
+					logfree = 0;
+				} else if (error != ENOSPC || args->total != 0)
+					return error;
+				/*
+				 * It's possible to get ENOSPC if there is no
+				 * space reservation.  In this case some one
+				 * else will eventually get rid of this block.
+				 */
+			}
+		}
+		/*
+		 * Data block is still there (not empty, or it could not be
+		 * freed), just set the free entry to the new value.
+		 */
+		else {
+			INT_SET(free->bests[findex], ARCH_CONVERT, longest);
+			logfree = 1;
+		}
+		/*
+		 * Log the free entry that changed, unless we got rid of it.
+		 */
+		if (logfree)
+			xfs_dir2_free_log_bests(tp, fbp, findex, findex);
+		/*
+		 * Drop the buffer if we still have it.
+		 */
+		if (fbp)
+			xfs_da_buf_done(fbp);
+	}
+	xfs_dir2_leafn_check(dp, bp);
+	/*
+	 * Return indication of whether this leaf block is empty enough
+	 * to justify trying to join it with a neighbor.
+	 */
+	*rval =
+		((uint)sizeof(leaf->hdr) +
+		 (uint)sizeof(leaf->ents[0]) *
+		 (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT))) <
+		mp->m_dir_magicpct;
+	return 0;
+}
+
+/*
+ * Split the leaf entries in the old block into old and new blocks,
+ * then add the new leaf entry to whichever block state->inleaf picks.
+ * Returns 0, or an error from growing, initializing, linking or adding.
+ */
+int						/* error */
+xfs_dir2_leafn_split(
+	xfs_da_state_t		*state,		/* btree cursor */
+	xfs_da_state_blk_t	*oldblk,	/* original block */
+	xfs_da_state_blk_t	*newblk)	/* newly created block */
+{
+	xfs_da_args_t		*args;		/* operation arguments */
+	xfs_dablk_t		blkno;		/* new leaf block number */
+	int			error;		/* error return value */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+
+	/*
+	 * Allocate space for a new leaf node.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	mp = args->dp->i_mount;
+	ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);
+	error = xfs_da_grow_inode(args, &blkno);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	/* Initialize the new leaf block. */
+	error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno),
+		&newblk->bp, XFS_DIR2_LEAFN_MAGIC);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	newblk->blkno = blkno;
+	newblk->magic = XFS_DIR2_LEAFN_MAGIC;
+	/*
+	 * Rebalance the entries across the two leaves, link the new
+	 * block into the leaves.
+	 */
+	xfs_dir2_leafn_rebalance(state, oldblk, newblk);
+	error = xfs_da_blk_link(state, oldblk, newblk);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	/*
+	 * Insert the new entry in the correct block.
+	 */
+	if (state->inleaf)
+		error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index);
+	else
+		error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index);
+	/*
+	 * Update last hashval in each block since we added the name.
+	 */
+	oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL);
+	newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL);
+	xfs_dir2_leafn_check(args->dp, oldblk->bp);
+	xfs_dir2_leafn_check(args->dp, newblk->bp);
+	return error;
+}
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  The block with the smaller
+ * block number is the one kept.  The verdict is passed back in *action:
+ *   0 - nothing to do (block over 50% full, or no suitable neighbor),
+ *   1 - the block can be collapsed; the state structure is filled in,
+ *   2 - the block is empty; the state structure is filled in.
+ * The function's own return value is 0 or an error code.
+ */
+int						/* error */
+xfs_dir2_leafn_toosmall(
+	xfs_da_state_t		*state,		/* btree cursor */
+	int			*action)	/* resulting action to take */
+{
+	xfs_da_state_blk_t	*blk;		/* leaf block */
+	xfs_dabuf_t		*sibbp;		/* sibling leaf buffer */
+	xfs_dablk_t		sibno;		/* sibling block number */
+	xfs_dir2_leaf_t		*sib;		/* sibling leaf structure */
+	int			error;		/* error return value */
+	int			forward;	/* sibling block direction */
+	xfs_da_blkinfo_t	*info;		/* leaf block header */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	int			live;		/* live entries in this leaf */
+	int			nbytes;		/* bytes in use, or to spare */
+	int			pass;		/* sibling counter */
+	int			rval;		/* result from path_shift */
+	int			total;		/* live entries in both leaves */
+
+	/*
+	 * First the degenerate case of a block that is over 50% full:
+	 * joining is out of the question, so don't even look to see
+	 * if we might be able to coalesce with a sibling.
+	 */
+	blk = &state->path.blk[state->path.active - 1];
+	info = blk->bp->data;
+	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	leaf = (xfs_dir2_leaf_t *)info;
+	live = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+	nbytes = (uint)sizeof(leaf->hdr) + live * (uint)sizeof(leaf->ents[0]);
+	if (nbytes > (state->blocksize >> 1)) {
+		/*
+		 * More than half the block is in use, leave it be.
+		 */
+		*action = 0;
+		return 0;
+	}
+	/*
+	 * Next the degenerate case of an empty block, which is simply
+	 * deleted rather than coalesced with a sibling.  altpath is
+	 * shifted to a neighbor (the forward one unless there is none)
+	 * and names the block to keep; path still names this block,
+	 * the one to drop.
+	 */
+	if (live == 0) {
+#pragma mips_frequency_hint NEVER
+		forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
+		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
+			&rval);
+		if (error)
+			return error;
+		if (rval)
+			*action = 2;
+		else
+			*action = 0;
+		return 0;
+	}
+	/*
+	 * Look at each sibling in turn for one we can coalesce with while
+	 * leaving at least 25% of a block free.  The lower numbered
+	 * sibling is tried first so that a directory shrinks over time.
+	 */
+	forward = INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT);
+	sibbp = NULL;
+	for (pass = 0; pass < 2; forward = !forward, pass++) {
+		if (forward)
+			sibno = INT_GET(info->forw, ARCH_CONVERT);
+		else
+			sibno = INT_GET(info->back, ARCH_CONVERT);
+		if (sibno == 0)
+			continue;
+		/*
+		 * Read the sibling leaf block.
+		 */
+		error = xfs_da_read_buf(state->args->trans, state->args->dp,
+			sibno, -1, &sibbp, XFS_DATA_FORK);
+		if (error) {
+#pragma mips_frequency_hint NEVER
+			return error;
+		}
+		ASSERT(sibbp != NULL);
+		sib = sibbp->data;
+		ASSERT(INT_GET(sib->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+		/*
+		 * Total the live entries of the two blocks and see
+		 * whether they fit with at least 25% to spare.
+		 */
+		total = live + (INT_GET(sib->hdr.count, ARCH_CONVERT) - INT_GET(sib->hdr.stale, ARCH_CONVERT));
+		nbytes = state->blocksize - (state->blocksize >> 2);
+		nbytes -= total * (uint)sizeof(sib->ents[0]);
+		if (nbytes >= 0)
+			break;
+		xfs_da_brelse(state->args->trans, sibbp);
+	}
+	/*
+	 * Neither sibling will do, give up.
+	 */
+	if (pass >= 2) {
+		*action = 0;
+		return 0;
+	}
+	/*
+	 * We are finished with the sibling's dabuf; let go of it so
+	 * that path_shift can get the block.
+	 */
+	xfs_da_buf_done(sibbp);
+	/*
+	 * Make altpath point to the block we want to keep (the lower
+	 * numbered one) and path point to the block we want to drop.
+	 */
+	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	if (sibno < blk->blkno)
+		error = xfs_da_path_shift(state, &state->altpath, forward, 0,
+			&rval);
+	else
+		error = xfs_da_path_shift(state, &state->path, forward, 0,
+			&rval);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	*action = !rval;
+	return 0;
+}
+
+/*
+ * Merge every leaf entry of drop_blk into save_blk as part of a join,
+ * at the front or the back of save_blk depending on hash order.
+ */
+void
+xfs_dir2_leafn_unbalance(
+	xfs_da_state_t		*state,		/* cursor */
+	xfs_da_state_blk_t	*drop_blk,	/* dead block */
+	xfs_da_state_blk_t	*save_blk)	/* surviving block */
+{
+	xfs_da_args_t		*args;		/* operation arguments */
+	xfs_dir2_leaf_t		*dead;		/* dead leaf structure */
+	xfs_dir2_leaf_t		*keep;		/* surviving leaf structure */
+	int			where;		/* insertion index in keep */
+
+	args = state->args;
+	ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+	dead = drop_blk->bp->data;
+	keep = save_blk->bp->data;
+	ASSERT(INT_GET(dead->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(INT_GET(keep->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Purge stale entries from both leaves while we are here.
+	 */
+	if (INT_GET(dead->hdr.stale, ARCH_CONVERT) != 0)
+		xfs_dir2_leaf_compact(args, drop_blk->bp);
+	if (INT_GET(keep->hdr.stale, ARCH_CONVERT) != 0)
+		xfs_dir2_leaf_compact(args, save_blk->bp);
+	/*
+	 * Move the dead block's entries to the matching end of the survivor.
+	 */
+	drop_blk->hashval = INT_GET(dead->ents[INT_GET(dead->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
+		where = 0;
+	else
+		where = INT_GET(keep->hdr.count, ARCH_CONVERT);
+	xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, where,
+		INT_GET(dead->hdr.count, ARCH_CONVERT));
+	save_blk->hashval = INT_GET(keep->ents[INT_GET(keep->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+	xfs_dir2_leafn_check(args->dp, save_blk->bp);
+}
+
+/*
+ * Top-level node form directory addname routine.
+ * Finds the insertion point, adds the data entry and then the leaf
+ * entry, splitting the leaf block when it is full.
+ */
+int						/* error */
+xfs_dir2_node_addname(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	int			error;		/* error return value */
+	xfs_da_state_blk_t	*fblk;		/* freeblock from lookup */
+	xfs_da_state_blk_t	*lblk;		/* leaf block for insert */
+	int			rval;		/* sub-return value */
+	xfs_da_state_t		*state;		/* btree cursor */
+
+	xfs_dir2_trace_args("node_addname", args);
+	/*
+	 * Set up a btree cursor for this operation.
+	 */
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_dirblksize;
+	/*
+	 * The lookup is expected to fail with ENOENT, leaving the cursor
+	 * at the insertion point.  Anything else ends the operation.
+	 */
+	error = xfs_da_node_lookup_int(state, &rval);
+	if (error)
+		rval = error;
+	if (rval != ENOENT) {
+#pragma mips_frequency_hint NEVER
+		goto done;
+	}
+	/*
+	 * Put the data entry in a data block, passing along the
+	 * freeblock that lookup found, if there was one.
+	 */
+	if (state->extravalid)
+		fblk = &state->extrablk;
+	else
+		fblk = NULL;
+	rval = xfs_dir2_node_addname_int(args, fblk);
+	if (rval) {
+#pragma mips_frequency_hint NEVER
+		goto done;
+	}
+	lblk = &state->path.blk[state->path.active - 1];
+	ASSERT(lblk->magic == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Now the leaf entry.  If the leaf block has no room for it,
+	 * split the block, unless we have no space reservation.
+	 */
+	rval = xfs_dir2_leafn_add(lblk->bp, args, lblk->index);
+	if (rval) {
+#pragma mips_frequency_hint NEVER
+		if (args->total == 0) {
+			ASSERT(rval == ENOSPC);
+			goto done;
+		}
+		rval = xfs_da_split(state);
+	} else if (!args->justcheck) {
+		/*
+		 * It worked, fix the hash values up the btree.
+		 */
+		xfs_da_fixhashpath(state, &state->path);
+	}
+done:
+	xfs_da_state_free(state);
+	return rval;
+}
+
+
+/*
+ * Add the data entry for a node-format directory name addition.
+ * The leaf entry is added in xfs_dir2_leafn_add.
+ * We may enter with a freespace block that the lookup found.
+ */
+STATIC int					/* error */
+xfs_dir2_node_addname_int(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_da_state_blk_t	*fblk)		/* optional freespace block */
+{
+	xfs_dir2_data_t		*data;		/* data block structure */
+	xfs_dir2_db_t		dbno;		/* data block number */
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_unused_t	*dup;		/* data unused entry pointer */
+	int			error;		/* error return value */
+	xfs_dir2_db_t		fbno;		/* freespace block number */
+	xfs_dabuf_t		*fbp;		/* freespace buffer */
+	int			findex;		/* freespace entry index */
+	xfs_dir2_db_t		foundbno;	/* found freespace block no */
+	int			foundindex;	/* found freespace entry idx */
+	xfs_dir2_free_t		*free;		/* freespace block structure */
+	xfs_dir2_db_t		ifbno;		/* initial freespace block no */
+	xfs_dir2_db_t		lastfbno;	/* highest freespace block no */
+	int			length;		/* length of the new entry */
+	int			logfree;	/* need to log free entry */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log data header */
+	int			needscan;	/* need to rescan data frees */
+	xfs_dir2_data_off_t	*tagp;		/* data entry tag pointer */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	/*
+	 * If we came in with a freespace block that means that lookup
+	 * found an entry with our hash value.  This is the freespace
+	 * block for that data entry.
+	 */
+	if (fblk) {
+		fbp = fblk->bp;
+		/*
+		 * Remember initial freespace block number.
+		 */
+		ifbno = fblk->blkno;
+		free = fbp->data;
+		ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+		findex = fblk->index;
+		/*
+		 * This means the free entry showed that the data block had
+		 * space for our entry, so we remembered it.
+		 * Use that data block.
+		 */
+		if (findex >= 0) {
+			ASSERT(findex < INT_GET(free->hdr.nvalid, ARCH_CONVERT));
+			ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF);
+			ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) >= length);
+			dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex;
+		}
+		/*
+		 * The data block looked at didn't have enough room.
+		 * We'll start at the beginning of the freespace entries.
+		 */
+		else {
+			dbno = -1;
+			findex = 0;
+		}
+	}
+	/*
+	 * Didn't come in with a freespace block, so don't have a data block.
+	 */
+	else {
+		ifbno = dbno = -1;
+		fbp = NULL;
+		findex = 0;
+	}
+	/*
+	 * If we don't have a data block yet, we're going to scan the 
+	 * freespace blocks looking for one.  Figure out what the
+	 * highest freespace block number is.
+	 */
+	if (dbno == -1) {
+		xfs_fileoff_t	fo;		/* freespace block number */
+
+		if (error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))
+			return error;
+		lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo);
+		fbno = ifbno;
+		foundindex = -1;
+	}
+	/*
+	 * While we haven't identified a data block, search the freeblock
+	 * data for a good data block.  If we find a null freeblock entry,
+	 * indicating a hole in the data blocks, remember that.
+	 */
+	while (dbno == -1) {
+		/*
+		 * If we don't have a freeblock in hand, get the next one.
+		 */
+		if (fbp == NULL) {
+			/*
+			 * Happens the first time through unless lookup gave
+			 * us a freespace block to start with.
+			 */
+			if (++fbno == 0)
+				fbno = XFS_DIR2_FREE_FIRSTDB(mp);
+			/*
+			 * If it's ifbno we already looked at it.
+			 */
+			if (fbno == ifbno)
+				fbno++;
+			/*
+			 * If it's off the end we're done.
+			 */
+			if (fbno >= lastfbno)
+				break;
+			/*
+			 * Read the block.  There can be holes in the
+			 * freespace blocks, so this might not succeed.
+			 * This should be really rare, so there's no reason
+			 * to avoid it.
+			 */
+			if (error = xfs_da_read_buf(tp, dp,
+					XFS_DIR2_DB_TO_DA(mp, fbno), -1, &fbp,
+					XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+				return error;
+			}
+			if (fbp == NULL) {
+#pragma mips_frequency_hint NEVER
+				continue;
+			}
+			free = fbp->data;
+			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+			findex = 0;
+		}
+		/*
+		 * Look at the current free entry.  Is it good enough?
+		 */
+		if (INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF &&
+		    INT_GET(free->bests[findex], ARCH_CONVERT) >= length)
+			dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex;
+		else {
+			/*
+			 * If we haven't found an empty entry yet, and this
+			 * one is empty, remember this slot.
+			 */
+			if (foundindex == -1 &&
+			    INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) {
+				foundindex = findex;
+				foundbno = fbno;
+			}
+			/*
+			 * Are we done with the freeblock?
+			 */
+			if (++findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+				/*
+				 * If there is space left in this freeblock,
+				 * and we don't have an empty entry yet,
+				 * remember this slot.
+				 */
+				if (foundindex == -1 &&
+				    findex < XFS_DIR2_MAX_FREE_BESTS(mp)) {
+					foundindex = findex;
+					foundbno = fbno;
+				}
+				/*
+				 * Drop the block.
+				 */
+				xfs_da_brelse(tp, fbp);
+				fbp = NULL;
+				if (fblk && fblk->bp)
+					fblk->bp = NULL;
+			}
+		}
+	}
+	/*
+	 * If we don't have a data block, and there's no free slot in a
+	 * freeblock, we need to add a new freeblock.
+	 */
+	if (dbno == -1 && foundindex == -1) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Not allowed to allocate, so return failure.
+		 */
+		if (args->justcheck || args->total == 0) {
+			return XFS_ERROR(ENOSPC);
+		}
+		/*
+		 * Add the new freeblock.
+		 */
+		if (error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
+				&fbno)) {
+			return error;
+		}
+		/*
+		 * Get a buffer for the new block.
+		 */
+		if (error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fbno),
+				-1, &fbp, XFS_DATA_FORK)) {
+			return error;
+		}
+		ASSERT(fbp != NULL);
+		/*
+		 * Initialize the new block to be empty, and remember
+		 * its first slot as our empty slot.
+		 */
+		free = fbp->data;
+		INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+		INT_SET(free->hdr.firstdb, ARCH_CONVERT, (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
+			XFS_DIR2_MAX_FREE_BESTS(mp));
+                INT_ZERO(free->hdr.nused, ARCH_CONVERT);
+		INT_ZERO(free->hdr.nvalid, ARCH_CONVERT);
+		foundindex = 0;
+		foundbno = fbno;
+	}
+	/*
+	 * If we don't have a data block, and we don't have a freeblock buffer
+	 * in hand (we dropped the one with the free slot in it),
+	 * go read the freeblock again.
+	 */
+	if (dbno == -1 && fbp == NULL) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * We're going to use the empty slot we found before.
+		 */
+		findex = foundindex;
+		fbno = foundbno;
+		if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fbno),
+				-1, &fbp, XFS_DATA_FORK)) {
+			return error;
+		}
+		ASSERT(fbp != NULL);
+		free = fbp->data;
+		ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	}
+	/*
+	 * If we don't have a data block, we need to allocate one and make
+	 * the freespace entries refer to it.
+	 */
+	if (dbno == -1) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Not allowed to allocate, return failure.
+		 */
+		if (args->justcheck || args->total == 0) {
+			/*
+			 * Drop the freespace buffer unless it came from our
+			 * caller.
+			 */
+			if (fblk == NULL || fblk->bp == NULL)
+				xfs_da_buf_done(fbp);
+			return XFS_ERROR(ENOSPC);
+		}
+		/*
+		 * Allocate and initialize the new data block.
+		 */
+		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
+				&dbno)) ||
+		    (error = xfs_dir2_data_init(args, dbno, &dbp))) {
+			/*
+			 * Drop the freespace buffer unless it came from our
+			 * caller.
+			 */
+			if (fblk == NULL || fblk->bp == NULL)
+				xfs_da_buf_done(fbp);
+			return error;
+		}
+		/*
+		 * If the freespace entry for this data block is not in the
+		 * freespace block we have in hand, drop the one we have
+		 * and get the right one.
+		 */
+		if (XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno) {
+			xfs_da_brelse(tp, fbp);
+			if (fblk && fblk->bp)
+				fblk->bp = NULL;
+			fbno = XFS_DIR2_DB_TO_FDB(mp, dbno);
+			if (error = xfs_da_read_buf(tp, dp,
+					XFS_DIR2_DB_TO_DA(mp, fbno), -1, &fbp,
+					XFS_DATA_FORK)) {
+				xfs_da_buf_done(dbp);
+				return error;
+			}
+			ASSERT(fbp != NULL);
+			free = fbp->data;
+			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+		}
+		/*
+		 * Set the freespace block index from the data block number.
+		 */
+		findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+		/*
+		 * If it's after the end of the current entries in the
+		 * freespace block, extend that table.
+		 */
+		if (findex >= INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+			ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
+			INT_SET(free->hdr.nvalid, ARCH_CONVERT, findex + 1);
+			/*
+			 * Tag new entry so nused will go up.
+			 */
+			INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF);
+		}
+		/*
+		 * If this entry was for an empty data block
+		 * (this should always be true) then update the header.
+		 */
+		if (INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) {
+			INT_MOD(free->hdr.nused, ARCH_CONVERT, +1);
+			xfs_dir2_free_log_header(tp, fbp);
+		}
+		/*
+		 * Update the real value in the table.
+		 * We haven't allocated the data entry yet so this will
+		 * change again.
+		 */
+		data = dbp->data;
+		INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT); 
+		logfree = 1;
+	}
+	/*
+	 * We had a data block so we don't have to make a new one.
+	 */
+	else {
+		/*
+		 * If just checking, we succeeded.
+		 */
+		if (args->justcheck) {
+			if (fblk == NULL || fblk->bp == NULL)
+				xfs_da_buf_done(fbp);
+			return 0;
+		}
+		/*
+		 * Read the data block in.
+		 */
+		if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno),
+				-1, &dbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+			if (fblk == NULL || fblk->bp == NULL)
+				xfs_da_buf_done(fbp);
+			return error;
+		}
+		data = dbp->data;
+		logfree = 0;
+	}
+	ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) >= length);
+	/*
+	 * Point to the existing unused space.
+	 */
+	dup = (xfs_dir2_data_unused_t *)
+	      ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
+	needscan = needlog = 0;
+	/*
+	 * Mark the first part of the unused space, inuse for us.
+	 */
+	xfs_dir2_data_use_free(tp, dbp, dup,
+		(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+		&needlog, &needscan);
+	/*
+	 * Fill in the new entry and log it.
+	 */
+	dep = (xfs_dir2_data_entry_t *)dup;
+	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->namelen = args->namelen;
+	bcopy(args->name, dep->name, dep->namelen);
+	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
+	xfs_dir2_data_log_entry(tp, dbp, dep);
+	/*
+	 * Rescan the block for bestfree if needed.
+	 */
+	if (needscan)
+		xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+	/*
+	 * Log the data block header if needed.
+	 */
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	/*
+	 * If the freespace entry is now wrong, update it.
+	 */
+	if (INT_GET(free->bests[findex], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+		INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT);
+		logfree = 1;
+	}
+	/*
+	 * Log the freespace entry if needed.
+	 */
+	if (logfree)
+		xfs_dir2_free_log_bests(tp, fbp, findex, findex);
+	/*
+	 * If the caller didn't hand us the freespace block, drop it.
+	 */
+	if (fblk == NULL || fblk->bp == NULL)
+		xfs_da_buf_done(fbp);
+	/*
+	 * Return the data block and offset in args, then drop the data block.
+	 */
+	args->blkno = (xfs_dablk_t)dbno;
+	args->index = INT_GET(*tagp, ARCH_CONVERT);
+	xfs_da_buf_done(dbp);
+	return 0;
+}
+
+/*
+ * Look up an entry in a node-format directory: xfs_da_node_lookup_int does
+ * the search, we manage the cursor and return its status (or its error).
+ */
+int						/* error */
+xfs_dir2_node_lookup(
+	xfs_da_args_t	*args)			/* operation arguments */
+{
+	int		error;			/* error return value */
+	int		level;			/* btree level */
+	int		rval;			/* operation return value */
+	xfs_da_state_t	*state;			/* btree cursor */
+	xfs_trans_t	*tp;			/* transaction pointer */
+
+	xfs_dir2_trace_args("node_lookup", args);
+	/*
+	 * Build the btree cursor for this directory.
+	 */
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_dirblksize;
+	/*
+	 * Run the lookup; a hard error replaces the lookup status.
+	 */
+	if ((error = xfs_da_node_lookup_int(state, &rval)) != 0)
+		rval = error;
+	tp = args->trans;
+	/*
+	 * Drop every buffer on the cursor path.
+	 */
+	for (level = 0; level < state->path.active; level++) {
+		xfs_da_brelse(tp, state->path.blk[level].bp);
+		state->path.blk[level].bp = NULL;
+	}
+	/*
+	 * Drop the extra block as well, if the cursor holds one.
+	 */
+	if (state->extravalid && state->extrablk.bp) {
+		xfs_da_brelse(tp, state->extrablk.bp);
+		state->extrablk.bp = NULL;
+	}
+	xfs_da_state_free(state);
+	return rval;
+}
+
+/*
+ * Remove an entry from a node-format directory.
+ * The btree cursor is freed on every return path, including failures.
+ */
+int						/* error */
+xfs_dir2_node_removename(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_da_state_blk_t	*blk;		/* leaf block */
+	int			error;		/* error return value */
+	int			rval;		/* operation return value */
+	xfs_da_state_t		*state;		/* btree cursor */
+
+	xfs_dir2_trace_args("node_removename", args);
+	/*
+	 * Allocate and initialize the btree cursor.
+	 */
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_dirblksize;
+	/*
+	 * Look up the entry we're deleting, set up the cursor.
+	 */
+	error = xfs_da_node_lookup_int(state, &rval);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		rval = error;
+	}
+	/*
+	 * Didn't find it, upper layer screwed up.
+	 */
+	if (rval != EEXIST) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_state_free(state);
+		return rval;
+	}
+	blk = &state->path.blk[state->path.active - 1];
+	ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(state->extravalid);
+	/*
+	 * Remove the leaf and data entries.
+	 * Extrablk refers to the data block.
+	 */
+	error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
+		&state->extrablk, &rval);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_state_free(state);	/* don't leak the cursor */
+		return error;
+	}
+	/* Fix the hash values up the btree. */
+	xfs_da_fixhashpath(state, &state->path);
+	/*
+	 * If we need to join leaf blocks, do it.
+	 */
+	if (rval && state->path.active > 1)
+		error = xfs_da_join(state);
+	/*
+	 * If no errors so far, try conversion to leaf format.
+	 */
+	if (!error)
+		error = xfs_dir2_node_to_leaf(state);
+	xfs_da_state_free(state);
+	return error;
+}
+
+/*
+ * Store args->inumber into an existing entry of a node-format directory.
+ * Returns 0 once the entry is updated, else the lookup's status or error.
+ */
+int						/* error */
+xfs_dir2_node_replace(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_da_state_blk_t	*blk;		/* leaf block */
+	xfs_dir2_data_t		*data;		/* data block structure */
+	xfs_dir2_data_entry_t	*dep;		/* data entry changed */
+	int			error;		/* error return value */
+	xfs_ino_t		inum;		/* new inode number */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry being changed */
+	int			level;		/* btree level */
+	int			rval;		/* internal return value */
+	xfs_da_state_t		*state;		/* btree cursor */
+
+	xfs_dir2_trace_args("node_replace", args);
+	/*
+	 * Save the new inode number, then set up the btree cursor.
+	 */
+	inum = args->inumber;
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_dirblksize;
+	/*
+	 * Find the entry to change; a hard error replaces the status.
+	 */
+	if ((error = xfs_da_node_lookup_int(state, &rval)) != 0) {
+#pragma mips_frequency_hint NEVER
+		rval = error;
+	}
+	/*
+	 * The vnodeops layer has already looked the name up and locked it,
+	 * so it ought to be here; cope with a miss anyway.
+	 */
+	if (rval == EEXIST) {
+		/*
+		 * Locate the leaf entry the cursor stopped on.
+		 */
+		blk = state->path.blk + (state->path.active - 1);
+		ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+		leaf = blk->bp->data;
+		lep = leaf->ents + blk->index;
+		ASSERT(state->extravalid);
+		/*
+		 * Follow its address into the data block.
+		 */
+		data = state->extrablk.bp->data;
+		ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+		dep = (xfs_dir2_data_entry_t *)((char *)data +
+			XFS_DIR2_DATAPTR_TO_OFF(state->mp,
+				INT_GET(lep->address, ARCH_CONVERT)));
+		ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT));
+		/*
+		 * Write the new inode number and log the entry.
+		 */
+		INT_SET(dep->inumber, ARCH_CONVERT, inum);
+		xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
+		rval = 0;
+	}
+	/*
+	 * No match but the cursor holds a data block: release it.
+	 */
+	else if (state->extravalid) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_brelse(args->trans, state->extrablk.bp);
+		state->extrablk.bp = NULL;
+	}
+	/*
+	 * Release every buffer on the cursor path.
+	 */
+	for (level = 0; level < state->path.active; level++) {
+		xfs_da_brelse(args->trans, state->path.blk[level].bp);
+		state->path.blk[level].bp = NULL;
+	}
+	xfs_da_state_free(state);
+	return rval;
+}
+
+/*
+ * Trim away the freespace block at "fo" if none of its entries are in use.
+ * On a successful return *rvalp is 1 if the block was removed, 0 if it
+ * was left alone; *rvalp is not written when an error is returned.
+ */
+int						/* error */
+xfs_dir2_node_trim_free(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_fileoff_t		fo,		/* free block number */
+	int			*rvalp)		/* out: did something */
+{
+	xfs_dabuf_t		*bp;		/* freespace buffer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dablk_t		fbno;		/* fo as a da block number */
+	xfs_dir2_free_t		*free;		/* freespace structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	fbno = (xfs_dablk_t)fo;
+	/*
+	 * Bring the freespace block in.
+	 */
+	error = xfs_da_read_buf(tp, dp, fbno, -1, &bp, XFS_DATA_FORK);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	free = bp->data;
+	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	/*
+	 * Some entries are still in use: leave the block alone.
+	 */
+	if (INT_GET(free->hdr.nused, ARCH_CONVERT) > 0) {
+		xfs_da_brelse(tp, bp);
+		*rvalp = 0;
+		return 0;
+	}
+	/*
+	 * Nothing is in use, so give the block back.
+	 */
+	error = xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, fbno), bp);
+	if (error) {
+		/*
+		 * ENOSPC only arises without a space reservation when an
+		 * extent must be split in two; this is the last block of
+		 * an extent, so that cannot happen here.
+		 */
+		ASSERT(error != ENOSPC);
+		xfs_da_brelse(tp, bp);
+		return error;
+	}
+	/* The block is gone; tell the caller. */
+	*rvalp = 1;
+	return 0;
+}
diff --git a/libxfs/xfs_dir2_sf.c b/libxfs/xfs_dir2_sf.c
new file mode 100644
index 000000000..a021822b2
--- /dev/null
+++ b/libxfs/xfs_dir2_sf.c
@@ -0,0 +1,1119 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_sf.c
+ * Shortform directory implementation for v2 directories.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Given a block directory (dp/block), calculate its size as a shortform (sf)
+ * directory and a header for the sf directory, if it will fit in the
+ * space currently present in the inode.  If it won't fit, the output
+ * size is too big (but not accurate) and the header is left untouched.
+ */
+int						/* size for sf form */
+xfs_dir2_block_sfsize(
+	xfs_inode_t		*dp,		/* incore inode pointer */
+	xfs_dir2_block_t	*block,		/* block directory data */
+	xfs_dir2_sf_hdr_t	*sfhp)		/* output: header for sf form */
+{
+	xfs_dir2_dataptr_t	addr;		/* data entry address */
+	xfs_dir2_leaf_entry_t	*blp;		/* leaf area of the block */
+	xfs_dir2_block_tail_t	*btp;		/* tail area of the block */
+	int			count;		/* shortform entry count */
+	xfs_dir2_data_entry_t	*dep;		/* data entry in the block */
+	int			i;		/* block entry index */
+	int			i8count;	/* count of big-inode entries */
+	int			isdot;		/* entry is "." */
+	int			isdotdot;	/* entry is ".." */
+	xfs_mount_t		*mp;		/* mount structure pointer */
+	int			namelen;	/* total name bytes */
+	xfs_ino_t		parent = 0;	/* parent inode number */
+	int			size = 0;	/* total computed size */
+
+	mp = dp->i_mount;
+	/* parent and size start at 0 in case the loop never assigns them. */
+	count = i8count = namelen = 0;
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+
+	/*
+	 * Iterate over the block's data entries by using the leaf pointers.
+	 */
+	for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+		if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		/*
+		 * Calculate the pointer to the entry at hand.
+		 */
+		dep = (xfs_dir2_data_entry_t *)
+		      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+		/*
+		 * Detect . and .., so we can special-case them.
+		 * . is not included in sf directories.
+		 * .. is included by just the parent inode number.
+		 */
+		isdot = dep->namelen == 1 && dep->name[0] == '.';
+		isdotdot =
+			dep->namelen == 2 &&
+			dep->name[0] == '.' && dep->name[1] == '.';
+#if XFS_BIG_FILESYSTEMS
+		if (!isdot)
+			i8count += INT_GET(dep->inumber, ARCH_CONVERT) > XFS_DIR2_MAX_SHORT_INUM;
+#endif
+		if (!isdot && !isdotdot) {
+			count++;
+			namelen += dep->namelen;
+		} else if (isdotdot)
+			parent = INT_GET(dep->inumber, ARCH_CONVERT);
+		/*
+		 * Calculate the new size, see if we should give up yet.
+		 */
+		size = XFS_DIR2_SF_HDR_SIZE(i8count) +		/* header */
+		       count +					/* namelen */
+		       count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
+		       namelen +				/* name */
+		       (i8count ?				/* inumber */
+				(uint)sizeof(xfs_dir2_ino8_t) * count :
+				(uint)sizeof(xfs_dir2_ino4_t) * count);
+		if (size > XFS_IFORK_DSIZE(dp))
+			return size;		/* size value is a failure */
+	}
+	/*
+	 * Create the output header, if it worked.
+	 */
+	sfhp->count = count;
+	sfhp->i8count = i8count;
+	XFS_DIR2_SF_PUT_INUMBER_ARCH((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent, ARCH_CONVERT);
+	return size;
+}
+
+/*
+ * Convert a block format directory to shortform.
+ * Caller has already checked that it will fit, and built us a header.
+ * Works from a private copy of the block, since the block itself is
+ * handed to xfs_dir2_shrink_inode before the shortform data is built.
+ */
+int						/* error */
+xfs_dir2_block_to_sf(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*bp,		/* block buffer */
+	int			size,		/* shortform directory size */
+	xfs_dir2_sf_hdr_t	*sfhp)		/* shortform directory hdr */
+{
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
+	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_unused_t	*dup;		/* unused data pointer */
+	char			*endptr;	/* end of data entries */
+	int			error;		/* error return value */
+	int			logflags;	/* inode logging flags */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	char			*ptr;		/* current data pointer */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+        xfs_ino_t               temp;	/* inode number being copied */
+                
+	xfs_dir2_trace_args_sb("block_to_sf", args, size, bp);
+	dp = args->dp;
+	mp = dp->i_mount;
+
+	/*
+	 * Make a copy of the block data, so we can shrink the inode
+	 * and add local data.
+	 */
+	block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
+	bcopy(bp->data, block, mp->m_dirblksize);
+	logflags = XFS_ILOG_CORE;
+	if (error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(error != ENOSPC);
+		goto out;
+	}
+	/*
+	 * The buffer is now unconditionally gone, whether
+	 * xfs_dir2_shrink_inode worked or not.
+	 *
+	 * Convert the inode to local format.
+	 */
+	dp->i_df.if_flags &= ~XFS_IFEXTENTS;
+	dp->i_df.if_flags |= XFS_IFINLINE;
+	dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+	ASSERT(dp->i_df.if_bytes == 0);
+	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+	logflags |= XFS_ILOG_DDATA;
+	/*
+	 * Copy the header into the newly allocated local space.
+	 */
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	bcopy(sfhp, sfp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+	dp->i_d.di_size = size;
+	/*
+	 * Set up to loop over the copied block's entries.
+	 */
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	ptr = (char *)block->u;
+	endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	/*
+	 * Loop over the active and unused entries.
+	 * Stop when we reach the leaf/tail portion of the block.
+	 */
+	while (ptr < endptr) {
+		/*
+		 * If it's unused, just skip over it by its recorded length.
+		 */
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		/* Skip "." (it must name this directory). */
+		if (dep->namelen == 1 && dep->name[0] == '.')
+			ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == dp->i_ino);
+		/*
+		 * Skip .., but make sure it matches the header's parent.
+		 */
+		else if (dep->namelen == 2 &&
+			 dep->name[0] == '.' && dep->name[1] == '.')
+			ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) ==
+			       XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT));
+		/*
+		 * Normal entry, copy it into shortform.
+		 */
+		else {
+			sfep->namelen = dep->namelen;
+			XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,
+				(xfs_dir2_data_aoff_t)
+				((char *)dep - (char *)block), ARCH_CONVERT);
+			bcopy(dep->name, sfep->name, dep->namelen);
+                        temp=INT_GET(dep->inumber, ARCH_CONVERT);
+			XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &temp,
+				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+		}
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+	ASSERT((char *)sfep - (char *)sfp == size);
+	xfs_dir2_sf_check(args);
+out:
+	xfs_trans_log_inode(args->trans, dp, logflags);
+	kmem_free(block, mp->m_dirblksize);
+	return error;
+}
+
+/*
+ * Add a name to a shortform directory.
+ * There are two algorithms, "easy" and "hard", which we decide on
+ * before changing anything.
+ * Convert to block form if necessary, if the new entry won't fit.
+ * Returns 0, ENOSPC (no fit and not allowed to convert), EIO (data too
+ * short for the header), or an error from the block-form conversion/add.
+ */
+int						/* error */
+xfs_dir2_sf_addname(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	int			add_entsize;	/* size of the new entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	int			incr_isize;	/* total change in size */
+	int			new_isize;	/* di_size after adding name */
+	int			objchange;	/* changing to 8-byte inodes */
+	xfs_dir2_data_aoff_t	offset;		/* offset for new entry */
+	int			old_isize;	/* di_size before adding name */
+	int			pick;		/* which algorithm to use */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
+
+	xfs_dir2_trace_args("sf_addname", args);
+	ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
+	dp = args->dp;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Make sure the shortform value has some of its header.
+	 */
+	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	/*
+	 * Compute entry (and change in) size.
+	 */
+	add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	incr_isize = add_entsize;
+#if XFS_BIG_FILESYSTEMS
+	/*
+	 * Do we have to change to 8 byte inodes?
+	 */
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Yes.  count + 2 numbers grow (presumably entries, parent, new one).
+		 */
+		add_entsize +=
+			(uint)sizeof(xfs_dir2_ino8_t) -
+			(uint)sizeof(xfs_dir2_ino4_t);
+		incr_isize +=
+			(sfp->hdr.count + 2) *
+			((uint)sizeof(xfs_dir2_ino8_t) -
+			 (uint)sizeof(xfs_dir2_ino4_t));
+		objchange = 1;
+	} else
+		objchange = 0;
+#else
+	objchange = 0;
+#endif
+	old_isize = (int)dp->i_d.di_size;
+	new_isize = old_isize + incr_isize;
+	/*
+	 * Won't fit as shortform any more (due to size),
+	 * or the pick routine says it won't (due to offset values).
+	 */
+	if (new_isize > XFS_IFORK_DSIZE(dp) ||
+	    (pick =
+	     xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * Just checking or no space reservation, it doesn't fit.
+		 */
+		if (args->justcheck || args->total == 0)
+			return XFS_ERROR(ENOSPC);
+		/*
+		 * Convert to block form then add the name.
+		 */
+		error = xfs_dir2_sf_to_block(args);
+		if (error)
+			return error;
+		return xfs_dir2_block_addname(args);
+	}
+	/* Just checking, it fits; pick is known to be 1 or 2 from here on. */
+	if (args->justcheck)
+		return 0;
+	/*
+	 * Do it the easy way - just add it at the end.
+	 */
+	if (pick == 1)
+		xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize);
+	/*
+	 * Do it the hard way - look for a place to insert the new entry.
+	 * Convert to 8 byte inode numbers first if necessary.
+	 */
+	else {
+		ASSERT(pick == 2);
+#if XFS_BIG_FILESYSTEMS
+		if (objchange)
+			xfs_dir2_sf_toino8(args);
+#endif
+		xfs_dir2_sf_addname_hard(args, objchange, new_isize);
+	}
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+	return 0;
+}
+
+/*
+ * Add the new entry the "easy" way: append it after the last entry.
+ * Entries are kept sorted by "offset", so this needs room past the
+ * last offset already in use plus room for a later conversion to a
+ * block directory.  The pick routine has already checked that and
+ * supplies the entry pointer and offset used here.
+ */
+STATIC void
+xfs_dir2_sf_addname_easy(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dir2_sf_entry_t	*sfep,		/* pointer to new entry */
+	xfs_dir2_data_aoff_t	offset,		/* offset to use for new ent */
+	int			new_isize)	/* new directory size */
+{
+	int			add_entsize;	/* size of the new entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			entoff;		/* new entry's byte offset */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+
+	dp = args->dp;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	/*
+	 * Note the entry's byte offset, then grow the in-inode space.
+	 */
+	entoff = (int)((char *)sfep - (char *)sfp);
+	add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	xfs_idata_realloc(dp, add_entsize, XFS_DATA_FORK);
+	/*
+	 * The realloc may have moved the data, so recompute both pointers.
+	 */
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + entoff);
+	/*
+	 * Write the new entry: name length, offset, name, inode number.
+	 */
+	sfep->namelen = args->namelen;
+	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
+	bcopy(args->name, sfep->name, sfep->namelen);
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
+		XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+	/*
+	 * Bump the header counts and record the new size in the inode.
+	 */
+	sfp->hdr.count++;
+#if XFS_BIG_FILESYSTEMS
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
+		sfp->hdr.i8count++;
+#endif
+	dp->i_d.di_size = new_isize;
+	xfs_dir2_sf_check(args);
+}
+
+/*
+ * Add the new entry the "hard" way.
+ * The caller has already converted to 8 byte inode numbers if necessary
+ * (objchange set), in which case we need to leave the i8count at 1.
+ * Find a hole that the new entry will fit into, and copy
+ * the first part of the entries, the new entry, and the last part of
+ * the entries.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_dir2_sf_addname_hard(
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			objchange,	/* changing inode number size */
+	int			new_isize)	/* new directory size */
+{
+	int			add_datasize;	/* data size needed for new ent */
+	char			buf[XFS_DIR2_SF_MAX_SIZE]; /* buffer for old */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			eof;		/* reached end of old dir */
+	int			nbytes;		/* temp for byte copies */
+	xfs_dir2_data_aoff_t	new_offset;	/* next offset value */
+	xfs_dir2_data_aoff_t	offset;		/* current offset value */
+	int			old_isize;	/* previous di_size */
+	xfs_dir2_sf_entry_t	*oldsfep;	/* entry in original dir */
+	xfs_dir2_sf_t		*oldsfp;	/* original shortform dir */
+	xfs_dir2_sf_entry_t	*sfep;		/* entry in new dir */
+	xfs_dir2_sf_t		*sfp;		/* new shortform dir */
+
+	/*
+	 * Copy the old directory to the stack buffer.
+	 */
+	dp = args->dp;
+
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	old_isize = (int)dp->i_d.di_size;
+	oldsfp = (xfs_dir2_sf_t *)buf;
+	bcopy(sfp, oldsfp, old_isize);
+	/*
+	 * Walk the old entries; "offset" is where the previous entry's data
+	 * ends.  Stop at the first entry whose own offset leaves a gap of
+	 * add_datasize before it.  At eof, oldsfep is the end of the old dir.
+	 */
+	for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
+	      oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp),
+	      add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen),
+	      eof = (char *)oldsfep == &buf[old_isize];
+	     !eof;
+	     offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen),
+	      oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep),
+	      eof = (char *)oldsfep == &buf[old_isize]) {
+		new_offset = XFS_DIR2_SF_GET_OFFSET_ARCH(oldsfep, ARCH_CONVERT);
+		if (offset + add_datasize <= new_offset)
+			break;
+	}
+	/*
+	 * Get rid of the old directory, then allocate space for
+	 * the new one.  We do this so xfs_idata_realloc won't copy
+	 * the data.
+	 */
+	xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
+	xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
+	/*
+	 * Reset the pointer since the buffer was reallocated.
+	 */
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	/*
+	 * Copy the header and every old entry before the insertion point.
+	 */
+	nbytes = (int)((char *)oldsfep - (char *)oldsfp);
+	bcopy(oldsfp, sfp, nbytes);
+	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
+	/*
+	 * Fill in the new entry, and update the header counts.
+	 */
+	sfep->namelen = args->namelen;
+	XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
+	bcopy(args->name, sfep->name, sfep->namelen);
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
+		XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+	sfp->hdr.count++;
+#if XFS_BIG_FILESYSTEMS
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
+		sfp->hdr.i8count++;
+#endif
+	/*
+	 * If old entries remain past the insertion point, copy them too.
+	 */
+	if (!eof) {
+		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+		bcopy(oldsfep, sfep, old_isize - nbytes);
+	}
+	dp->i_d.di_size = new_isize;
+	xfs_dir2_sf_check(args);
+}
+
+/*
+ * Decide whether the new entry can stay in shortform and, if so, how to
+ * add it: appended after the last entry (easy) or inserted elsewhere
+ * (hard).  Only the easy answer fills in *sfepp and *offsetp.
+ * Return 0 (won't fit), 1 (easy), 2 (hard).
+ */
+/*ARGSUSED*/
+STATIC int					/* pick result */
+xfs_dir2_sf_addname_pick(
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			objchange,	/* inode # size changes */
+	xfs_dir2_sf_entry_t	**sfepp,	/* out(1): new entry ptr */
+	xfs_dir2_data_aoff_t	*offsetp)	/* out(1): new offset */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_aoff_t	entoff;		/* offset of current entry */
+	int			holefit;	/* found hole it will fit in */
+	int			n;		/* entry number */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_dir2_data_aoff_t	offset;		/* data block offset */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+	int			size;		/* entry's data size */
+	int			used;		/* data bytes used */
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	size = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	/*
+	 * Walk the entries in order.  "offset" trails the walk as the end
+	 * of the data used so far; note whether some gap in front of an
+	 * entry is big enough for the new one.
+	 */
+	holefit = 0;
+	offset = XFS_DIR2_DATA_FIRST_OFFSET;
+	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (n = 0; n < sfp->hdr.count; n++) {
+		entoff = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT);
+		if (!holefit)
+			holefit = offset + size <= entoff;
+		offset = entoff + XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+	}
+	/*
+	 * Data bytes this would occupy as a block-form directory, not
+	 * counting the new entry.
+	 */
+	used = offset +
+	       (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	       (uint)sizeof(xfs_dir2_block_tail_t);
+	/*
+	 * Too big for one directory block, even using a hole: the caller
+	 * goes back, converts to block form, and retries the insert there
+	 * (converting on to leaf form).
+	 */
+	if ((holefit ? used : used + size) > mp->m_dirblksize)
+		return 0;
+	/*
+	 * A change of inode number size always takes the hard path.
+	 */
+#if XFS_BIG_FILESYSTEMS
+	if (objchange) {
+#pragma mips_frequency_hint NEVER
+		return 2;
+	}
+#else
+	ASSERT(objchange == 0);
+#endif
+	/*
+	 * No room to append at the end: use the hole (the hard way).
+	 */
+	if (used + size > mp->m_dirblksize)
+		return 2;
+	/*
+	 * Easy case: report the append position to the caller.
+	 */
+	*offsetp = offset;
+	*sfepp = sfep;
+	return 1;
+}
+
+#ifdef DEBUG
+/*
+ * Check consistency of a shortform directory, assert if bad: entry
+ * offsets, the i8count tally, the inode size and the block-form bound. */
+STATIC void
+xfs_dir2_sf_check(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			i;		/* entry number */
+	int			i8count;	/* number of big inode#s */
+	xfs_ino_t		ino;		/* entry inode number */
+	int			offset;		/* data offset */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform dir entry */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+
+	dp = args->dp;
+
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	offset = XFS_DIR2_DATA_FIRST_OFFSET;
+	ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;	/* parent counts too */
+	/* Each entry must start at or after the end of the one before it. */
+	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	     i < sfp->hdr.count;
+	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+		ASSERT(XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) >= offset);
+		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+		offset =
+			XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) +
+			XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+	}
+	ASSERT(i8count == sfp->hdr.i8count);	/* header tally matches */
+#if !XFS_BIG_FILESYSTEMS
+	ASSERT(i8count == 0);
+#endif
+	ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);	/* walk ends at EOF */
+	ASSERT(offset +
+	       (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	       (uint)sizeof(xfs_dir2_block_tail_t) <=
+	       dp->i_mount->m_dirblksize);	/* would fit in block form */
+}
+#endif	/* DEBUG */
+
+/*
+ * Create a new, empty shortform directory in an inode of size zero: the
+ * data fork gets just a header with parent pino and a zero entry count. */
+int					/* error, always 0 */
+xfs_dir2_sf_create(
+	xfs_da_args_t	*args,		/* operation arguments */
+	xfs_ino_t	pino)		/* parent inode number */
+{
+	xfs_inode_t	*dp;		/* incore directory inode */
+	int		i8count;	/* parent inode is an 8-byte number */
+	xfs_dir2_sf_t	*sfp;		/* shortform structure */
+	int		size;		/* directory size */
+
+	xfs_dir2_trace_args_i("sf_create", args, pino);
+	dp = args->dp;
+
+	ASSERT(dp != NULL);
+	ASSERT(dp->i_d.di_size == 0);
+	/*
+	 * If it's currently a zero-length extent file, convert it to
+	 * local format and log the changed inode core.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+		dp->i_df.if_flags &= ~XFS_IFEXTENTS;	/* just in case */
+		dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+		dp->i_df.if_flags |= XFS_IFINLINE;
+	}
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	ASSERT(dp->i_df.if_bytes == 0);
+	i8count = pino > XFS_DIR2_MAX_SHORT_INUM;	/* 0 or 1 */
+	size = XFS_DIR2_SF_HDR_SIZE(i8count);
+	/*
+	 * Make a buffer for the data, just big enough for the header.
+	 */
+	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+	/*
+	 * Fill in the header, starting with i8count.
+	 */
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	sfp->hdr.i8count = i8count;
+	/*
+	 * Now can put in the inode number, since i8count is set.
+	 */
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &pino, &sfp->hdr.parent, ARCH_CONVERT);
+	sfp->hdr.count = 0;
+	dp->i_d.di_size = size;
+	xfs_dir2_sf_check(args);
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+	return 0;
+}
+
+/*
+ * Look a name up in a shortform directory, setting args->inumber on a hit.
+ * Returns EEXIST if found, ENOENT if not found, EIO if the fork is too short.
+ */
+int						/* error */
+xfs_dir2_sf_lookup(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			left;		/* entries still to examine */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+
+	xfs_dir2_trace_args("sf_lookup", args);
+	xfs_dir2_sf_check(args);
+	dp = args->dp;
+
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * A fork too short to reach the parent field cannot be searched.
+	 */
+	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	/*
+	 * ".." is answered from the parent number kept in the header.
+	 */
+	if (args->namelen == 2 &&
+	    args->name[0] == '.' && args->name[1] == '.') {
+		args->inumber = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+		return XFS_ERROR(EEXIST);
+	}
+	/*
+	 * "." is the directory itself.
+	 */
+	if (args->namelen == 1 && args->name[0] == '.') {
+		args->inumber = dp->i_ino;
+		return XFS_ERROR(EEXIST);
+	}
+	/*
+	 * Otherwise scan every entry for a name match.
+	 */
+	for (sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), left = sfp->hdr.count;
+	     left > 0;
+	     sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), left--) {
+		if (sfep->namelen != args->namelen ||
+		    sfep->name[0] != args->name[0] ||
+		    bcmp(args->name, sfep->name, args->namelen) != 0)
+			continue;
+		/* Found it: report the entry's inode number. */
+		args->inumber = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+				XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+		return XFS_ERROR(EEXIST);
+	}
+	/*
+	 * No entry matched.
+	 */
+	ASSERT(args->oknoent);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Remove the entry named by args from a shortform directory, closing the
+ * gap and shrinking the data fork.  Returns 0, ENOENT or EIO. */
+int						/* error */
+xfs_dir2_sf_removename(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	int			byteoff;	/* offset of removed entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			entsize;	/* this entry's size */
+	int			i;		/* shortform entry index */
+	int			newsize;	/* new inode size */
+	int			oldsize;	/* old inode size */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+
+	xfs_dir2_trace_args("sf_removename", args);
+	dp = args->dp;
+
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	oldsize = (int)dp->i_d.di_size;
+	/*
+	 * Bail out if the directory is way too short.
+	 */
+	if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == oldsize);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	/*
+	 * Loop over the old directory entries.
+	 * Find the one we're deleting; sfep is left pointing at it.
+	 */
+	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	     i < sfp->hdr.count;
+	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+		if (sfep->namelen == args->namelen &&
+		    sfep->name[0] == args->name[0] &&
+		    bcmp(sfep->name, args->name, args->namelen) == 0) {
+			ASSERT(XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT) ==
+				args->inumber);
+			break;
+		}
+	}
+	/*
+	 * Didn't find it (the loop ran to completion without a break).
+	 */
+	if (i == sfp->hdr.count) {
+#pragma mips_frequency_hint NEVER
+		return XFS_ERROR(ENOENT);
+	}
+	/*
+	 * Calculate sizes: where the entry sits, how big it is, what's left.
+	 */
+	byteoff = (int)((char *)sfep - (char *)sfp);
+	entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	newsize = oldsize - entsize;
+	/*
+	 * Copy the part if any after the removed entry, sliding it down.
+	 */
+	if (byteoff + entsize < oldsize)
+		ovbcopy((char *)sfp + byteoff + entsize, (char *)sfp + byteoff,
+			oldsize - (byteoff + entsize));
+	/*
+	 * Fix up the header and file size.
+	 */
+	sfp->hdr.count--;
+	dp->i_d.di_size = newsize;
+	/*
+	 * Reallocate, making it smaller (the size delta is negative).
+	 */
+	xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;	/* re-fetch after realloc */
+#if XFS_BIG_FILESYSTEMS
+	/*
+	 * Removed a large inode number: if it was the only one counted,
+	 * convert to 4-byte inode numbers, otherwise just uncount it. */
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
+#pragma mips_frequency_hint NEVER
+		if (sfp->hdr.i8count == 1)
+			xfs_dir2_sf_toino4(args);
+		else
+			sfp->hdr.i8count--;
+	}
+#endif
+	xfs_dir2_sf_check(args);
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+	return 0;
+}
+
+/*
+ * Replace the inode number of an entry in a shortform directory.
+ * Converts between 4-byte and 8-byte inode number form (or to block form
+ * if 8-byte numbers would not fit inline) as required, and keeps
+ * hdr.i8count in step with the number of large inode numbers stored.
+ */
+int						/* error */
+xfs_dir2_sf_replace(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			i;		/* entry index */
+#if XFS_BIG_FILESYSTEMS
+	int			i8elevated = 0;	/* sf_toino8 already set i8count */
+#endif
+#if XFS_BIG_FILESYSTEMS || defined(DEBUG)
+	xfs_ino_t		ino;		/* entry old inode number */
+#endif
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
+	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+
+	xfs_dir2_trace_args("sf_replace", args);
+	dp = args->dp;
+
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/* Bail out if the shortform directory is way too small. */
+	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+#if XFS_BIG_FILESYSTEMS
+	/* New inode number is large, and need to convert to 8-byte inodes. */
+	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+#pragma mips_frequency_hint NEVER
+		int	error;			/* error return value */
+		int	newsize;		/* new inode size */
+
+		newsize =
+			dp->i_df.if_bytes +
+			(sfp->hdr.count + 1) *
+			((uint)sizeof(xfs_dir2_ino8_t) -
+			 (uint)sizeof(xfs_dir2_ino4_t));
+		/* Won't fit as shortform, convert to block then do replace. */
+		if (newsize > XFS_IFORK_DSIZE(dp)) {
+			error = xfs_dir2_sf_to_block(args);
+			if (error)
+				return error;
+			return xfs_dir2_block_replace(args);
+		}
+		/*
+		 * Still fits, convert to 8-byte now (this sets i8count to 1).
+		 */
+		xfs_dir2_sf_toino8(args);
+		i8elevated = 1;
+		sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	}
+#endif
+	ASSERT(args->namelen != 1 || args->name[0] != '.');
+	/* Replace ..'s entry. */
+	if (args->namelen == 2 &&
+	    args->name[0] == '.' && args->name[1] == '.') {
+#if XFS_BIG_FILESYSTEMS || defined(DEBUG)
+		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+		ASSERT(args->inumber != ino);
+#endif
+		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber, &sfp->hdr.parent, ARCH_CONVERT);
+	}
+	/* Normal entry, look for the name. */
+	else {
+		for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+		     i < sfp->hdr.count;
+		     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+			if (sfep->namelen == args->namelen &&
+			    sfep->name[0] == args->name[0] &&
+			    bcmp(args->name, sfep->name, args->namelen) == 0) {
+#if XFS_BIG_FILESYSTEMS || defined(DEBUG)
+				ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+				ASSERT(args->inumber != ino);
+#endif
+				XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
+					XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+				break;
+			}
+		}
+		/* Didn't find it; back out any conversion to 8-byte form made
+		 * above, since no large inode number was stored after all. */
+		if (i == sfp->hdr.count) {
+#pragma mips_frequency_hint NEVER
+			ASSERT(args->oknoent);
+#if XFS_BIG_FILESYSTEMS
+			if (i8elevated)
+				xfs_dir2_sf_toino4(args);
+#endif
+			return XFS_ERROR(ENOENT);
+		}
+	}
+#if XFS_BIG_FILESYSTEMS
+	/* See if the old number was large, the new number is small. */
+	if (ino > XFS_DIR2_MAX_SHORT_INUM &&
+	    args->inumber <= XFS_DIR2_MAX_SHORT_INUM) {
+#pragma mips_frequency_hint NEVER
+		/* And the old count was one, so need to convert to small. */
+		if (sfp->hdr.i8count == 1)
+			xfs_dir2_sf_toino4(args);
+		else
+			sfp->hdr.i8count--;
+	}
+	/* See if the old number was small, the new number is large: count
+	 * it, unless sf_toino8 above already did by setting i8count to 1. */
+	if (ino <= XFS_DIR2_MAX_SHORT_INUM &&
+	    args->inumber > XFS_DIR2_MAX_SHORT_INUM && !i8elevated)
+		sfp->hdr.i8count++;
+#endif
+	xfs_dir2_sf_check(args);
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+	return 0;
+}
+
+#if XFS_BIG_FILESYSTEMS
+/*
+ * Convert from 8-byte inode numbers to 4-byte inode numbers by rebuilding
+ * the inline data from a temporary copy.  On entry the last 8-byte inode
+ * number is gone, but the count is still 1; on exit i8count is 0. */
+STATIC void
+xfs_dir2_sf_toino4(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	char			*buf;		/* old dir's buffer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			i;		/* entry index */
+	xfs_ino_t		ino;		/* entry inode number */
+	int			newsize;	/* new inode size */
+	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
+	xfs_dir2_sf_t		*oldsfp;	/* old sf directory */
+	int			oldsize;	/* old inode size */
+	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
+	xfs_dir2_sf_t		*sfp;		/* new sf directory */
+
+	xfs_dir2_trace_args("sf_toino4", args);
+	dp = args->dp;
+
+	/*
+	 * Copy the old directory to the buffer.
+	 * Then nuke it from the inode, and add the new buffer to the inode.
+	 * Don't want xfs_idata_realloc copying the data here.
+	 */
+	oldsize = dp->i_df.if_bytes;
+	buf = kmem_alloc(oldsize, KM_SLEEP);
+	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(oldsfp->hdr.i8count == 1);
+	bcopy(oldsfp, buf, oldsize);
+	/*
+	 * Compute the new inode size: every entry plus the parent shrinks.
+	 */
+	newsize =
+		oldsize -
+		(oldsfp->hdr.count + 1) *
+		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+	xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);	/* drop old data */
+	xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);	/* fresh buffer */
+	/*
+	 * Reset our pointers, the data has moved.
+	 */
+	oldsfp = (xfs_dir2_sf_t *)buf;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	/*
+	 * Fill in the new header; i8count goes first, before any inode# PUT.
+	 */
+	sfp->hdr.count = oldsfp->hdr.count;
+	sfp->hdr.i8count = 0;
+	ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT);
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, &sfp->hdr.parent, ARCH_CONVERT);
+	/*
+	 * Copy the entries field by field, old and new walked in step.
+	 */
+	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
+		    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	     i < sfp->hdr.count;
+	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
+		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+		sfep->namelen = oldsfep->namelen;
+		sfep->offset = oldsfep->offset;
+		bcopy(oldsfep->name, sfep->name, sfep->namelen);
+		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
+			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
+		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+	}
+	/*
+	 * Clean up the inode: free the copy, set the size, log the change.
+	 */
+	kmem_free(buf, oldsize);
+	dp->i_d.di_size = newsize;
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+
+/*
+ * Convert from 4-byte inode numbers to 8-byte inode numbers by rebuilding
+ * the inline data from a temporary copy.
+ * The new 8-byte inode number is not there yet, we leave with the
+ * count 1 but no corresponding entry; the caller must store one. */
+STATIC void
+xfs_dir2_sf_toino8(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	char			*buf;		/* old dir's buffer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			i;		/* entry index */
+	xfs_ino_t		ino;		/* entry inode number */
+	int			newsize;	/* new inode size */
+	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
+	xfs_dir2_sf_t		*oldsfp;	/* old sf directory */
+	int			oldsize;	/* old inode size */
+	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
+	xfs_dir2_sf_t		*sfp;		/* new sf directory */
+
+	xfs_dir2_trace_args("sf_toino8", args);
+	dp = args->dp;
+
+	/*
+	 * Copy the old directory to the buffer.
+	 * Then nuke it from the inode, and add the new buffer to the inode.
+	 * Don't want xfs_idata_realloc copying the data here.
+	 */
+	oldsize = dp->i_df.if_bytes;
+	buf = kmem_alloc(oldsize, KM_SLEEP);
+	oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	ASSERT(oldsfp->hdr.i8count == 0);
+	bcopy(oldsfp, buf, oldsize);
+	/*
+	 * Compute the new inode size: every entry plus the parent grows.
+	 */
+	newsize =
+		oldsize +
+		(oldsfp->hdr.count + 1) *
+		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+	xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);	/* drop old data */
+	xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);	/* fresh buffer */
+	/*
+	 * Reset our pointers, the data has moved.
+	 */
+	oldsfp = (xfs_dir2_sf_t *)buf;
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	/*
+	 * Fill in the new header; i8count goes first, before any inode# PUT.
+	 */
+	sfp->hdr.count = oldsfp->hdr.count;
+	sfp->hdr.i8count = 1;
+	ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT);
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, &sfp->hdr.parent, ARCH_CONVERT);
+	/*
+	 * Copy the entries field by field, old and new walked in step.
+	 */
+	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
+		    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	     i < sfp->hdr.count;
+	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
+		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+		sfep->namelen = oldsfep->namelen;
+		sfep->offset = oldsfep->offset;
+		bcopy(oldsfep->name, sfep->name, sfep->namelen);
+		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
+			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
+		XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+	}
+	/*
+	 * Clean up the inode: free the copy, set the size, log the change.
+	 */
+	kmem_free(buf, oldsize);
+	dp->i_d.di_size = newsize;
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+#endif	/* XFS_BIG_FILESYSTEMS */
diff --git a/libxfs/xfs_dir_leaf.c b/libxfs/xfs_dir_leaf.c
new file mode 100644
index 000000000..40c12148e
--- /dev/null
+++ b/libxfs/xfs_dir_leaf.c
@@ -0,0 +1,1695 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_dir_leaf.c
+ *
+ * Routines to implement leaf blocks of directories as Btrees of hashed names.
+ */
+
+/*
+ * Sanity-check an inode number against the filesystem geometry.
+ */
+int
+xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
+{
+	xfs_agnumber_t	agno = XFS_INO_TO_AGNO(mp, ino);
+	xfs_agblock_t	agbno = XFS_INO_TO_AGBNO(mp, ino);
+	int		offset = XFS_INO_TO_OFFSET(mp, ino);
+	xfs_agino_t	agino = XFS_OFFBNO_TO_AGINO(mp, agbno, offset);
+	int		ino_ok;
+
+	/*
+	 * The number is good only if every component is within range,
+	 * the AG block number is nonzero, and the components rebuild
+	 * the very same inode number.
+	 */
+	ino_ok = 1;
+	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+	    agbno == 0 || offset >= (1 << mp->m_sb.sb_inopblog) ||
+	    XFS_AGINO_TO_INO(mp, agno, agino) != ino)
+		ino_ok = 0;
+	if (XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
+			XFS_RANDOM_DIR_INO_VALIDATE)) {
+		xfs_fs_cmn_err(CE_WARN, mp,
+			"Invalid inode number 0x%Lx\n", ino);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	return 0;
+}
+
+/*
+ * Set up an empty shortform directory that records the given parent.
+ */
+int
+xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
+{
+	xfs_inode_t *dp = args->dp;
+	xfs_dir_sf_hdr_t *sfhdr;
+
+	/* The directory inode must exist and still have size zero. */
+	ASSERT(dp != NULL);
+	ASSERT(dp->i_d.di_size == 0);
+	if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+		dp->i_df.if_flags &= ~XFS_IFEXTENTS;	/* just in case */
+		dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+		dp->i_df.if_flags |= XFS_IFINLINE;
+	}
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	ASSERT(dp->i_df.if_bytes == 0);
+	xfs_idata_realloc(dp, sizeof(xfs_dir_sf_hdr_t), XFS_DATA_FORK);
+	sfhdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
+
+	INT_ZERO(sfhdr->count, ARCH_CONVERT);
+	XFS_DIR_SF_PUT_DIRINO_ARCH(&parent, &sfhdr->parent, ARCH_CONVERT);
+	dp->i_d.di_size = sizeof(xfs_dir_sf_hdr_t);
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+	return 0;
+}
+
+/*
+ * Append the name in args to the shortform directory structure.
+ * Overflow from the inode has already been checked for.
+ * Returns 0, EEXIST if the name is already present, or EIO. */
+int
+xfs_dir_shortform_addname(xfs_da_args_t *args)
+{
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int i, offset, size;
+	xfs_inode_t *dp;
+
+	dp = args->dp;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	sfe = &sf->list[0];
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+		if (sfe->namelen == args->namelen &&
+		    args->name[0] == sfe->name[0] &&
+		    bcmp(args->name, sfe->name, args->namelen) == 0)
+			return(XFS_ERROR(EEXIST));	/* duplicate name */
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	/* sfe has been stepped past every existing entry. */
+	offset = (int)((char *)sfe - (char *)sf);
+	size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
+	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;	/* re-fetch */
+	sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset);
+	/* Fill in the new entry and count it in the header. */
+	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
+	sfe->namelen = args->namelen;
+	bcopy(args->name, sfe->name, sfe->namelen);
+	INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
+
+	dp->i_d.di_size += size;
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+
+	return(0);
+}
+
+/*
+ * Remove the name in args from the shortform directory structure,
+ * shrinking the data fork.  Returns 0, ENOENT if absent, or EIO. */
+int
+xfs_dir_shortform_removename(xfs_da_args_t *args)
+{
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int base, size, i;
+	xfs_inode_t *dp;
+
+	dp = args->dp;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	base = sizeof(xfs_dir_sf_hdr_t);	/* running byte offset into sf */
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	sfe = &sf->list[0];
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+		size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
+		if (sfe->namelen == args->namelen &&
+		    sfe->name[0] == args->name[0] &&
+		    bcmp(sfe->name, args->name, args->namelen) == 0)
+			break;	/* base and size now describe the victim */
+		base += size;
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	if (i < 0) {	/* loop ran out without a match */
+		ASSERT(args->oknoent);
+		return(XFS_ERROR(ENOENT));
+	}
+	/* Slide any bytes after the removed entry down over it. */
+	if ((base + size) != dp->i_d.di_size) {
+		ovbcopy(&((char *)sf)[base+size], &((char *)sf)[base],
+					      dp->i_d.di_size - (base+size));
+	}
+	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+
+	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
+	dp->i_d.di_size -= size;
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+
+	return(0);
+}
+
+/*
+ * Find a name in a shortform directory; EEXIST with args->inumber on a hit.
+ */
+int
+xfs_dir_shortform_lookup(xfs_da_args_t *args)
+{
+	xfs_inode_t *dp = args->dp;
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int entno;
+
+	/* Only an inline (local format) data fork can be searched here. */
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * A fork shorter than the header means a shortform to leaf
+	 * conversion failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	if (args->namelen == 1 && args->name[0] == '.') {
+		args->inumber = dp->i_ino;
+		return XFS_ERROR(EEXIST);
+	}
+	if (args->namelen == 2 &&
+	    args->name[0] == '.' && args->name[1] == '.') {
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &args->inumber, ARCH_CONVERT);
+		return XFS_ERROR(EEXIST);
+	}
+	for (entno = 0, sfe = &sf->list[0];
+	     entno < INT_GET(sf->hdr.count, ARCH_CONVERT);
+	     entno++, sfe = XFS_DIR_SF_NEXTENTRY(sfe)) {
+		if (sfe->namelen != args->namelen ||
+		    sfe->name[0] != args->name[0] ||
+		    bcmp(args->name, sfe->name, args->namelen) != 0)
+			continue;
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args->inumber, ARCH_CONVERT);
+		return XFS_ERROR(EEXIST);
+	}
+	ASSERT(args->oknoent);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Convert from using the shortform to the leaf: empty the inline fork,
+ * create a leaf in block 0, then re-add ".", ".." and every old entry. */
+int
+xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
+{
+	xfs_inode_t *dp;
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	xfs_da_args_t args;
+	xfs_ino_t inumber;
+	char *tmpbuffer;
+	int retval, i, size;
+	xfs_dablk_t blkno;
+	xfs_dabuf_t *bp;
+
+	dp = iargs->dp;
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	size = dp->i_df.if_bytes;
+	tmpbuffer = kmem_alloc(size, KM_SLEEP);	/* freed at out: */
+	ASSERT(tmpbuffer != NULL);
+
+	bcopy(dp->i_df.if_u1.if_data, tmpbuffer, size);
+
+	sf = (xfs_dir_shortform_t *)tmpbuffer;
+	XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &inumber, ARCH_CONVERT);
+	/* Empty the inode's data fork; the saved copy is used from here on. */
+	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
+	dp->i_d.di_size = 0;
+	xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
+	retval = xfs_da_grow_inode(iargs, &blkno);
+	if (retval)
+		goto out;
+
+	ASSERT(blkno == 0);
+	retval = xfs_dir_leaf_create(iargs, blkno, &bp);
+	if (retval)
+		goto out;
+	xfs_da_buf_done(bp);
+	/* Re-add "." using the directory's own inode number. */
+	args.name = ".";
+	args.namelen = 1;
+	args.hashval = xfs_dir_hash_dot;
+	args.inumber = dp->i_ino;
+	args.dp = dp;
+	args.firstblock = iargs->firstblock;
+	args.flist = iargs->flist;
+	args.total = iargs->total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = iargs->trans;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 1;
+	retval = xfs_dir_leaf_addname(&args);
+	if (retval)
+		goto out;
+	/* Re-add ".." using the parent number saved from the old header. */
+	args.name = "..";
+	args.namelen = 2;
+	args.hashval = xfs_dir_hash_dotdot;
+	args.inumber = inumber;
+	retval = xfs_dir_leaf_addname(&args);
+	if (retval)
+		goto out;
+	/* Now re-add every entry from the saved shortform copy. */
+	sfe = &sf->list[0];
+	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+		args.name = (char *)(sfe->name);
+		args.namelen = sfe->namelen;
+		args.hashval = xfs_da_hashname((char *)(sfe->name),
+					       sfe->namelen);
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args.inumber, ARCH_CONVERT);
+		retval = xfs_dir_leaf_addname(&args);
+		if (retval)
+			goto out;
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	retval = 0;
+
+out:
+	kmem_free(tmpbuffer, size);
+	return(retval);
+}
+
+/*
+ * Find a name in a shortform directory and overwrite its inode number.
+ */
+int
+xfs_dir_shortform_replace(xfs_da_args_t *args)
+{
+	xfs_inode_t *dp = args->dp;
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int entno;
+
+	/* Only an inline (local format) data fork can be edited here. */
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * A fork shorter than the header means a shortform to leaf
+	 * conversion failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	if (args->namelen == 2 &&
+	    args->name[0] == '.' && args->name[1] == '.') {
+		/* XXX - replace assert? */
+		XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sf->hdr.parent, ARCH_CONVERT);
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+		return 0;
+	}
+	ASSERT(args->namelen != 1 || args->name[0] != '.');
+	for (entno = 0, sfe = &sf->list[0]; entno < INT_GET(sf->hdr.count, ARCH_CONVERT);
+	     entno++, sfe = XFS_DIR_SF_NEXTENTRY(sfe)) {
+		if (sfe->namelen != args->namelen || sfe->name[0] != args->name[0] ||
+		    bcmp(args->name, sfe->name, args->namelen) != 0)
+			continue;
+		ASSERT(bcmp((char *)&args->inumber,
+			(char *)&sfe->inumber, sizeof(xfs_ino_t)));
+		XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+		return 0;
+	}
+	/* Fell off the end of the entry list: the name is not here, which
+	 * the assert below only tolerates when args->oknoent is set. */
+	ASSERT(args->oknoent);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Convert a single-leaf directory (block 0) back to shortform structure.
+ * Returns 0 or the error from the read, shrink or create step. */
+int
+xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+	xfs_da_args_t args;
+	xfs_inode_t *dp;
+	xfs_ino_t parent;
+	char *tmpbuffer;
+	int retval, i;
+	xfs_dabuf_t *bp;
+
+	dp = iargs->dp;
+	tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
+	ASSERT(tmpbuffer != NULL);
+
+	retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
+					       XFS_DATA_FORK);
+	if (retval)
+		goto out;	/* must still free tmpbuffer */
+	ASSERT(bp != NULL);
+	bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	leaf = (xfs_dir_leafblock_t *)tmpbuffer;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	bzero(bp->data, XFS_LBSIZE(dp->i_mount));
+
+	/*
+	 * Pick up the parent inode number from "..", and zero the nameidx
+	 * of "." and ".." so that the copy loop below skips them both. */
+	hdr = &leaf->hdr;
+	entry = &leaf->entries[0];
+	for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		if ((entry->namelen == 2) &&
+		    (namest->name[0] == '.') &&
+		    (namest->name[1] == '.')) {
+			XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &parent, ARCH_CONVERT);
+			INT_ZERO(entry->nameidx, ARCH_CONVERT);
+		} else if ((entry->namelen == 1) && (namest->name[0] == '.')) {
+			INT_ZERO(entry->nameidx, ARCH_CONVERT);
+		}
+	}
+	retval = xfs_da_shrink_inode(iargs, 0, bp);
+	if (retval)
+		goto out;
+	retval = xfs_dir_shortform_create(iargs, parent);
+	if (retval)
+		goto out;
+
+	/*
+	 * Copy the rest of the filenames out of the saved leaf image.
+	 */
+	entry = &leaf->entries[0];
+	args.dp = dp;
+	args.firstblock = iargs->firstblock;
+	args.flist = iargs->flist;
+	args.total = iargs->total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = iargs->trans;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 1;
+	for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) == 0)
+			continue;	/* "." or "..", handled above */
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		args.name = (char *)(namest->name);
+		args.namelen = entry->namelen;
+		args.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+		XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args.inumber, ARCH_CONVERT);
+		xfs_dir_shortform_addname(&args);	/* NOTE(review): result ignored */
+	}
+
+out:
+	kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	return(retval);
+}
+
+/*
+ * Convert from using a single leaf to a root node and a leaf: the leaf
+ * in block 0 is copied to new block 1 and block 0 becomes the root. */
+int
+xfs_dir_leaf_to_node(xfs_da_args_t *args)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_da_intnode_t *node;
+	xfs_inode_t *dp;
+	xfs_dabuf_t *bp1, *bp2;
+	xfs_dablk_t blkno;
+	int retval;
+
+	dp = args->dp;
+	retval = xfs_da_grow_inode(args, &blkno);
+	if (retval)
+		return(retval);
+	ASSERT(blkno == 1);	/* only meaningful once the grow succeeded */
+	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
+					      XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp1 != NULL);
+	retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
+					     XFS_DATA_FORK);
+	if (retval) {
+		xfs_da_buf_done(bp1);
+		return(retval);
+	}
+	ASSERT(bp2 != NULL);
+	bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));	/* 0 -> 1 */
+	xfs_da_buf_done(bp1);
+	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
+
+	/*
+	 * Set up the new root node in block 0, with the moved leaf as its
+	 * single child keyed by the leaf's last (highest index) hashval. */
+	retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
+	if (retval) {
+		xfs_da_buf_done(bp2);
+		return(retval);
+	}
+	node = bp1->data;
+	leaf = bp2->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
+	xfs_da_buf_done(bp2);
+	INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
+	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
+	xfs_da_log_buf(args->trans, bp1,
+		XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
+	xfs_da_buf_done(bp1);
+
+	return(retval);
+}
+
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Format directory block "blkno" as an empty leaf (a leaf directory, or
+ * one leaf of a node directory) and hand its buffer back through *bpp.
+ */
+int
+xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+{
+	xfs_inode_t *dp = args->dp;
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dabuf_t *newbp;
+	int lbsize, error;
+
+	ASSERT(dp != NULL);
+	error = xfs_da_get_buf(args->trans, dp, blkno, -1, &newbp, XFS_DATA_FORK);
+	if (error)
+		return(error);
+	ASSERT(newbp != NULL);
+	lbsize = XFS_LBSIZE(dp->i_mount);
+	leaf = newbp->data;
+	hdr = &leaf->hdr;
+	bzero((char *)leaf, lbsize);
+
+	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
+	INT_SET(hdr->firstused, ARCH_CONVERT, lbsize);
+	if (INT_ISZERO(hdr->firstused, ARCH_CONVERT))	/* field read back as 0: fall back to lbsize - 1 */
+		INT_SET(hdr->firstused, ARCH_CONVERT, lbsize - 1);
+	INT_SET(hdr->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
+	INT_SET(hdr->freemap[0].size, ARCH_CONVERT, INT_GET(hdr->firstused, ARCH_CONVERT) - INT_GET(hdr->freemap[0].base, ARCH_CONVERT));
+
+	xfs_da_log_buf(args->trans, newbp, 0, lbsize - 1);	/* log the whole block */
+	*bpp = newbp;
+	return(0);
+}
+
+/*
+ * Split a leaf: allocate a new leaf block, rebalance the entries, then add the new name.
+ */
+int
+xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+				  xfs_da_state_blk_t *newblk)
+{
+	xfs_da_args_t *args = state->args;
+	xfs_da_state_blk_t *target;
+	xfs_dablk_t newbno;
+	int rc;
+
+	ASSERT(args != NULL);
+	ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
+
+	/*
+	 * Grow the directory by one block and format it as an empty leaf.
+	 */
+	rc = xfs_da_grow_inode(args, &newbno);
+	if (rc)
+		return(rc);
+	rc = xfs_dir_leaf_create(args, newbno, &newblk->bp);
+	if (rc)
+		return(rc);
+	newblk->magic = XFS_DIR_LEAF_MAGIC;
+	newblk->blkno = newbno;
+
+	/*
+	 * Spread the existing entries over both leaves, then link the
+	 * new block to the old one.
+	 */
+	xfs_dir_leaf_rebalance(state, oldblk, newblk);
+	rc = xfs_da_blk_link(state, oldblk, newblk);
+	if (rc)
+		return(rc);
+
+	/*
+	 * The rebalance left in state->inleaf which block takes the new name.
+	 */
+	target = state->inleaf ? oldblk : newblk;
+	rc = xfs_dir_leaf_add(target->bp, args, target->index);
+
+	/*
+	 * Refresh the cached last hashval of both blocks since a name was
+	 * added; this runs whether or not the add succeeded.
+	 */
+	oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
+	newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
+	return(rc);
+}
+
+/*
+ * Add the name described by "args" to the leaf in "bp" at entry slot "index".
+ * Must take into account fragmented leaves and leaves where spacemap has
+ * lost some freespace information (ie: holes).  If args->justcheck is set the
+ * name is not inserted, only the fit is tested.  Returns 0, ENOSPC, or the error from compaction.
+ */
+int
+xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dir_leaf_map_t *map;
+	int tablesize, entsize, sum, i, tmp, error;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+	hdr = &leaf->hdr;
+	entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);	/* bytes the name will take from a free region */
+
+	/*
+	 * Search through freemap for first-fit on new name length.
+	 * (may need to figure in size of entry struct too)
+	 */
+	tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t)
+			+ (uint)sizeof(xfs_dir_leaf_hdr_t);	/* header + entry table once one entry is added */
+	map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1];
+	for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {	/* scan the maps from last to first */
+		if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {	/* grown table would pass firstused */
+			sum += INT_GET(map->size, ARCH_CONVERT);
+			continue;
+		}
+		if (INT_GET(map->size, ARCH_CONVERT) == 0)
+			continue;	/* no space in this map */
+		tmp = entsize;
+		if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+			tmp += (uint)sizeof(xfs_dir_leaf_entry_t);	/* region starts below firstused: also charge one table slot */
+		if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
+			if (!args->justcheck)
+				xfs_dir_leaf_add_work(bp, args, index, i);
+			return(0);
+		}
+		sum += INT_GET(map->size, ARCH_CONVERT);	/* total of the free regions that were too small */
+	}
+
+	/*
+	 * If there are no holes in the address space of the block,
+	 * and we don't have enough freespace, then compaction will do us
+	 * no good and we should just give up.
+	 */
+	if (!hdr->holes && (sum < entsize))
+		return(XFS_ERROR(ENOSPC));
+
+	/*
+	 * Compact the entries to coalesce free space.
+	 * Pass the justcheck flag so the checking pass can return
+	 * an error, without changing anything, if it won't fit.
+	 * A "musthave" size is only passed when args->total == 0; the reason is not visible here (TODO confirm with callers).
+	 */
+	error = xfs_dir_leaf_compact(args->trans, bp,
+			args->total == 0 ?
+				entsize +
+				(uint)sizeof(xfs_dir_leaf_entry_t) : 0,
+			args->justcheck);
+	if (error)
+		return(error);
+	/*
+	 * After compaction, the block is guaranteed to have only one
+	 * free region, in freemap[0].  If it is not big enough, give up.
+	 */
+	if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
+	    (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
+		return(XFS_ERROR(ENOSPC));
+
+	if (!args->justcheck)
+		xfs_dir_leaf_add_work(bp, args, index, 0);	/* map 0 is the single region left by compaction */
+	return(0);
+}
+
+/*
+ * Insert args' name at entries[index], taking its name space from freemap[mapindex]; the caller has already found that region.
+ */
+STATIC void
+xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
+		      int mapindex)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+	xfs_dir_leaf_map_t *map;
+	/* REFERENCED */
+	xfs_mount_t *mp;
+	int tmp, i;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	hdr = &leaf->hdr;
+	ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
+	ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
+
+	/*
+	 * Force open some space in the entry array and fill it in.
+	 */
+	entry = &leaf->entries[index];
+	if (index < INT_GET(hdr->count, ARCH_CONVERT)) {	/* not appending: shift the tail up one slot */
+		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
+		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+		ovbcopy(entry, entry + 1, tmp);
+		xfs_da_log_buf(args->trans, bp,
+		    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
+	}
+	INT_MOD(hdr->count, ARCH_CONVERT, +1);
+
+	/*
+	 * Allocate space for the new string (at the end of the run).
+	 */
+	map = &hdr->freemap[mapindex];
+	mp = args->trans->t_mountp;	/* only used by the ASSERTs below */
+	ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
+	ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
+	INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT));	/* name starts where the shrunken region now ends */
+	INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
+	entry->namelen = args->namelen;
+	xfs_da_log_buf(args->trans, bp,
+	    XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+
+	/*
+	 * Copy the string and inode number into the new space.
+	 */
+	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &namest->inumber, ARCH_CONVERT);
+	bcopy(args->name, namest->name, args->namelen);
+	xfs_da_log_buf(args->trans, bp,
+	    XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
+
+	/*
+	 * Update the control info for this leaf node
+	 */
+	if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+		INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT);	/* new name is now the lowest used byte */
+	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
+	tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
+			+ (uint)sizeof(xfs_dir_leaf_hdr_t);	/* end of the entry table before this add */
+	map = &hdr->freemap[0];
+	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
+		if (INT_GET(map->base, ARCH_CONVERT) == tmp) {	/* region began right after the old table: it lost one slot */
+			INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
+			INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
+		}
+	}
+	INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen);
+	xfs_da_log_buf(args->trans, bp,
+		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+}
+
+/*
+ * Garbage collect a leaf block via a scratch copy; the block is put back unchanged if justcheck is set or musthave bytes are not free afterwards (ENOSPC).
+ */
+STATIC int
+xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
+		     int justcheck)
+{
+	xfs_dir_leafblock_t *leaf_s, *leaf_d;
+	xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
+	xfs_mount_t *mp;
+	char *tmpbuffer;
+	char *tmpbuffer2;	/* only assigned when (musthave || justcheck) */
+	int rval;
+	int lbsize;
+
+	mp = trans->t_mountp;
+	lbsize = XFS_LBSIZE(mp);
+	tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
+	ASSERT(tmpbuffer != NULL);
+	bcopy(bp->data, tmpbuffer, lbsize);	/* source copy that moveents reads from */
+
+	/*
+	 * Make a second copy in case xfs_dir_leaf_moveents()
+	 * below destroys the original.
+	 */
+	if (musthave || justcheck) {
+	        tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
+	  	bcopy(bp->data, tmpbuffer2, lbsize);
+	} 
+	bzero(bp->data, lbsize);
+
+	/*
+	 * Copy basic information
+	 */
+	leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
+	leaf_d = bp->data;
+	hdr_s = &leaf_s->hdr;
+	hdr_d = &leaf_d->hdr;
+	hdr_d->info = hdr_s->info;	/* struct copy */
+	INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize);
+	if (INT_GET(hdr_d->firstused, ARCH_CONVERT) == 0)	/* field read back as 0: fall back to lbsize - 1 */
+		INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1);
+	INT_ZERO(hdr_d->namebytes, ARCH_CONVERT);
+	INT_ZERO(hdr_d->count, ARCH_CONVERT);
+	hdr_d->holes = 0;
+	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
+	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+
+	/*
+	 * Copy all entry's in the same (sorted) order,
+	 * but allocate filenames packed and in sequence.
+	 * This changes the source (leaf_s) as well.
+	 */
+	xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
+
+	if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave)	/* musthave == 0 disables the check */
+		rval = XFS_ERROR(ENOSPC);
+	else
+		rval = 0;
+	
+	if (justcheck || rval == ENOSPC) {	/* undo: restore the saved original block */
+	        ASSERT(tmpbuffer2);
+		bcopy(tmpbuffer2, bp->data, lbsize);
+	} else {
+		xfs_da_log_buf(trans, bp, 0, lbsize - 1);	/* keep the compacted block and log all of it */
+	}
+
+	kmem_free(tmpbuffer, lbsize);
+	if (musthave || justcheck)	/* same condition as the allocation above */
+	  	kmem_free(tmpbuffer2, lbsize);
+	return(rval);
+}
+
+/*
+ * Redistribute the directory entries between two leaf nodes,
+ * taking into account the size of the new entry.  Also sets state->inleaf
+ * and stores the last hashval of each block in blk1/blk2->hashval.
+ * NOTE: if new block is empty, then it will get the upper half of old block.
+ */
+STATIC void
+xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+				      xfs_da_state_blk_t *blk2)
+{
+	xfs_da_state_blk_t *tmp_blk;
+	xfs_dir_leafblock_t *leaf1, *leaf2;
+	xfs_dir_leaf_hdr_t *hdr1, *hdr2;
+	int count, totallen, max, space, swap;
+
+	/*
+	 * Set up environment.
+	 */
+	ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
+	ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+
+	/*
+	 * Check ordering of blocks, reverse if it makes things simpler.
+	 */
+	swap = 0;
+	if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
+		tmp_blk = blk1;
+		blk1 = blk2;
+		blk2 = tmp_blk;
+		leaf1 = blk1->bp->data;
+		leaf2 = blk2->bp->data;
+		swap = 1;
+	}
+	hdr1 = &leaf1->hdr;
+	hdr2 = &leaf2->hdr;
+
+	/*
+	 * Examine entries until we reduce the absolute difference in
+	 * byte usage between the two blocks to a minimum.  Then get
+	 * the direction to copy and the number of elements to move.
+	 */
+	state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
+							   &count, &totallen);
+	if (swap)
+		state->inleaf = !state->inleaf;	/* express the answer relative to the caller's block order */
+
+	/*
+	 * Move any entries required from leaf to leaf:
+	 */
+	if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {	/* leaf1 keeps "count" entries, the rest go to leaf2 */
+		/*
+		 * Figure the total bytes to be added to the destination leaf.
+		 */
+		count = INT_GET(hdr1->count, ARCH_CONVERT) - count;	/* number entries being moved */
+		space  = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen;
+		space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
+		space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
+
+		/*
+		 * leaf2 is the destination, compact it if it looks tight.
+		 */
+		max  = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
+		max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+		if (space > max) {
+			xfs_dir_leaf_compact(state->args->trans, blk2->bp,
+								 0, 0);	/* result ignored: with (0, 0) compact only returns 0 */
+		}
+
+		/*
+		 * Move high entries from leaf1 to low end of leaf2.
+		 */
+		xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count,
+					     leaf2, 0, count, state->mp);
+
+		xfs_da_log_buf(state->args->trans, blk1->bp, 0,
+						   state->blocksize-1);
+		xfs_da_log_buf(state->args->trans, blk2->bp, 0,
+						   state->blocksize-1);
+
+	} else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {	/* leaf1 must gain entries from leaf2 */
+		/*
+		 * Figure the total bytes to be added to the destination leaf.
+		 */
+		count -= INT_GET(hdr1->count, ARCH_CONVERT);		/* number entries being moved */
+		space  = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT);
+		space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
+		space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
+
+		/*
+		 * leaf1 is the destination, compact it if it looks tight.
+		 */
+		max  = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
+		max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+		if (space > max) {
+			xfs_dir_leaf_compact(state->args->trans, blk1->bp,
+								 0, 0);	/* result ignored: with (0, 0) compact only returns 0 */
+		}
+
+		/*
+		 * Move low entries from leaf2 to high end of leaf1.
+		 */
+		xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT),
+					     count, state->mp);
+
+		xfs_da_log_buf(state->args->trans, blk1->bp, 0,
+						   state->blocksize-1);
+		xfs_da_log_buf(state->args->trans, blk2->bp, 0,
+						   state->blocksize-1);
+	}
+
+	/*
+	 * Copy out last hashval in each block for B-tree code.
+	 * NOTE(review): indexes entries[count-1]; presumably neither leaf can be empty here - confirm.
+	 */
+	blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+	blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+	/*
+	 * Adjust the expected index for insertion.
+	 * GROT: this doesn't work unless blk2 was originally empty.
+	 */
+	if (!state->inleaf) {
+		blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+	}
+}
+
+/*
+ * Examine entries until we reduce the absolute difference in byte usage between the two blocks to a minimum.
+ * Stores the lower block's entry count in countarg and its name bytes in namebytesarg; returns 1 if the new name goes there, else 0.
+ * GROT: Is this really necessary?  With other than a 512 byte blocksize,
+ * GROT: there will always be enough room in either block for a new entry.
+ * GROT: Do a double-split for this case?
+ */
+STATIC int
+xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
+					   xfs_da_state_blk_t *blk1,
+					   xfs_da_state_blk_t *blk2,
+					   int *countarg, int *namebytesarg)
+{
+	xfs_dir_leafblock_t *leaf1, *leaf2;
+	xfs_dir_leaf_hdr_t *hdr1, *hdr2;
+	xfs_dir_leaf_entry_t *entry;
+	int count, max, totallen, half;
+	int lastdelta, foundit, tmp;
+
+	/*
+	 * Set up environment.
+	 */
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	hdr1 = &leaf1->hdr;
+	hdr2 = &leaf2->hdr;
+	foundit = 0;
+	totallen = 0;
+
+	/*
+	 * "half" is the byte target for the lower block: usage of all entries plus the new name, halved.
+	 * NOTE(review): the overhead term here uses sizeof(xfs_dir_leaf_entry_t)-1 while the loop uses the ENTSIZE macros; confirm they agree.
+	 */
+	max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT);
+	half  = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
+	half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen;
+	half /= 2;
+	lastdelta = state->blocksize;	/* start above any delta the loop can produce */
+	entry = &leaf1->entries[0];
+	for (count = 0; count < max; entry++, count++) {
+
+#define XFS_DIR_ABS(A)	(((A) < 0) ? -(A) : (A))
+		/*
+		 * The new entry is in the first block, account for it.
+		 */
+		if (count == blk1->index) {
+			tmp = totallen + (uint)sizeof(*entry)
+				+ XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen);
+			if (XFS_DIR_ABS(half - tmp) > lastdelta)	/* adding it moves us away from half: stop */
+				break;
+			lastdelta = XFS_DIR_ABS(half - tmp);
+			totallen = tmp;
+			foundit = 1;
+		}
+
+		/*
+		 * Wrap around into the second block if necessary.
+		 */
+		if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
+			leaf1 = leaf2;
+			entry = &leaf1->entries[0];
+		}
+
+		/*
+		 * Figure out if next leaf entry would be too much.
+		 */
+		tmp = totallen + (uint)sizeof(*entry)
+				+ XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
+		if (XFS_DIR_ABS(half - tmp) > lastdelta)
+			break;
+		lastdelta = XFS_DIR_ABS(half - tmp);
+		totallen = tmp;
+#undef XFS_DIR_ABS
+	}
+
+	/*
+	 * Calculate the number of namebytes that will end up in lower block.
+	 * If new entry not in lower block, fix up the count.
+	 */
+	totallen -=
+		count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);	/* strip the per-entry overhead, leaving name bytes */
+	if (foundit) {
+		totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) +
+			    state->args->namelen;	/* the new name is not in either block yet */
+	}
+
+	*countarg = count;
+	*namebytesarg = totallen;
+	return(foundit);
+}
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.  The verdict is passed back in *action:
+ *   0 - the block is over 50% full, or nothing can be done;
+ *   1 - it can be collapsed; state->path and state->altpath are set up;
+ *   2 - the block is empty; state->path and state->altpath are set up.
+ * The function's own return value is 0, or the error from a helper.
+ */
+int
+xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_da_state_blk_t *blk;
+	xfs_da_blkinfo_t *info;
+	int count, bytes, forward, error, retval, i;
+	xfs_dablk_t blkno;
+	xfs_dabuf_t *bp;
+
+	/*
+	 * Check for the degenerate case of the block being over 50% full.
+	 * If so, it's not worth even looking to see if we might be able
+	 * to coalesce with a sibling.
+	 */
+	blk = &state->path.blk[ state->path.active-1 ];	/* last block on the active path */
+	info = blk->bp->data;
+	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	leaf = (xfs_dir_leafblock_t *)info;
+	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
+		count * (uint)sizeof(xfs_dir_leaf_entry_t) +
+		count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) +
+		INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);	/* bytes in use in this block */
+	if (bytes > (state->blocksize >> 1)) {
+		*action = 0;	/* blk over 50%, dont try to join */
+		return(0);
+	}
+
+	/*
+	 * Check for the degenerate case of the block being empty.
+	 * If the block is empty, we'll simply delete it, no need to
+	 * coalesce it with a sibling block.  We choose (arbitrarily)
+	 * to merge with the forward block unless it is NULL.
+	 */
+	if (count == 0) {
+		/*
+		 * Make altpath point to the block we want to keep and
+		 * path point to the block we want to drop (this one).
+		 */
+		forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
+		bcopy(&state->path, &state->altpath, sizeof(state->path));
+		error = xfs_da_path_shift(state, &state->altpath, forward,
+						 0, &retval);
+		if (error)
+			return(error);
+		if (retval) {	/* nonzero result from the shift: report "do nothing" */
+			*action = 0;
+		} else {
+			*action = 2;
+		}
+		return(0);
+	}
+
+	/*
+	 * Examine each sibling block to see if we can coalesce with
+	 * at least 25% free space to spare.  We need to figure out
+	 * whether to merge with the forward or the backward block.
+	 * We prefer coalescing with the lower numbered sibling so as
+	 * to shrink a directory over time.
+	 */
+	forward = (INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT));	/* start with smaller blk num */
+	for (i = 0; i < 2; forward = !forward, i++) {	/* at most two tries: one per sibling */
+		if (forward)
+			blkno = INT_GET(info->forw, ARCH_CONVERT);
+		else
+			blkno = INT_GET(info->back, ARCH_CONVERT);
+		if (blkno == 0)	/* no sibling in this direction */
+			continue;
+		error = xfs_da_read_buf(state->args->trans, state->args->dp,
+							    blkno, -1, &bp,
+							    XFS_DATA_FORK);
+		if (error)
+			return(error);
+		ASSERT(bp != NULL);
+
+		leaf = (xfs_dir_leafblock_t *)info;	/* this block first ... */
+		count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		bytes  = state->blocksize - (state->blocksize>>2);	/* budget: 75% of a block */
+		bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+		leaf = bp->data;	/* ... then the sibling just read */
+		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+		count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+		bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
+		bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t);
+		bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t);
+		if (bytes >= 0)
+			break;	/* fits with at least 25% to spare */
+
+		xfs_da_brelse(state->args->trans, bp);
+	}
+	if (i >= 2) {	/* neither sibling was usable */
+		*action = 0;
+		return(0);
+	}
+	xfs_da_buf_done(bp);
+
+	/*
+	 * Make altpath point to the block we want to keep (the lower
+	 * numbered block) and path point to the block we want to drop.
+	 */
+	bcopy(&state->path, &state->altpath, sizeof(state->path));
+	if (blkno < blk->blkno) {	/* sibling is lower numbered: it is the one kept */
+		error = xfs_da_path_shift(state, &state->altpath, forward,
+						 0, &retval);
+	} else {
+		error = xfs_da_path_shift(state, &state->path, forward,
+						 0, &retval);
+	}
+	if (error)
+		return(error);
+	if (retval) {
+		*action = 0;
+	} else {
+		*action = 1;
+	}
+	return(0);
+}
+
+/*
+ * Remove the name at entries[index] from the leaf directory structure.
+ *
+ * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
+ * If two leaves are 37% full, when combined they will leave 25% free.
+ */
+int
+xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dir_leaf_map_t *map;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+	int before, after, smallest, entsize;
+	int tablesize, tmp, i;
+	xfs_mount_t *mp;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	hdr = &leaf->hdr;
+	mp = trans->t_mountp;
+	ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
+	ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT)));
+	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
+	entry = &leaf->entries[index];
+	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
+	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
+
+	/*
+	 * Scan through free region table:
+	 *    check for adjacency of free'd entry with an existing one,
+	 *    find smallest free region in case we need to replace it,
+	 *    adjust any map that borders the entry table,
+	 */
+	tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
+			+ (uint)sizeof(xfs_dir_leaf_hdr_t);	/* header + entry table before the removal */
+	map = &hdr->freemap[0];
+	tmp = INT_GET(map->size, ARCH_CONVERT);	/* smallest region size seen so far */
+	before = after = -1;
+	smallest = XFS_DIR_LEAF_MAPSIZE - 1;
+	entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
+	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
+		ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+		ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+		if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {	/* region borders the table: it gains the freed slot */
+			INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
+			INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
+		}
+
+		if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) {
+			before = i;	/* region ends exactly where the name starts */
+		} else if (INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
+			after = i;	/* region starts exactly where the name ends */
+		} else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
+			tmp = INT_GET(map->size, ARCH_CONVERT);
+			smallest = i;
+		}
+	}
+
+	/*
+	 * Coalesce adjacent freemap regions,
+	 * or replace the smallest region.
+	 */
+	if ((before >= 0) || (after >= 0)) {
+		if ((before >= 0) && (after >= 0)) {	/* name sat between two regions: merge all three into "before" */
+			map = &hdr->freemap[before];
+			INT_MOD(map->size, ARCH_CONVERT, entsize);
+			INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
+			INT_ZERO(hdr->freemap[after].base, ARCH_CONVERT);
+			INT_ZERO(hdr->freemap[after].size, ARCH_CONVERT);
+		} else if (before >= 0) {
+			map = &hdr->freemap[before];
+			INT_MOD(map->size, ARCH_CONVERT, entsize);
+		} else {
+			map = &hdr->freemap[after];
+			INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); 
+			INT_MOD(map->size, ARCH_CONVERT, entsize);
+		}
+	} else {
+		/*
+		 * Replace smallest region (if it is smaller than free'd entry)
+		 */
+		map = &hdr->freemap[smallest];
+		if (INT_GET(map->size, ARCH_CONVERT) < entsize) {
+			INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); 
+			INT_SET(map->size, ARCH_CONVERT, entsize);
+		}
+	}
+
+	/*
+	 * Did we remove the first entry?  ("smallest" is reused below as that flag.)
+	 */
+	if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT))
+		smallest = 1;
+	else
+		smallest = 0;
+
+	/*
+	 * Compress the remaining entries and zero out the removed stuff.
+	 */
+	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+	bzero((char *)namest, entsize);
+	xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
+
+	INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
+	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);	/* NOTE(review): one slot more than the count-index-1 entries that follow */
+	ovbcopy(entry + 1, entry, tmp);
+	INT_MOD(hdr->count, ARCH_CONVERT, -1);
+	xfs_da_log_buf(trans, bp,
+	    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
+	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];	/* the slot just vacated at the end of the table */
+	bzero((char *)entry, sizeof(xfs_dir_leaf_entry_t));
+
+	/*
+	 * If we removed the first entry, re-find the first used byte
+	 * in the name area.  Note that if the entry was the "firstused",
+	 * then we don't have a "hole" in our block resulting from
+	 * removing the name.
+	 */
+	if (smallest) {
+		tmp = XFS_LBSIZE(mp);
+		entry = &leaf->entries[0];
+		for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
+			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
+			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
+			if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
+				tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
+		}
+		INT_SET(hdr->firstused, ARCH_CONVERT, tmp);
+		if (INT_GET(hdr->firstused, ARCH_CONVERT) == 0)	/* field read back as 0: fall back to tmp - 1 */
+			INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1);
+	} else {
+		hdr->holes = 1;		/* mark as needing compaction */
+	}
+
+	xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+
+	/*
+	 * Check if leaf usage is below mp->m_dir_magicpct bytes, caller may
+	 * want to "join" the leaf with a sibling if so.
+	 */
+	tmp  = (uint)sizeof(xfs_dir_leaf_hdr_t);
+	tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+	tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
+	tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+	if (tmp < mp->m_dir_magicpct)
+		return(1);			/* leaf is < 37% full */
+	return(0);
+}
+
+/*
+ * Move all the directory entries from drop_leaf into save_leaf; updates both blocks' cached last hashval and logs save_blk.
+ */
+void
+xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+				      xfs_da_state_blk_t *save_blk)
+{
+	xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
+	xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
+	xfs_mount_t *mp;
+	char *tmpbuffer;
+
+	/*
+	 * Set up environment.
+	 */
+	mp = state->mp;
+	ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC);
+	ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
+	drop_leaf = drop_blk->bp->data;
+	save_leaf = save_blk->bp->data;
+	ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	drop_hdr = &drop_leaf->hdr;
+	save_hdr = &save_leaf->hdr;
+
+	/*
+	 * Save last hashval from dying block for later Btree fixup.  The count is an on-disk field: read it via INT_GET.
+	 */
+	drop_blk->hashval = INT_GET(drop_leaf->entries[ INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+	/*
+	 * Check if we need a temp buffer, or can we do it in place.
+	 * Note that we don't check "leaf" for holes because we will
+	 * always be dropping it, toosmall() decided that for us already.
+	 */
+	if (save_hdr->holes == 0) {
+		/*
+		 * dest leaf has no holes, so we add there.  May need
+		 * to make some room in the entry array.
+		 */
+		if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {	/* dropped entries go in front of the saved ones */
+			xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0,
+						 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+		} else {	/* dropped entries are appended after the saved ones */
+			xfs_dir_leaf_moveents(drop_leaf, 0,
+					      save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT),
+					      (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+		}
+	} else {
+		/*
+		 * Destination has holes, so we make a temporary copy
+		 * of the leaf and add them both to that.
+		 */
+		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
+		ASSERT(tmpbuffer != NULL);
+		bzero(tmpbuffer, state->blocksize);
+		tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
+		tmp_hdr = &tmp_leaf->hdr;
+		tmp_hdr->info = save_hdr->info;	/* struct copy */
+		INT_ZERO(tmp_hdr->count, ARCH_CONVERT);
+		INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
+		if (INT_GET(tmp_hdr->firstused, ARCH_CONVERT) == 0)	/* field read back as 0: fall back to blocksize - 1 */
+			INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1);
+		INT_ZERO(tmp_hdr->namebytes, ARCH_CONVERT);
+		if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {	/* same ordering choice as above: dropped entries first */
+			xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
+						 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+			xfs_dir_leaf_moveents(save_leaf, 0,
+					      tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+					      (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
+		} else {
+			xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
+						 (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
+			xfs_dir_leaf_moveents(drop_leaf, 0,
+					      tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+					      (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+		}
+		bcopy(tmp_leaf, save_leaf, state->blocksize);	/* install the merged block over save_leaf */
+		kmem_free(tmpbuffer, state->blocksize);
+	}
+
+	xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
+					   state->blocksize - 1);
+
+	/*
+	 * Copy out last hashval in each block for B-tree code.
+	 */
+	save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+}
+
+
+/*========================================================================
+ * Routines used for finding things in the Btree.
+ *========================================================================*/
+
+/*
+ * Look up args->name (hashed as args->hashval) in the leaf block in bp.
+ * This is the internal routine, it uses the caller's buffer.
+ *
+ * Duplicate hash values are allowed, but only this leaf is searched;
+ * the Btree code must check in adjacent leaf nodes.
+ *
+ * Returns EEXIST with *index = the matching entry and args->inumber set,
+ * else ENOENT with *index = where the entry belongs (insert before it).
+ *
+ * args->inumber is not changed unless the filename is found.
+ */
+int
+xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+	int probe, span;
+	xfs_dahash_t hashval;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8));
+
+	/*
+	 * Binary search on hashval until span <= 4 or an exact hit (small blocks skip this loop).
+	 */
+	hashval = args->hashval;
+	probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2;
+	for (entry = &leaf->entries[probe]; span > 4;
+		   entry = &leaf->entries[probe]) {
+		span /= 2;
+		if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)
+			probe += span;
+		else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval)
+			probe -= span;
+		else
+			break;
+	}
+	ASSERT((probe >= 0) && \
+	       ((INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))));
+	ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval));
+
+	/*
+	 * Since we may have duplicate hashvals, back up while the entry is
+	 * >= hashval, then step forward to the first entry not below hashval.
+	 */
+	while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) {
+		entry--;
+		probe--;
+	}
+	while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
+		entry++;
+		probe++;
+	}
+	if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
+		*index = probe;
+		ASSERT(args->oknoent);
+		return(XFS_ERROR(ENOENT));
+	}
+
+	/*
+	 * Several names may share this hashval, so compare length and bytes of each one.
+	 */
+	while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) {
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		if (entry->namelen == args->namelen &&
+		    namest->name[0] == args->name[0] &&
+		    bcmp(args->name, namest->name, args->namelen) == 0) {
+			XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args->inumber, ARCH_CONVERT);
+			*index = probe;
+			return(XFS_ERROR(EEXIST));
+		}
+		entry++;
+		probe++;
+	}
+	*index = probe;
+	ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
+	return(XFS_ERROR(ENOENT));
+}
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Move count entries from leaf_s[start_s..] into leaf_d at start_d, packing
+ * their names below leaf_d's firstused.  Both leaves are modified.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
+		      xfs_dir_leafblock_t *leaf_d, int start_d,
+		      int count, xfs_mount_t *mp)
+{
+	xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
+	xfs_dir_leaf_entry_t *entry_s, *entry_d;
+	int tmp, i;
+
+	/*
+	 * Check for nothing to do.
+	 */
+	if (count == 0)
+		return;
+
+	/*
+	 * Set up environment and sanity-check both headers.
+	 */
+	ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	hdr_s = &leaf_s->hdr;
+	hdr_d = &leaf_d->hdr;
+	ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
+	ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >=
+		((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s)));
+	ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
+	ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >=
+		((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d)));
+
+	ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
+	ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
+	ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
+
+	/*
+	 * Inserting before existing destination entries?  Slide them up count slots to make a hole.
+	 */
+	if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
+		tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
+		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+		entry_s = &leaf_d->entries[start_d];
+		entry_d = &leaf_d->entries[start_d + count];
+		bcopy(entry_s, entry_d, tmp);
+	}
+
+	/*
+	 * Copy all entry's in the same (sorted) order,
+	 * but allocate filenames packed and in sequence.
+	 */
+	entry_s = &leaf_s->entries[start_s];
+	entry_d = &leaf_d->entries[start_d];
+	for (i = 0; i < count; entry_s++, entry_d++, i++) {
+		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT));
+		ASSERT(entry_s->namelen < MAXNAMELEN);
+		tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s);
+		INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp));
+		entry_d->hashval = entry_s->hashval; /* INT_: direct copy */
+		INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT);
+		entry_d->namelen = entry_s->namelen;
+		ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
+		bcopy(XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
+		      XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)), tmp);
+		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
+		bzero((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
+		      tmp);
+		INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
+		INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
+		INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+		INT_MOD(hdr_d->count, ARCH_CONVERT, +1);
+		tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
+				+ (uint)sizeof(xfs_dir_leaf_hdr_t);
+		ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
+
+	}
+
+	/*
+	 * Zero out the entries we just copied (hdr_s->count was already reduced by count above).
+	 */
+	if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
+		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
+		entry_s = &leaf_s->entries[start_s];
+		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
+		bzero((char *)entry_s, tmp);
+	} else {
+		/*
+		 * Move the entries that followed the moved range down to fill
+		 * the hole (new count - start_s of them), then zero the top.
+		 */
+		tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - start_s;
+		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+		entry_s = &leaf_s->entries[start_s + count];
+		entry_d = &leaf_s->entries[start_s];
+		bcopy(entry_s, entry_d, tmp);
+
+		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
+		entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
+		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
+		bzero((char *)entry_s, tmp);
+	}
+
+	/*
+	 * Rebuild the freemap: slot 0 spans end of entry table to firstused, slots 1 and 2 are emptied.
+	 */
+	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t));
+	INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t));
+	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+	INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, INT_ZERO(hdr_d->freemap[2].base, ARCH_CONVERT));
+	INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, INT_ZERO(hdr_d->freemap[2].size, ARCH_CONVERT));
+	hdr_s->holes = 1;	/* leaf may not be compact */
+}
+
+/*
+ * Compare two leaf blocks "order": 1 if both are non-empty and leaf2's first or last hash is lower.
+ */
+int
+xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+{
+	xfs_dir_leafblock_t *leaf1 = leaf1_bp->data;
+	xfs_dir_leafblock_t *leaf2 = leaf2_bp->data;
+	int nents1, nents2;
+
+	ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) &&
+	       (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC));
+	nents1 = INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+	nents2 = INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+	if (nents1 == 0 || nents2 == 0)
+		return(0);
+	return((INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) <
+		INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT)) ||
+	       (INT_GET(leaf2->entries[nents2 - 1].hashval, ARCH_CONVERT) <
+		INT_GET(leaf1->entries[nents1 - 1].hashval, ARCH_CONVERT)));
+}
+
+/*
+ * Return the hashvalue of the last entry in a leaf block, or 0 if the
+ * block is empty.  The entry count is stored in *count when count != NULL.
+ */
+xfs_dahash_t
+xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+{
+	xfs_dir_leafblock_t *leaf = bp->data;
+	int nents;
+
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	if (count)
+		*count = nents;
+	return(nents ? INT_GET(leaf->entries[nents - 1].hashval, ARCH_CONVERT) : 0);
+}
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
new file mode 100644
index 000000000..92e2818cb
--- /dev/null
+++ b/libxfs/xfs_ialloc.c
@@ -0,0 +1,1113 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Internal functions.
+ */
+
+/*
+ * Log the dinode fields selected by the "fields" bitmask for the inode
+ * at index "off" within inode buffer bp.
+ */
+STATIC void
+xfs_ialloc_log_di(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_buf_t	*bp,		/* inode buffer */
+	int		off,		/* index of inode in buffer */
+	int		fields)		/* bitmask of fields to log */
+{
+	static const short	offsets[] = {	/* field offsets */
+						/* keep in sync with bits */
+		offsetof(xfs_dinode_core_t, di_magic),
+		offsetof(xfs_dinode_core_t, di_mode),
+		offsetof(xfs_dinode_core_t, di_version),
+		offsetof(xfs_dinode_core_t, di_format),
+		offsetof(xfs_dinode_core_t, di_onlink),
+		offsetof(xfs_dinode_core_t, di_uid),
+		offsetof(xfs_dinode_core_t, di_gid),
+		offsetof(xfs_dinode_core_t, di_nlink),
+		offsetof(xfs_dinode_core_t, di_projid),
+		offsetof(xfs_dinode_core_t, di_pad),
+		offsetof(xfs_dinode_core_t, di_atime),
+		offsetof(xfs_dinode_core_t, di_mtime),
+		offsetof(xfs_dinode_core_t, di_ctime),
+		offsetof(xfs_dinode_core_t, di_size),
+		offsetof(xfs_dinode_core_t, di_nblocks),
+		offsetof(xfs_dinode_core_t, di_extsize),
+		offsetof(xfs_dinode_core_t, di_nextents),
+		offsetof(xfs_dinode_core_t, di_anextents),
+		offsetof(xfs_dinode_core_t, di_forkoff),
+		offsetof(xfs_dinode_core_t, di_aformat),
+		offsetof(xfs_dinode_core_t, di_dmevmask),
+		offsetof(xfs_dinode_core_t, di_dmstate),
+		offsetof(xfs_dinode_core_t, di_flags),
+		offsetof(xfs_dinode_core_t, di_gen),
+		offsetof(xfs_dinode_t, di_next_unlinked),
+		offsetof(xfs_dinode_t, di_u),
+		offsetof(xfs_dinode_t, di_a),
+		sizeof(xfs_dinode_t)
+	};
+
+	int			first_byte;	/* first byte to log */
+	int			last_byte;	/* last byte to log */
+	int			ino_base;	/* byte offset of inode in bp */
+
+	ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
+	ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
+	/*
+	 * Byte position of this inode within the buffer.
+	 */
+	ino_base = off << tp->t_mountp->m_sb.sb_inodelog;
+	/*
+	 * Inode-relative first and last bytes covering the chosen fields.
+	 */
+	xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first_byte, &last_byte);
+	/*
+	 * Rebase that range to buffer offsets and log it.
+	 */
+	xfs_trans_log_buf(tp, bp, first_byte + ino_base, last_byte + ino_base);
+}
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate and initialise a new extent of inodes in the a.g. given by agbp.
+ * Returns 0 with *alloc = 1 (done) or 0 (no extent found), else an error code.
+ */
+STATIC int				/* error code or 0 */
+xfs_ialloc_ag_alloc(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_buf_t	*agbp,		/* alloc group buffer */
+	int		*alloc)		/* out: 1 if inodes were allocated */
+{
+	xfs_agi_t	*agi;		/* allocation group header */
+	xfs_alloc_arg_t	args;		/* allocation argument structure */
+	int		blks_per_cluster;  /* fs blocks per inode cluster */
+	xfs_btree_cur_t	*cur;		/* inode btree cursor */
+	xfs_daddr_t		d;		/* disk addr of buffer */
+	int		error;
+	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
+	xfs_dinode_t	*free;		/* new free inode structure */
+	int		i;		/* inode counter */
+	int		j;		/* block counter */
+	int		nbufs;		/* num bufs of new inodes */
+	xfs_agino_t	newino;		/* new first inode's number */
+	xfs_agino_t	newlen;		/* new number of inodes */
+	int		ninodes;	/* num inodes per buf */
+	xfs_agino_t	thisino;	/* current inode number, for loop */
+	int		version;	/* inode version number to use */
+	static xfs_timestamp_t ztime;	/* zero xfs timestamp */
+	int		isaligned;	/* inode allocation at stripe unit */
+					/* boundary */
+        xfs_dinode_core_t dic;          /* a dinode_core to copy to new */
+                                        /* inodes */
+        
+	args.tp = tp;
+	args.mp = tp->t_mountp;
+
+	/*
+	 * Locking will ensure that we don't have two callers in here
+	 * at one time.
+	 */
+	newlen = XFS_IALLOC_INODES(args.mp);
+	if (args.mp->m_maxicount &&
+	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+		return XFS_ERROR(ENOSPC);
+	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
+	/*
+	 * Set the alignment for the allocation.
+	 * If stripe alignment is turned on then align at stripe unit
+	 * boundary.
+	 * If the cluster size is smaller than a filesystem block 
+	 * then we're doing I/O for inodes in filesystem block size pieces,
+	 * so don't need alignment anyway.
+	 */
+	isaligned = 0;
+	if (args.mp->m_sinoalign) {
+		ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+		args.alignment = args.mp->m_dalign;
+		isaligned = 1;
+	} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+	    args.mp->m_sb.sb_inoalignmt >= 
+	    XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
+		args.alignment = args.mp->m_sb.sb_inoalignmt;
+	else
+		args.alignment = 1;
+	agi = XFS_BUF_TO_AGI(agbp);
+	/*
+	 * Need to figure out where to allocate the inode blocks.
+	 * Ideally they should be spaced out through the a.g.
+	 * For now, just allocate blocks up front.
+	 */
+	args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				    args.agbno);
+	/*
+	 * Allocate a fixed-size extent of inodes.
+	 */
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	args.mod = args.total = args.wasdel = args.isfl = args.userdata = 
+		args.minalignslop = 0;
+	args.prod = 1;
+	/*
+	 * Allow space for the inode btree to split.
+	 */
+	args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+	if (error = xfs_alloc_vextent(&args))
+		return error;
+
+	/*
+	 * If the stripe-aligned attempt found nothing, then try again with
+	 * cluster alignment (or none).
+	 */
+	if (isaligned && args.fsbno == NULLFSBLOCK) {
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+		args.fsbno = XFS_AGB_TO_FSB(args.mp,
+				INT_GET(agi->agi_seqno, ARCH_CONVERT), args.agbno);
+		if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+	    		args.mp->m_sb.sb_inoalignmt >= 
+	    		XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
+				args.alignment = args.mp->m_sb.sb_inoalignmt;
+		else
+			args.alignment = 1;
+		if (error = xfs_alloc_vextent(&args))
+       		         return error;
+	}
+	
+	if (args.fsbno == NULLFSBLOCK) {
+		*alloc = 0;
+		return 0;
+	}
+	ASSERT(args.len == args.minlen);
+	/*
+	 * Derive the first new inode's a.g.-relative number from the allocated block.
+	 */
+	newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+	/*
+	 * Loop over the new block(s), filling in the inodes.
+	 * For small block sizes, manipulate the inodes in buffers
+	 * which are multiples of the blocks size.
+	 */
+	if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
+		blks_per_cluster = 1;
+		nbufs = (int)args.len;
+		ninodes = args.mp->m_sb.sb_inopblock;
+	} else {
+		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
+			           args.mp->m_sb.sb_blocksize;
+		nbufs = (int)args.len / blks_per_cluster;
+		ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
+	}
+	/*
+	 * Figure out what version number to use in the inodes we create.
+	 * If the superblock version has caught up to the one that supports
+	 * the new inode format, then use the new inode version.  Otherwise
+	 * use the old version so that old kernels will continue to be
+	 * able to use the file system.
+	 */
+	if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
+		version = XFS_DINODE_VERSION_2;
+	else
+		version = XFS_DINODE_VERSION_1;
+	for (j = 0; j < nbufs; j++) {
+		/*
+		 * Get the buffer covering this cluster of new inodes.
+		 */
+		d = XFS_AGB_TO_DADDR(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				     args.agbno + (j * blks_per_cluster));
+		fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
+					 args.mp->m_bsize * blks_per_cluster,
+					 XFS_BUF_LOCK);
+		ASSERT(fbuf);
+		ASSERT(!XFS_BUF_GETERROR(fbuf));		
+		/*
+		 * Build the template inode core, then copy it into each inode of this buffer.
+		 */
+		INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+		INT_ZERO(dic.di_mode, ARCH_CONVERT);
+		INT_SET(dic.di_version, ARCH_CONVERT, version);
+		INT_ZERO(dic.di_format, ARCH_CONVERT);
+		INT_ZERO(dic.di_onlink, ARCH_CONVERT);
+		INT_ZERO(dic.di_uid, ARCH_CONVERT);
+		INT_ZERO(dic.di_gid, ARCH_CONVERT);
+		INT_ZERO(dic.di_nlink, ARCH_CONVERT);
+		INT_ZERO(dic.di_projid, ARCH_CONVERT);
+		bzero(&(dic.di_pad[0]),sizeof(dic.di_pad));
+		INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec);
+		INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+                
+		INT_SET(dic.di_mtime.t_sec, ARCH_CONVERT, ztime.t_sec);
+		INT_SET(dic.di_mtime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+                
+		INT_SET(dic.di_ctime.t_sec, ARCH_CONVERT, ztime.t_sec);
+		INT_SET(dic.di_ctime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+                
+		INT_ZERO(dic.di_size, ARCH_CONVERT);
+		INT_ZERO(dic.di_nblocks, ARCH_CONVERT);
+		INT_ZERO(dic.di_extsize, ARCH_CONVERT);
+		INT_ZERO(dic.di_nextents, ARCH_CONVERT);
+		INT_ZERO(dic.di_anextents, ARCH_CONVERT);
+		INT_ZERO(dic.di_forkoff, ARCH_CONVERT);
+		INT_ZERO(dic.di_aformat, ARCH_CONVERT);
+		INT_ZERO(dic.di_dmevmask, ARCH_CONVERT);
+		INT_ZERO(dic.di_dmstate, ARCH_CONVERT);
+		INT_ZERO(dic.di_flags, ARCH_CONVERT);
+		INT_ZERO(dic.di_gen, ARCH_CONVERT);
+                
+		for (i = 0; i < ninodes; i++) {
+			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
+                        bcopy (&dic, &(free->di_core), sizeof(xfs_dinode_core_t));
+		        INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
+			xfs_ialloc_log_di(tp, fbuf, i,
+				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
+		}
+		xfs_trans_inode_alloc_buf(tp, fbuf);
+	}
+	INT_MOD(agi->agi_count, ARCH_CONVERT, newlen);
+	INT_MOD(agi->agi_freecount, ARCH_CONVERT, newlen);
+	mraccess(&args.mp->m_peraglock);
+	args.mp->m_perag[INT_GET(agi->agi_seqno, ARCH_CONVERT)].pagi_freecount += newlen;
+	mraccunlock(&args.mp->m_peraglock);
+	INT_SET(agi->agi_newino, ARCH_CONVERT, newino);
+	/*
+	 * Insert records describing the new inode chunk into the btree.
+	 */
+	cur = xfs_btree_init_cursor(args.mp, tp, agbp,
+			INT_GET(agi->agi_seqno, ARCH_CONVERT),
+			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+	for (thisino = newino;
+	     thisino < newino + newlen;
+	     thisino += XFS_INODES_PER_CHUNK) {
+		if (error = xfs_inobt_lookup_eq(cur, thisino,
+				XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i)) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+			return error;
+		}
+		ASSERT(i == 0);
+		if (error = xfs_inobt_insert(cur, &i)) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+			return error;
+		}
+		ASSERT(i == 1);
+	}
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	/*
+	 * Log allocation group header fields
+	 */
+	xfs_ialloc_log_agi(tp, agbp,
+		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
+	/*
+	 * Modify/log superblock values for inode count and inode free count.
+	 */
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
+	*alloc = 1;
+	return 0;
+}
+
+/*
+ * Select an allocation group to look for a free inode in, based on the parent
+ * inode and the mode.  Return its AGI buffer, or NULL if no a.g. qualifies.
+ */
+STATIC xfs_buf_t *			/* allocation group buffer */
+xfs_ialloc_ag_select(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_ino_t	parent,		/* parent directory inode number */
+	mode_t		mode,		/* bits set to indicate file type */
+	int		okalloc)	/* ok to allocate more space */
+{
+	xfs_buf_t	*agbp;		/* allocation group header buffer */
+	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
+	xfs_agnumber_t	agno;		/* current ag number */
+	int		flags;		/* alloc buffer locking flags */
+	xfs_extlen_t	ineed;		/* blocks needed for inode allocation */
+	xfs_extlen_t	longest;	/* longest extent available */
+	xfs_mount_t	*mp;		/* mount point structure */
+	int		needspace;	/* file mode implies space allocated */
+	xfs_perag_t	*pag;		/* per allocation group data */
+	xfs_agnumber_t	pagno;		/* parent (starting) ag number */
+
+	/*
+	 * Files of these types need at least one block if length > 0
+	 * (and they won't fit in the inode, but that's hard to figure out).
+	 */
+	needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
+	mp = tp->t_mountp;
+	agcount = mp->m_sb.sb_agcount;
+	if (S_ISDIR(mode))
+		pagno = atomicIncWithWrap((int *)&mp->m_agirotor, agcount);
+	else
+		pagno = XFS_INO_TO_AGNO(mp, parent);
+	ASSERT(pagno < agcount);
+	/*
+	 * Loop through allocation groups, looking for one with a little
+	 * free space in it.  Note we don't look for free inodes, exactly.
+	 * Instead, we include whether there is a need to allocate inodes
+	 * to mean that blocks must be allocated for them, 
+	 * if none are currently free.
+	 */
+	agno = pagno;
+	flags = XFS_ALLOC_FLAG_TRYLOCK;
+	for (;;) {
+		mraccess(&mp->m_peraglock);
+		pag = &mp->m_perag[agno];
+		if (!pag->pagi_init) {
+			if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+				agbp = NULL;
+				mraccunlock(&mp->m_peraglock);
+				goto nextag;
+			}
+		} else
+			agbp = NULL;
+		/*
+		 * Is there enough free space for the file plus a block
+		 * of inodes (if we need to allocate some)?
+		 */
+		ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
+		if (ineed && !pag->pagf_init) {
+			if (agbp == NULL &&
+			    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+				agbp = NULL;
+				mraccunlock(&mp->m_peraglock);
+				goto nextag;
+			}
+			(void)xfs_alloc_pagf_init(mp, tp, agno, flags);
+		}
+		if (!ineed || pag->pagf_init) {
+			if (ineed && !(longest = pag->pagf_longest))
+				longest = pag->pagf_flcount > 0;
+			if (!ineed ||
+			    (pag->pagf_freeblks >= needspace + ineed &&
+			     longest >= ineed &&
+			     okalloc)) {
+				if (agbp == NULL &&
+				    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+					agbp = NULL;
+					mraccunlock(&mp->m_peraglock);
+					goto nextag;
+				}
+				mraccunlock(&mp->m_peraglock);
+				return agbp;
+			}
+		}
+		mraccunlock(&mp->m_peraglock);
+		if (agbp)
+			xfs_trans_brelse(tp, agbp);
+nextag:		
+		/*   
+		 * No point in iterating over the rest, if we're shutting
+		 * down.
+		 */
+		if (XFS_FORCED_SHUTDOWN(mp))
+			return (xfs_buf_t *)0;
+		agno++;
+		if (agno == agcount)
+			agno = 0;
+		if (agno == pagno) {	/* wrapped back to the starting a.g. */
+			if (flags == 0)	/* second full pass also failed */
+				return (xfs_buf_t *)0;
+			flags = 0;	/* retry once without TRYLOCK */
+		}
+	}
+}
+
+/* 
+ * Visible inode allocation functions.
+ */
+
+/*
+ * Allocate an inode on disk.
+ * Mode is used to tell whether the new inode will need space, and whether
+ * it is a directory.
+ *
+ * The arguments IO_agbp and alloc_done are defined to work within
+ * the constraint of one allocation per transaction.
+ * xfs_dialloc() is designed to be called twice if it has to do an
+ * allocation to make more free inodes.  On the first call,
+ * IO_agbp should be set to NULL. If an inode is available,
+ * i.e., xfs_dialloc() did not need to do an allocation, an inode
+ * number is returned.  In this case, IO_agbp would be set to the 
+ * current ag_buf and alloc_done set to false.
+ * If an allocation needed to be done, xfs_dialloc would return
+ * the current ag_buf in IO_agbp and set alloc_done to true.
+ * The caller should then commit the current transaction, allocate a new
+ * transaction, and call xfs_dialloc() again, passing in the previous
+ * value of IO_agbp.  IO_agbp should be held across the transactions.
+ * Since the agbp is locked across the two calls, the second call is
+ * guaranteed to have a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the
+ * on-disk data structures are updated.  The inode itself is not read
+ * in, since doing so would break ordering constraints with xfs_reclaim.
+ */
+int
+xfs_dialloc(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_ino_t	parent,		/* parent inode (directory) */
+	mode_t		mode,		/* mode bits for new inode */
+	int		okalloc,	/* ok to allocate more space */
+	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
+	boolean_t	*alloc_done,	/* true if we needed to replenish
+					   inode freelist */
+	xfs_ino_t	*inop)		/* inode number allocated */
+{
+	xfs_agnumber_t	agcount;	/* number of allocation groups */
+	xfs_buf_t	*agbp;		/* allocation group header's buffer */
+	xfs_agnumber_t	agno;		/* allocation group number */
+	xfs_agi_t	*agi;		/* allocation group header structure */
+	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
+	int		error;		/* error return value */
+	int		i;		/* result code */
+	int		ialloced;	/* inode allocation status */
+	int		noroom = 0;	/* no space for inode blk allocation */
+	xfs_ino_t	ino;		/* fs-relative inode to be returned */
+	/* REFERENCED */
+	int		j;		/* result code */
+	xfs_mount_t	*mp;		/* file system mount structure */
+	int		offset;		/* index of inode in chunk */
+	xfs_agino_t	pagino;		/* parent's a.g. relative inode # */
+	xfs_agnumber_t	pagno;		/* parent's allocation group number */
+	xfs_inobt_rec_t	rec;		/* inode allocation record */
+	xfs_agnumber_t	tagno;		/* testing allocation group number */
+	xfs_btree_cur_t	*tcur;		/* temp cursor */
+	xfs_inobt_rec_t	trec;		/* temp inode allocation record */
+
+
+	if (*IO_agbp == NULL) {
+		/*
+		 * We do not have an agbp, so select an initial allocation
+		 * group for inode allocation.
+		 */
+		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+		/*
+		 * Couldn't find an allocation group satisfying the 
+		 * criteria, give up.
+		 */
+		if (!agbp) {
+			*inop = NULLFSINO;
+			return 0;
+		}
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+	} else {
+		/*
+		 * Continue where we left off before.  In this case, we 
+		 * know that the allocation group has free inodes.
+		 */
+		agbp = *IO_agbp;
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+		ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+	}
+	mp = tp->t_mountp;
+	agcount = mp->m_sb.sb_agcount;
+	agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+	tagno = agno;
+	pagno = XFS_INO_TO_AGNO(mp, parent);
+	pagino = XFS_INO_TO_AGINO(mp, parent);
+
+	/*
+	 * If we have already hit the ceiling of inode blocks then clear
+	 * okalloc so we scan all available agi structures for a free
+	 * inode.
+	 */
+
+	if (mp->m_maxicount &&
+	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+		noroom = 1;
+		okalloc = 0;
+	}
+
+	/*
+	 * Loop until we find an allocation group that either has free inodes
+	 * or in which we can allocate some inodes.  Iterate through the
+	 * allocation groups upward, wrapping at the end.
+	 */
+	*alloc_done = B_FALSE;
+	while (INT_GET(agi->agi_freecount, ARCH_CONVERT) == 0) {
+		/* 
+		 * Don't do anything if we're not supposed to allocate
+		 * any blocks, just go on to the next ag.
+		 */
+		if (okalloc) {
+			/*
+			 * Try to allocate some new inodes in the allocation
+			 * group.
+			 */
+			if (error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced)) {
+				xfs_trans_brelse(tp, agbp);
+				if (error == ENOSPC) {
+					*inop = NULLFSINO;
+					return 0;
+				} else
+					return error;
+			}
+			if (ialloced) {
+				/*
+				 * We successfully allocated some inodes, return
+				 * the current context to the caller so that it
+				 * can commit the current transaction and call
+				 * us again where we left off.
+				 */
+				ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+				*alloc_done = B_TRUE;
+				*IO_agbp = agbp;
+				*inop = NULLFSINO;
+				return 0;
+			}
+		}
+		/*
+		 * If it failed, give up on this ag.
+		 */
+		xfs_trans_brelse(tp, agbp);
+		/*
+		 * Go on to the next ag: get its ag header.
+		 */
+nextag:
+		if (++tagno == agcount)
+			tagno = 0;
+		if (tagno == agno) {
+			*inop = NULLFSINO;
+			return noroom ? ENOSPC : 0;
+		}
+		mraccess(&mp->m_peraglock);
+		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
+		mraccunlock(&mp->m_peraglock);
+		if (error)
+			goto nextag;
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+	}
+	/*
+	 * Here with an allocation group that has a free inode.
+	 * Reset agno since we may have chosen a new ag in the
+	 * loop above.
+	 */
+	agno = tagno;
+	*IO_agbp = NULL;
+	cur = xfs_btree_init_cursor(mp, tp, agbp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				    XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+	/*
+	 * If pagino is 0 (this is the root inode allocation) use newino.
+	 * This must work because we've just allocated some.
+	 */
+	if (!pagino)
+		pagino = INT_GET(agi->agi_newino, ARCH_CONVERT);
+#ifdef DEBUG
+	if (cur->bc_nlevels == 1) {
+		int	freecount = 0;
+
+		if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		do {
+			if (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+					&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			freecount += rec.ir_freecount;
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				goto error0;
+		} while (i == 1);
+
+		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		       XFS_FORCED_SHUTDOWN(mp));
+	}
+#endif
+	/*
+	 * If in the same a.g. as the parent, try to get near the parent.
+	 */
+	if (pagno == agno) {
+		if (error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))
+			goto error0;
+		if (i != 0 &&
+		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+			    &rec.ir_freecount, &rec.ir_free, &j, ARCH_NOCONVERT)) == 0 &&
+		    j == 1 &&
+		    rec.ir_freecount > 0) {
+			/*
+			 * Found a free inode in the same chunk
+			 * as parent, done.
+			 */
+		}
+		/*
+		 * In the same a.g. as parent, but parent's chunk is full.
+		 */
+		else {
+			int	doneleft;	/* done, to the left */
+			int	doneright;	/* done, to the right */
+
+			if (error)
+				goto error0;
+			ASSERT(i == 1);
+			ASSERT(j == 1);
+			/*
+			 * Duplicate the cursor, search left & right
+			 * simultaneously.
+			 */
+			if (error = xfs_btree_dup_cursor(cur, &tcur))
+				goto error0;
+			/*
+			 * Search left with tcur, back up 1 record.
+			 */
+			if (error = xfs_inobt_decrement(tcur, 0, &i))
+				goto error1;
+			doneleft = !i;
+			if (!doneleft) {
+				if (error = xfs_inobt_get_rec(tcur,
+						&trec.ir_startino,
+						&trec.ir_freecount,
+						&trec.ir_free, &i, ARCH_NOCONVERT))
+					goto error1;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
+			}
+			/* 
+			 * Search right with cur, go forward 1 record.
+			 */
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				goto error1;
+			doneright = !i;
+			if (!doneright) {
+				if (error = xfs_inobt_get_rec(cur,
+						&rec.ir_startino,
+						&rec.ir_freecount,
+						&rec.ir_free, &i, ARCH_NOCONVERT))
+					goto error1;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
+			}
+			/*
+			 * Loop until we find the closest inode chunk
+			 * with a free one.
+			 */
+			while (!doneleft || !doneright) {
+				int	useleft;  /* using left inode
+						     chunk this time */
+
+				/*
+				 * Figure out which block is closer,
+				 * if both are valid.
+				 */
+				if (!doneleft && !doneright)
+					useleft =
+						pagino -
+						(trec.ir_startino +
+						 XFS_INODES_PER_CHUNK - 1) <
+						 rec.ir_startino - pagino;
+				else
+					useleft = !doneleft;
+				/*
+				 * If checking the left, does it have
+				 * free inodes?
+				 */
+				if (useleft && trec.ir_freecount) {
+					/*
+					 * Yes, set it up as the chunk to use.
+					 */
+					rec = trec;
+					xfs_btree_del_cursor(cur,
+						XFS_BTREE_NOERROR);
+					cur = tcur;
+					break;
+				}
+				/*
+				 * If checking the right, does it have
+				 * free inodes?
+				 */
+				if (!useleft && rec.ir_freecount) {
+					/*
+					 * Yes, it's already set up.
+					 */
+					xfs_btree_del_cursor(tcur,
+						XFS_BTREE_NOERROR);
+					break;
+				}
+				/*
+				 * If used the left, get another one
+				 * further left.
+				 */
+				if (useleft) {
+					if (error = xfs_inobt_decrement(tcur, 0,
+							&i))
+						goto error1;
+					doneleft = !i;
+					if (!doneleft) {
+						if (error = xfs_inobt_get_rec(
+							    tcur,
+							    &trec.ir_startino,
+							    &trec.ir_freecount,
+							    &trec.ir_free, &i, ARCH_NOCONVERT))
+							goto error1;
+						XFS_WANT_CORRUPTED_GOTO(i == 1,
+							error1);
+					}
+				}
+				/*
+				 * If used the right, get another one
+				 * further right.
+				 */
+				else {
+					if (error = xfs_inobt_increment(cur, 0,
+							&i))
+						goto error1;
+					doneright = !i;
+					if (!doneright) {
+						if (error = xfs_inobt_get_rec(
+							    cur,
+							    &rec.ir_startino,
+							    &rec.ir_freecount,
+							    &rec.ir_free, &i, ARCH_NOCONVERT))
+							goto error1;
+						XFS_WANT_CORRUPTED_GOTO(i == 1,
+							error1);
+					}
+				}
+			}
+			ASSERT(!doneleft || !doneright);
+		}
+	}
+	/*
+	 * In a different a.g. from the parent.
+	 * See if the most recently allocated block has any free.
+	 */
+	else if (INT_GET(agi->agi_newino, ARCH_CONVERT) != NULLAGINO) {
+		if (error = xfs_inobt_lookup_eq(cur,
+				INT_GET(agi->agi_newino, ARCH_CONVERT), 0, 0, &i))
+			goto error0;
+		if (i == 1 &&
+		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+			    &rec.ir_freecount, &rec.ir_free, &j, ARCH_NOCONVERT)) == 0 &&
+		    j == 1 &&
+		    rec.ir_freecount > 0) {
+			/*
+			 * The last chunk allocated in the group still has
+			 * a free inode.
+			 */
+		}
+		/*
+		 * None left in the last group, search the whole a.g.
+		 */
+		else {
+			if (error)
+				goto error0;
+			if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+				goto error0;
+			ASSERT(i == 1);
+			for (;;) {
+				if (error = xfs_inobt_get_rec(cur,
+						&rec.ir_startino,
+						&rec.ir_freecount, &rec.ir_free,
+						&i, ARCH_NOCONVERT))
+					goto error0;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+				if (rec.ir_freecount > 0)
+					break;
+				if (error = xfs_inobt_increment(cur, 0, &i))
+					goto error0;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			}
+		}
+	}
+	offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
+	ASSERT(offset >= 0);
+	ASSERT(offset < XFS_INODES_PER_CHUNK);
+	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+				   XFS_INODES_PER_CHUNK) == 0);
+	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+	XFS_INOBT_CLR_FREE(&rec, offset, ARCH_NOCONVERT);
+	rec.ir_freecount--;
+	if (error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
+			rec.ir_free))
+		goto error0;
+	INT_MOD(agi->agi_freecount, ARCH_CONVERT, -1);
+	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+	mraccess(&mp->m_peraglock);
+	mp->m_perag[tagno].pagi_freecount--;
+	mraccunlock(&mp->m_peraglock);
+#ifdef DEBUG
+	if (cur->bc_nlevels == 1) {
+		int	freecount = 0;
+
+		if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+			goto error0;
+		do {
+			if (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+					&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			freecount += rec.ir_freecount;
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				goto error0;
+		} while (i == 1);
+		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		       XFS_FORCED_SHUTDOWN(mp));
+	}
+#endif
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+	*inop = ino;
+	return 0;
+error1:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+error0:
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+
+/*
+ * Return the location of the inode in bno/off/len, for mapping it into a buffer.
+ */
+/*ARGSUSED*/
+int				/* 0, EINVAL for a bad ino, or a lookup error */
+xfs_dilocate(
+	xfs_mount_t	*mp,	/* file system mount structure */
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_ino_t	ino,	/* inode to locate */
+	xfs_fsblock_t	*bno,	/* i/o: block containing inode; read as cluster start on lookup path */
+	int		*len,	/* output: num blocks in inode cluster */
+	int		*off,	/* output: index in block of inode */
+	uint		flags)	/* flags concerning inode lookup */
+{
+	xfs_agblock_t	agbno;	/* block number of inode in the alloc group */
+	xfs_buf_t	*agbp;	/* agi buffer */
+	xfs_agino_t	agino;	/* inode number within alloc group */
+	xfs_agnumber_t	agno;	/* allocation group number */
+	int		blks_per_cluster; /* num blocks per inode cluster */
+	xfs_agblock_t	chunk_agbno;	/* first block in inode chunk */
+	xfs_agino_t	chunk_agino;	/* first agino in inode chunk */
+	__int32_t	chunk_cnt;	/* count of free inodes in chunk */
+	xfs_inofree_t	chunk_free;	/* mask of free inodes in chunk */
+	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
+	xfs_btree_cur_t	*cur;	/* inode btree cursor */
+	int		error;	/* error code */
+	int		i;	/* temp state */
+	int		offset;	/* index of inode in its buffer */
+	int		offset_agbno;	/* blks from chunk start to inode */
+
+	ASSERT(ino != NULLFSINO);
+	/*
+	 * Split up the inode number into its parts.
+	 */
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+	    ino != XFS_AGINO_TO_INO(mp, agno, agino))
+		return XFS_ERROR(EINVAL);	/* parts out of range or don't rebuild ino */
+	if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
+	    !(flags & XFS_IMAP_LOOKUP)) {	/* one-block answer, no btree lookup */
+		offset = XFS_INO_TO_OFFSET(mp, ino);
+		ASSERT(offset < mp->m_sb.sb_inopblock);
+		*bno = XFS_AGB_TO_FSB(mp, agno, agbno);
+		*off = offset;
+		*len = 1;
+		return 0;
+	}
+	blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+	if (*bno != NULLFSBLOCK) {	/* caller's *bno is taken as the cluster's first block */
+		offset = XFS_INO_TO_OFFSET(mp, ino);
+		ASSERT(offset < mp->m_sb.sb_inopblock);
+		cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
+		*off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+			offset;
+		*len = blks_per_cluster;
+		return 0;
+	}
+	if (mp->m_inoalign_mask) {	/* chunk start found by masking agbno */
+		offset_agbno = agbno & mp->m_inoalign_mask;
+		chunk_agbno = agbno - offset_agbno;
+	} else {	/* otherwise find the chunk record in the inode btree */
+		mraccess(&mp->m_peraglock);
+		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+		mraccunlock(&mp->m_peraglock);
+		if (error)
+			return error;
+		cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
+			(xfs_inode_t *)0, 0);
+		if (error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))
+			goto error0;
+		if (error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
+				&chunk_free, &i, ARCH_NOCONVERT))
+			goto error0;
+		if (i == 0)	/* i as last set by xfs_inobt_get_rec */
+			error = XFS_ERROR(EINVAL);
+		xfs_trans_brelse(tp, agbp);	/* AGI buffer and cursor are released ... */
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);	/* ... before error is checked */
+		if (error)
+			return error;
+		chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
+		offset_agbno = agbno - chunk_agbno;
+	}
+	ASSERT(agbno >= chunk_agbno);
+	cluster_agbno = chunk_agbno +
+		((offset_agbno / blks_per_cluster) * blks_per_cluster);	/* round down to cluster start */
+	offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+		XFS_INO_TO_OFFSET(mp, ino);
+	*bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
+	*off = offset;
+	*len = blks_per_cluster;
+	return 0;
+error0:
+	xfs_trans_brelse(tp, agbp);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Work out the worst-case number of inode btree levels; store in m_in_maxlevels.
+ */
+void
+xfs_ialloc_compute_maxlevels(
+	xfs_mount_t	*mp)		/* mount whose m_in_maxlevels is set */
+{
+	int		leaf_min;	/* min records in a leaf block */
+	int		node_min;	/* min records in a node block */
+	uint		nblocks;	/* blocks needed at current level */
+	uint		nleafents;	/* most leaf entries to allow for */
+	int		nlevels;	/* levels counted so far */
+
+	leaf_min = mp->m_alloc_mnr[0];
+	node_min = mp->m_alloc_mnr[1];
+	nleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
+	nblocks = (nleafents + leaf_min - 1) / leaf_min;
+	/* Each pass divides by the minimum fanout and adds one level. */
+	for (nlevels = 1; nblocks > 1; nlevels++)
+		nblocks = (nblocks + node_min - 1) / node_min;
+	mp->m_in_maxlevels = nlevels;
+}
+
+/*
+ * Log the fields of the a.g. inode header (AGI) selected by "fields".
+ */
+void
+xfs_ialloc_log_agi(
+	xfs_trans_t	*tp,		/* transaction doing the logging */
+	xfs_buf_t	*bp,		/* buffer holding the AGI */
+	int		fields)		/* bitmask of fields to log */
+{
+	static const short	agi_offsets[] = {
+		/* must stay in the same order as the field bit definitions */
+		offsetof(xfs_agi_t, agi_magicnum),
+		offsetof(xfs_agi_t, agi_versionnum),
+		offsetof(xfs_agi_t, agi_seqno),
+		offsetof(xfs_agi_t, agi_length),
+		offsetof(xfs_agi_t, agi_count),
+		offsetof(xfs_agi_t, agi_root),
+		offsetof(xfs_agi_t, agi_level),
+		offsetof(xfs_agi_t, agi_freecount),
+		offsetof(xfs_agi_t, agi_newino),
+		offsetof(xfs_agi_t, agi_dirino),
+		offsetof(xfs_agi_t, agi_unlinked),
+		sizeof(xfs_agi_t)
+	};
+	int			start;		/* first byte to log */
+	int			end;		/* last byte to log */
+#ifdef DEBUG
+	xfs_agi_t		*agi = XFS_BUF_TO_AGI(bp);	/* header being logged */
+
+	ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+#endif
+	/*
+	 * Get the first and last byte offsets for the requested fields ...
+	 */
+	xfs_btree_offsets(fields, agi_offsets, XFS_AGI_NUM_BITS,
+		&start, &end);
+	/*
+	 * ... and log that byte range of the AGI buffer.
+	 */
+	xfs_trans_log_buf(tp, bp, start, end);
+}
+
+/*
+ * Read the inode allocation header (AGI) of an allocation group and validate it.
+ */
+int
+xfs_ialloc_read_agi(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_buf_t	**bpp)		/* output: buffer holding the AGI */
+{
+	xfs_agi_t	*agi;		/* AGI inside bp */
+	int		agi_ok;		/* magic and version both good */
+	xfs_buf_t	*bp;		/* buffer read for the AGI */
+	xfs_daddr_t	daddr;		/* disk address of the AGI */
+	int		err;		/* result of the buffer read */
+#ifdef DEBUG
+	int		i;		/* unlinked bucket index */
+#endif
+	xfs_perag_t	*pag;		/* in-core per-a.g. data */
+
+	ASSERT(agno != NULLAGNUMBER);
+	daddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR);
+	err = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, daddr, 1, 0, &bp);
+	if (err)
+		return err;
+	ASSERT(bp && !XFS_BUF_GETERROR(bp));
+	/*
+	 * Reject the block unless both its magic number and version are good.
+	 */
+	agi = XFS_BUF_TO_AGI(bp);
+	agi_ok = (INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC) &&
+		 XFS_AGI_GOOD_VERSION(
+			INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+	if (XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
+			XFS_RANDOM_IALLOC_READ_AGI)) {
+		xfs_trans_brelse(tp, bp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	pag = &mp->m_perag[agno];
+	if (pag->pagi_init) {
+		/*
+		 * Already set up: the in-core count should match the AGI,
+		 * except possibly in the middle of a forced shutdown.
+		 */
+		ASSERT(pag->pagi_freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT)
+			|| XFS_FORCED_SHUTDOWN(mp));
+	} else {
+		pag->pagi_freecount = INT_GET(agi->agi_freecount, ARCH_CONVERT);
+		pag->pagi_init = 1;
+	}
+#ifdef DEBUG
+	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+		ASSERT(INT_GET(agi->agi_unlinked[i], ARCH_CONVERT) != 0);
+#endif
+	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
+	*bpp = bp;
+	return 0;
+}
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
new file mode 100644
index 000000000..bdf2dae48
--- /dev/null
+++ b/libxfs/xfs_ialloc_btree.c
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Inode allocation management for XFS.
+ */
+#include <xfs.h>
+
+/*
+ * Insert one record/level.  Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ */
+STATIC int				/* error */
+xfs_inobt_insrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to insert record at */
+	xfs_agblock_t		*bnop,	/* i/o: block ptr to insert (level > 0); out: block from a split, else NULLAGBLOCK */
+	xfs_inobt_rec_t		*recp,	/* i/o: record to insert; rewritten only after a split */
+	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur, set only after a split */
+	int			*stat)	/* output: success (1) / failure (0) */
+{
+	xfs_inobt_block_t	*block;	/* btree block record/key lives in */
+	xfs_buf_t		*bp;	/* buffer for block */
+	int			error;	/* error return value */
+	int			i;	/* loop index, also status from helper calls */
+	xfs_inobt_key_t		key;	/* key value being inserted */
+	xfs_inobt_key_t		*kp;	/* pointer to btree keys */
+	xfs_agblock_t		nbno;	/* block number of allocated block */
+	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
+	xfs_inobt_key_t		nkey;	/* new key value, from split */
+	xfs_inobt_rec_t		nrec;	/* new record value, for caller */
+	int			optr;	/* old ptr value */
+	xfs_inobt_ptr_t		*pp;	/* pointer to btree addresses */
+	int			ptr;	/* index in btree block for this rec */
+	xfs_inobt_rec_t		*rp;	/* pointer to btree records */
+
+	/*
+	 * If we made it to the root level, allocate a new root block
+	 * and we're done.
+	 */
+	if (level >= cur->bc_nlevels) {
+		error = xfs_inobt_newroot(cur, &i);
+		*bnop = NULLAGBLOCK;
+		*stat = i;	/* NOTE(review): stored even when error != 0; confirm newroot always sets i */
+		return error;
+	}
+	/*
+	 * Make a key out of the record data to be inserted, and save it.
+	 */
+	key.ir_startino = recp->ir_startino; /* INT_: direct copy */
+	optr = ptr = cur->bc_ptrs[level];
+	/*
+	 * If we're off the left edge, return failure.
+	 */
+	if (ptr == 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Get pointers to the btree buffer and block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+	/*
+	 * Check that the new entry is being inserted in the right place.
+	 */
+	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0) {
+			rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
+			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+		} else {
+			kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
+			xfs_btree_check_key(cur->bc_btnum, &key, kp);
+		}
+	}
+#endif
+	nbno = NULLAGBLOCK;	/* stays NULLAGBLOCK unless a split happens below */
+	ncur = (xfs_btree_cur_t *)0;
+	/*
+	 * If the block is full, we can't insert the new entry until we
+	 * make the block un-full.
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+		/*
+		 * First, try shifting an entry to the right neighbor.
+		 */
+		if (error = xfs_inobt_rshift(cur, level, &i))
+			return error;
+		if (i) {
+			/* rshift reported success: nothing more to do */
+		}
+		/*
+		 * Next, try shifting an entry to the left neighbor.
+		 */
+		else {
+			if (error = xfs_inobt_lshift(cur, level, &i))
+				return error;
+			if (i) {
+				optr = ptr = cur->bc_ptrs[level];	/* lshift moved the cursor; re-read it */
+			} else {
+				/*
+				 * Next, try splitting the current block
+				 * in half. If this works we have to
+				 * re-set our variables because
+				 * we could be in a different block now.
+				 */
+				if (error = xfs_inobt_split(cur, level, &nbno,
+						&nkey, &ncur, &i))
+					return error;
+				if (i) {
+					bp = cur->bc_bufs[level];
+					block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+					if (error = xfs_btree_check_sblock(cur,
+							block, level, bp))
+						return error;
+#endif
+					ptr = cur->bc_ptrs[level];	/* optr is deliberately left at its old value */
+					nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy; only field of nrec set here */
+				} else {
+					/*
+					 * Otherwise the insert fails.
+					 */
+					*stat = 0;
+					return 0;
+				}
+			}
+		}
+	}
+	/*
+	 * At this point we know there's room for our new entry in the block
+	 * we're pointing at.
+	 */
+	if (level > 0) {
+		/*
+		 * It's a non-leaf entry.  Make a hole for the new data
+		 * in the key and ptr regions of the block.
+		 */
+		kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
+		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(&kp[ptr - 1], &kp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));	/* entries ptr..numrecs move up one slot */
+		ovbcopy(&pp[ptr - 1], &pp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
+		/*
+		 * Now stuff the new data in, bump numrecs and log the new data.
+		 */
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, *bnop, level))
+			return error;
+#endif
+		kp[ptr - 1] = key; /* INT_: struct copy */
+		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	} else {
+		/*
+		 * It's a leaf entry.  Make a hole for the new record.
+		 */
+		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
+		ovbcopy(&rp[ptr - 1], &rp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+		/*
+		 * Now stuff the new record in, bump numrecs
+		 * and log the new data.
+		 */
+		rp[ptr - 1] = *recp; /* INT_: struct copy */
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	}
+	/*
+	 * Log the new number of records in the btree header.
+	 */
+	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	/*
+	 * Check that the key/record is in the right place, now.
+	 */
+	if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0)
+			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
+				rp + ptr);
+		else
+			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
+				kp + ptr);
+	}
+#endif
+	/*
+	 * If we inserted at the start of a block, update the parents' keys.
+	 */
+	if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
+		return error;
+	/*
+	 * Return the new block number, if any.
+	 * If there is one, give back a record value and a cursor too.
+	 */
+	*bnop = nbno;
+	if (nbno != NULLAGBLOCK) {
+		*recp = nrec; /* INT_: struct copy; NOTE(review): only ir_startino of nrec was assigned above */
+		*curp = ncur;
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Log the header fields selected by "fields" from a btree block.
+ */
+STATIC void
+xfs_inobt_log_block(
+	xfs_trans_t		*tp,	/* transaction doing the logging */
+	xfs_buf_t		*bp,	/* buffer holding the btree block */
+	int			fields)	/* XFS_BB_... bits to log */
+{
+	static const short	hdr_offsets[] = {
+		offsetof(xfs_inobt_block_t, bb_magic),
+		offsetof(xfs_inobt_block_t, bb_level),
+		offsetof(xfs_inobt_block_t, bb_numrecs),
+		offsetof(xfs_inobt_block_t, bb_leftsib),
+		offsetof(xfs_inobt_block_t, bb_rightsib),
+		sizeof(xfs_inobt_block_t)	/* size of the block structure */
+	};
+	int			start;	/* first byte offset logged */
+	int			end;	/* last byte offset logged */
+
+	xfs_btree_offsets(fields, hdr_offsets, XFS_BB_NUM_BITS, &start, &end);
+	xfs_trans_log_buf(tp, bp, start, end);
+}
+
+/*
+ * Log keys kfirst through klast (1-based) of a nonleaf btree block.
+ */
+STATIC void
+xfs_inobt_log_keys(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_buf_t		*bp,	/* buffer holding the btree block */
+	int			kfirst,	/* index of first key to log */
+	int			klast)	/* index of last key to log */
+{
+	xfs_caddr_t		base;	/* byte address of the block */
+	xfs_inobt_block_t	*blk;	/* btree block inside bp */
+	xfs_inobt_key_t		*keys;	/* address of key number 1 */
+
+	blk = XFS_BUF_TO_INOBT_BLOCK(bp);
+	base = (xfs_caddr_t)blk;
+	keys = XFS_INOBT_KEY_ADDR(blk, 1, cur);
+	xfs_trans_log_buf(cur->bc_tp, bp,
+		(int)((xfs_caddr_t)(keys + kfirst - 1) - base),
+		(int)((xfs_caddr_t)(keys + klast) - 1 - base));
+}
+
+/*
+ * Log block pointers pfirst through plast (1-based) of a nonleaf btree block.
+ */
+STATIC void
+xfs_inobt_log_ptrs(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_buf_t		*bp,	/* buffer holding the btree block */
+	int			pfirst,	/* index of first pointer to log */
+	int			plast)	/* index of last pointer to log */
+{
+	xfs_caddr_t		base;	/* byte address of the block */
+	xfs_inobt_block_t	*blk;	/* btree block inside bp */
+	xfs_inobt_ptr_t		*ptrs;	/* address of pointer number 1 */
+
+	blk = XFS_BUF_TO_INOBT_BLOCK(bp);
+	base = (xfs_caddr_t)blk;
+	ptrs = XFS_INOBT_PTR_ADDR(blk, 1, cur);
+	xfs_trans_log_buf(cur->bc_tp, bp,
+		(int)((xfs_caddr_t)(ptrs + pfirst - 1) - base),
+		(int)((xfs_caddr_t)(ptrs + plast) - 1 - base));
+}
+
+/*
+ * Log records rfirst through rlast (1-based) of a leaf btree block.
+ */
+STATIC void
+xfs_inobt_log_recs(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_buf_t		*bp,	/* buffer holding the btree block */
+	int			rfirst,	/* index of first record to log */
+	int			rlast)	/* index of last record to log */
+{
+	xfs_caddr_t		base;	/* byte address of the block */
+	xfs_inobt_block_t	*blk;	/* btree block inside bp */
+	xfs_inobt_rec_t		*recs;	/* address of record number 1 */
+
+	blk = XFS_BUF_TO_INOBT_BLOCK(bp);
+	base = (xfs_caddr_t)blk;
+	recs = XFS_INOBT_REC_ADDR(blk, 1, cur);
+	xfs_trans_log_buf(cur->bc_tp, bp,
+		(int)((xfs_caddr_t)(recs + rfirst - 1) - base),
+		(int)((xfs_caddr_t)(recs + rlast) - 1 - base));
+}
+
+/*
+ * Look up the record keyed by cur->bc_rec.i.ir_startino; point the cursor at it, based on dir.
+ * *stat is set to 1 if a matching record was found, else 0; the return value is an error code.
+ */
+STATIC int				/* error */
+xfs_inobt_lookup(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_lookup_t		dir,	/* <=, ==, or >= */
+	int			*stat)	/* success/failure */
+{
+	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
+	xfs_agnumber_t		agno;	/* allocation group number */
+	xfs_inobt_block_t	*block;	/* current btree block */
+	int			diff;	/* sign of (current key - lookup key): <0, 0 or >0 */
+	int			error;	/* error return value */
+	int			keyno;	/* current key number */
+	int			level;	/* level in the btree */
+	xfs_mount_t		*mp;	/* file system mount point */
+
+	/*
+	 * Get the allocation group header, and the root block number.
+	 */
+	mp = cur->bc_mp;
+	{
+		xfs_agi_t	*agi;	/* a.g. inode header */
+
+		agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+		agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+		agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+	}
+	/*
+	 * Iterate over each level in the btree, starting at the root.
+	 * For each level above the leaves, find the key we need, based
+	 * on the lookup record, then follow the corresponding block
+	 * pointer down to the next level.
+	 */
+	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+		xfs_buf_t	*bp;	/* buffer pointer for btree block */
+		xfs_daddr_t		d;	/* disk address of btree block */
+
+		/*
+		 * Get the disk address we're looking for.
+		 */
+		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+		/*
+		 * If the old buffer at this level is for a different block,
+		 * throw it away, otherwise just use it.
+		 */
+		bp = cur->bc_bufs[level];
+		if (bp && XFS_BUF_ADDR(bp) != d)
+			bp = (xfs_buf_t *)0;
+		if (!bp) {
+			/*
+			 * Need to get a new buffer.  Read it, then
+			 * set it in the cursor, releasing the old one.
+			 */
+			if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+					agno, agbno, 0, &bp, XFS_INO_BTREE_REF))
+				return error;
+			xfs_btree_setbuf(cur, level, bp);
+			/*
+			 * Point to the btree block, now that we have the buffer
+			 */
+			block = XFS_BUF_TO_INOBT_BLOCK(bp);
+			if (error = xfs_btree_check_sblock(cur, block, level,
+					bp))
+				return error;
+		} else
+			block = XFS_BUF_TO_INOBT_BLOCK(bp);
+		/*
+		 * If we already had a key match at a higher level, we know
+		 * we need to use the first entry in this block.
+		 */
+		if (diff == 0)
+			keyno = 1;
+		/*
+		 * Otherwise we need to search this block.  Do a binary search.
+		 */
+		else {
+			int		high;	/* high entry number */
+			xfs_inobt_key_t	*kkbase;/* base of keys in block */
+			xfs_inobt_rec_t	*krbase;/* base of records in block */
+			int		low;	/* low entry number */
+
+			/*
+			 * Get a pointer to keys or records.
+			 */
+			if (level > 0)
+				kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
+			else
+				krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
+			/*
+			 * Set low and high entry numbers, 1-based.
+			 */
+			low = 1;
+			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+				/*
+				 * If the block is empty, the tree must
+				 * be an empty leaf.
+				 */
+				ASSERT(level == 0 && cur->bc_nlevels == 1);
+				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+				*stat = 0;
+				return 0;
+			}
+			/*
+			 * Binary search the block.
+			 */
+			while (low <= high) {
+				xfs_agino_t	startino;	/* key value */
+
+				/* keyno is average of low and high. */
+				keyno = (low + high) >> 1;
+				/*
+				 * Get startino.
+				 */
+				if (level > 0) {
+					xfs_inobt_key_t	*kkp;
+
+					kkp = kkbase + keyno - 1;
+					startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
+				} else {
+					xfs_inobt_rec_t	*krp;
+
+					krp = krbase + keyno - 1;
+					startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
+				}
+				/*
+				 * Three-way compare to get next direction.  Only the
+				 * sign is used; subtracting could wrap for distant keys.
+				 */
+				diff = (startino > cur->bc_rec.i.ir_startino) -
+				       (startino < cur->bc_rec.i.ir_startino);
+				/*
+				 * Less than, move right.
+				 */
+				if (diff < 0)
+					low = keyno + 1;
+				/*
+				 * Greater than, move left.
+				 */
+				else if (diff > 0)
+					high = keyno - 1;
+				/*
+				 * Equal, we're done.
+				 */
+				else
+					break;
+			}
+		}
+		/*
+		 * If there are more levels, set up for the next level
+		 * by getting the block number and filling in the cursor.
+		 */
+		if (level > 0) {
+			/*
+			 * If we moved left, need the previous key number,
+			 * unless there isn't one.
+			 */
+			if (diff > 0 && --keyno < 1)
+				keyno = 1;
+			agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+#ifdef DEBUG
+			if (error = xfs_btree_check_sptr(cur, agbno, level))
+				return error;
+#endif
+			cur->bc_ptrs[level] = keyno;
+		}
+	}
+	/*
+	 * Done with the search.
+	 * See if we need to adjust the results.
+	 */
+	if (dir != XFS_LOOKUP_LE && diff < 0) {
+		keyno++;
+		/*
+		 * If ge search and we went off the end of the block, but it's
+		 * not the last block, we're in the wrong block.
+		 */
+		if (dir == XFS_LOOKUP_GE &&
+		    keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			int	i;
+
+			cur->bc_ptrs[0] = keyno;
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				return error;
+			ASSERT(i == 1);
+			*stat = 1;
+			return 0;
+		}
+	}
+	else if (dir == XFS_LOOKUP_LE && diff > 0)
+		keyno--;
+	cur->bc_ptrs[0] = keyno;
+	/*
+	 * Return if we succeeded or not.
+	 */
+	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		*stat = 0;
+	else
+		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+	return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int				/* error */
+xfs_inobt_lshift(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to shift record on */
+	int			*stat)	/* output: 1 if an entry was moved, 0 if not */
+{
+	int			error;	/* error return value */
+#ifdef DEBUG
+	int			i;	/* loop index */
+#endif
+	xfs_inobt_key_t		key;	/* key value for leaf level upward */
+	xfs_buf_t		*lbp;	/* buffer for left neighbor block */
+	xfs_inobt_block_t	*left;	/* left neighbor btree block */
+	xfs_inobt_key_t		*lkp;	/* key pointer for left block */
+	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
+	xfs_inobt_rec_t		*lrp;	/* record pointer for left block */
+	int			nrec;	/* new number of left block entries */
+	xfs_buf_t		*rbp;	/* buffer for right (current) block */
+	xfs_inobt_block_t	*right;	/* right (current) btree block */
+	xfs_inobt_key_t		*rkp;	/* key pointer for right block */
+	xfs_inobt_ptr_t		*rpp;	/* address pointer for right block */
+	xfs_inobt_rec_t		*rrp;	/* record pointer for right block */
+
+	/*
+	 * Set up variables for this block as "right".
+	 */
+	rbp = cur->bc_bufs[level];
+	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+#endif
+	/*
+	 * If we've got no left sibling then we can't shift an entry left.
+	 */
+	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved, don't
+	 * do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] <= 1) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the left neighbor as "left".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.i.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
+			XFS_INO_BTREE_REF))
+		return error;
+	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))	/* done even without DEBUG */
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;	/* slot in left that receives the moved entry */
+	/*
+	 * If non-leaf, copy a key and a ptr to the left block.
+	 */
+	if (level > 0) {
+		lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
+		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+		*lkp = *rkp;	/* right's first key becomes left's last */
+		xfs_inobt_log_keys(cur, lbp, nrec, nrec);
+		lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
+		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*lpp = *rpp; /* INT_: no-change copy */
+		xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
+	}
+	/*
+	 * If leaf, copy a record to the left block.
+	 */
+	else {
+		lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
+		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		*lrp = *rrp;	/* right's first record becomes left's last */
+		xfs_inobt_log_recs(cur, lbp, nrec, nrec);
+	}
+	/*
+	 * Bump and log left's numrecs, decrement and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);	/* presumably checks order vs. the preceding key */
+	else
+		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);	/* presumably checks order vs. the preceding record */
+#endif
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Slide the contents of right down one entry.
+	 */
+	if (level > 0) {
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+					level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		key.ir_startino = rrp->ir_startino; /* INT_: direct copy; right's new first record */
+		rkp = &key;	/* leaf has no key array: pass a key built from the record */
+	}
+	/*
+	 * Update the parent key values of right.
+	 */
+	if (error = xfs_inobt_updkey(cur, rkp, level + 1))
+		return error;
+	/*
+	 * Slide the cursor value left one.
+	 */
+	cur->bc_ptrs[level]--;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ * The new root gets two entries, one for each of the two blocks at the
+ * old root level.  Sets *stat to 0 if no block could be allocated, else 1.
+ */
+STATIC int				/* error */
+xfs_inobt_newroot(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			*stat)	/* success/failure */
+{
+	xfs_agi_t		*agi;	/* a.g. inode header */
+	xfs_alloc_arg_t		args;	/* allocation argument structure */
+	xfs_inobt_block_t	*block;	/* one half of the old root block */
+	xfs_buf_t		*bp;	/* buffer containing block */
+	int			error;	/* error return value */
+	xfs_inobt_key_t		*kp;	/* btree key pointer */
+	xfs_agblock_t		lbno;	/* left block number */
+	xfs_buf_t		*lbp;	/* left buffer pointer */
+	xfs_inobt_block_t	*left;	/* left btree block */
+	xfs_buf_t		*nbp;	/* new (root) buffer */
+	xfs_inobt_block_t	*new;	/* new (root) btree block */
+	int			nptr;	/* new value for key index, 1 or 2 */
+	xfs_inobt_ptr_t		*pp;	/* btree address pointer */
+	xfs_agblock_t		rbno;	/* right block number */
+	xfs_buf_t		*rbp;	/* right buffer pointer */
+	xfs_inobt_block_t	*right;	/* right btree block */
+	xfs_inobt_rec_t		*rp;	/* btree record pointer */
+
+	ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
+
+	/* Get a block & a buffer; the request is built from the current root's bno. */
+	agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+	args.tp = cur->bc_tp;
+	args.mp = cur->bc_mp;
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
+		INT_GET(agi->agi_root, ARCH_CONVERT));
+	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
+		args.isfl = args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	if (error = xfs_alloc_vextent(&args))
+		return error;
+	/*
+	 * None available, we fail.
+	 */
+	if (args.fsbno == NULLFSBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
+	new = XFS_BUF_TO_INOBT_BLOCK(nbp);
+	/*
+	 * Set the root data in the a.g. inode structure.
+	 */
+	INT_SET(agi->agi_root, ARCH_CONVERT, args.agbno);
+	INT_MOD(agi->agi_level, ARCH_CONVERT, 1);	/* one more level */
+	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
+		XFS_AGI_ROOT | XFS_AGI_LEVEL);
+	/*
+	 * At the previous root level there are now two blocks: the old
+	 * root, and the new block generated when it was split.
+	 * We don't know which one the cursor is pointing at, so we
+	 * set up variables "left" and "right" for each case.
+	 */
+	bp = cur->bc_bufs[cur->bc_nlevels - 1];
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp))
+		return error;
+#endif
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		/*
+		 * Our block is left, pick up the right block.
+		 */
+		lbp = bp;
+		lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
+		left = block;
+		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+				rbno, 0, &rbp, XFS_INO_BTREE_REF))
+			return error;
+		bp = rbp;	/* NOTE(review): bp is not read again below */
+		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+		if (error = xfs_btree_check_sblock(cur, right,
+				cur->bc_nlevels - 1, rbp))
+			return error;
+		nptr = 1;	/* cursor's block is new root entry 1 (left) */
+	} else {
+		/*
+		 * Our block is right, pick up the left block.
+		 */
+		rbp = bp;
+		rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
+		right = block;
+		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+				lbno, 0, &lbp, XFS_INO_BTREE_REF))
+			return error;
+		bp = lbp;	/* NOTE(review): bp is not read again below */
+		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+		if (error = xfs_btree_check_sblock(cur, left,
+				cur->bc_nlevels - 1, lbp))
+			return error;
+		nptr = 2;	/* cursor's block is new root entry 2 (right) */
+	}
+	/*
+	 * Fill in the new block's btree header and log it.
+	 * Its level is the old bc_nlevels; it has two entries and no siblings.
+	 */
+	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
+	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
+	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+        INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+	xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
+	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
+	/*
+	 * Fill in the key data in the new root.  Non-leaf children supply
+	 * their first key; leaf children the ir_startino of their first record.
+	 */
+	kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
+	if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+		kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
+		kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
+	} else {
+		rp = XFS_INOBT_REC_ADDR(left, 1, cur);
+		INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
+		rp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
+	}
+	xfs_inobt_log_keys(cur, nbp, 1, 2);
+	/*
+	 * Fill in the pointer data in the new root.
+	 */
+	pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
+	INT_SET(pp[0], ARCH_CONVERT, lbno);
+	INT_SET(pp[1], ARCH_CONVERT, rbno);
+	xfs_inobt_log_ptrs(cur, nbp, 1, 2);
+	/* Fix up the cursor: the new root becomes its top level, at entry nptr. */
+	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+	cur->bc_ptrs[cur->bc_nlevels] = nptr;
+	cur->bc_nlevels++;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * *stat is set to 1 if an entry was moved and 0 if not.  The parent key
+ * for the right block is updated through a temporary copy of the cursor;
+ * this routine itself does not assign to cur's ptrs or bufs.
+ */
+STATIC int				/* error */
+xfs_inobt_rshift(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to shift record on */
+	int			*stat)	/* success/failure */
+{
+	int			error;	/* error return value */
+	int			i;	/* loop index */
+	xfs_inobt_key_t		key;	/* key value for leaf level upward */
+	xfs_buf_t		*lbp;	/* buffer for left (current) block */
+	xfs_inobt_block_t	*left;	/* left (current) btree block */
+	xfs_inobt_key_t		*lkp;	/* key pointer for left block */
+	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
+	xfs_inobt_rec_t		*lrp;	/* record pointer for left block */
+	xfs_buf_t		*rbp;	/* buffer for right neighbor block */
+	xfs_inobt_block_t	*right;	/* right neighbor btree block */
+	xfs_inobt_key_t		*rkp;	/* key pointer for right block */
+	xfs_inobt_ptr_t		*rpp;	/* address pointer for right block */
+	xfs_inobt_rec_t		*rrp;	/* record pointer for right block */
+	xfs_btree_cur_t		*tcur;	/* temporary cursor */
+
+	/* Set up variables for this block as "left". */
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * If we've got no right sibling then we can't shift an entry right.
+	 */
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved (the last
+	 * one in left, or beyond it), don't do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the right neighbor as "right".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
+			XFS_INO_BTREE_REF))
+		return error;
+	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Make a hole at the start of the right neighbor block, then
+	 * copy the last left block entry to the hole.
+	 */
+	if (level > 0) {
+		lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*rkp = *lkp; /* INT_: no change copy */
+		*rpp = *lpp; /* INT_: no change copy */
+		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+	} else {
+		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		*rrp = *lrp;
+		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+		rkp = &key;	/* key built from right's new first record */
+	}
+	/*
+	 * Decrement and log left's numrecs, bump and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+	else
+		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+#endif
+	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Using a temporary cursor, update the parent key values of the
+	 * block on the right.  The increment's stat (i) is not examined.
+	 */
+	if (error = xfs_btree_dup_cursor(cur, &tcur))
+		return error;
+	xfs_btree_lastrec(tcur, level);
+	if ((error = xfs_inobt_increment(tcur, level, &i)) ||
+	    (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
+		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+		return error;
+	}
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ * *stat is 0 if no block could be allocated, else 1.  *curp is only
+ * assigned when there is a level above this one (level + 1 < bc_nlevels).
+ */
+STATIC int				/* error */
+xfs_inobt_split(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to split */
+	xfs_agblock_t		*bnop,	/* output: block number allocated */
+	xfs_inobt_key_t		*keyp,	/* output: first key of new block */
+	xfs_btree_cur_t		**curp,	/* output: new cursor */
+	int			*stat)	/* success/failure */
+{
+	xfs_alloc_arg_t		args;	/* allocation argument structure */
+	int			error;	/* error return value */
+	int			i;	/* loop index/record number */
+	xfs_agblock_t		lbno;	/* left (current) block number */
+	xfs_buf_t		*lbp;	/* buffer for left block */
+	xfs_inobt_block_t	*left;	/* left (current) btree block */
+	xfs_inobt_key_t		*lkp;	/* left btree key pointer */
+	xfs_inobt_ptr_t		*lpp;	/* left btree address pointer */
+	xfs_inobt_rec_t		*lrp;	/* left btree record pointer */
+	xfs_buf_t		*rbp;	/* buffer for right block */
+	xfs_inobt_block_t	*right;	/* right (new) btree block */
+	xfs_inobt_key_t		*rkp;	/* right btree key pointer */
+	xfs_inobt_ptr_t		*rpp;	/* right btree address pointer */
+	xfs_inobt_rec_t		*rrp;	/* right btree record pointer */
+
+	/* Set up left block (current one). */
+	lbp = cur->bc_bufs[level];
+	args.tp = cur->bc_tp;
+	args.mp = cur->bc_mp;
+	lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
+	/*
+	 * Allocate the new block.
+	 * If we can't do it, we're toast.  Give up.
+	 */
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
+	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
+		args.isfl = args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	if (error = xfs_alloc_vextent(&args))
+		return error;
+	if (args.fsbno == NULLFSBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
+	/*
+	 * Set up the new block as "right".
+	 */
+	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+	/*
+	 * "Left" is the current (according to the cursor) block.
+	 */
+	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * Fill in the btree header for the new block.
+	 */
+	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	right->bb_level = left->bb_level; /* INT_: direct copy */
+	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	/*
+	 * Make sure that if there's an odd number of entries now, that
+	 * each new block will have the same number of entries.
+	 */
+	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * For non-leaf blocks, copy keys and ptrs from entry i on to the new block.
+	 */
+	if (level > 0) {
+		lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
+		lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
+		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		*keyp = *rkp;
+	}
+	/*
+	 * For leaf blocks, copy records from entry i on to the new block.
+	 */
+	else {
+		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
+		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
+	}
+	/*
+	 * Adjust numrecs and sibling pointers so the new block sits
+	 * immediately to the right of left, then log both block headers.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+	INT_SET(left->bb_rightsib, ARCH_CONVERT, args.agbno);
+	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
+	xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	/*
+	 * If there's a block to the new block's right, make that block
+	 * point back to right instead of to left.
+	 */
+	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		xfs_inobt_block_t	*rrblock;	/* rr btree block */
+		xfs_buf_t		*rrbp;		/* buffer for rrblock */
+
+		if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+				INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				XFS_INO_BTREE_REF))
+			return error;
+		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
+		if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+			return error;
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.agbno);
+		xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
+	}
+	/*
+	 * If the cursor is really in the right block, move it there.
+	 * If it's just pointing past the last entry in left, then we'll
+	 * insert there, so don't change anything in that case.
+	 */
+	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+		xfs_btree_setbuf(cur, level, rbp);
+		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * If there are more levels, we'll need another cursor which refers
+	 * the right block, no matter where this cursor was.
+	 */
+	if (level + 1 < cur->bc_nlevels) {
+		if (error = xfs_btree_dup_cursor(cur, curp))
+			return error;
+		(*curp)->bc_ptrs[level + 1]++;	/* next entry in the parent */
+	}
+	*bnop = args.agbno;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Push a new key value up the cursor's path, starting at the given level
+ * and continuing toward the root while the cursor sits on entry 1.
+ */
+STATIC int				/* error */
+xfs_inobt_updkey(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_inobt_key_t		*keyp,	/* new key value to update to */
+	int			level)	/* starting level for update */
+{
+	int			idx = 1; /* cursor's key index, last level done */
+
+	/*
+	 * Rewrite the key the cursor points at on each level in turn.
+	 * Stop after the first level where that isn't the block's first key.
+	 */
+	while (idx == 1 && level < cur->bc_nlevels) {
+		xfs_buf_t		*kbp;	/* buffer for this level */
+		xfs_inobt_block_t	*kblock; /* btree block in kbp */
+		xfs_inobt_key_t		*slot;	/* key being overwritten */
+#ifdef DEBUG
+		int			error;	/* error return value */
+#endif
+
+		kbp = cur->bc_bufs[level];
+		kblock = XFS_BUF_TO_INOBT_BLOCK(kbp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, kblock, level, kbp))
+			return error;
+#endif
+		idx = cur->bc_ptrs[level];
+		slot = XFS_INOBT_KEY_ADDR(kblock, idx, cur);
+		*slot = *keyp;
+		xfs_inobt_log_keys(cur, kbp, idx, idx);
+		level++;
+	}
+	return 0;
+}
+
+/*
+ * Externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ * *stat is set to 1 on success, or to 0 if we stepped off the left
+ * edge of the tree (the block at this level has no left sibling).
+ */
+int					/* error */
+xfs_inobt_decrement(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat)	/* success/failure */
+{
+	xfs_inobt_block_t	*block;	/* btree block */
+	int			error;	/* error return value */
+	int			lev;	/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/* Read-ahead to the left at this level. */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+	/*
+	 * Decrement the ptr at this level.  If we're still in the block
+	 * then we're done.
+	 */
+	if (--cur->bc_ptrs[level] > 0) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level,
+			cur->bc_bufs[level]))
+		return error;
+#endif
+	/*
+	 * Off the left edge of the tree: fail (the ptr stays decremented).
+	 */
+	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree decrementing pointers.
+	 * Stop when we don't go off the left edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		if (--cur->bc_ptrs[lev] > 0)
+			break;
+		/*
+		 * Read-ahead the left block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, reading each child block into the
+	 * cursor and pointing the cursor at its last entry (bb_numrecs).
+	 */
+	for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+		xfs_buf_t	*bp;	/* buffer containing btree block */
+
+		agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.i.agno, agbno, 0, &bp,
+				XFS_INO_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_INOBT_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Fetch the fields of the leaf record the cursor currently points at.
+ */
+int					/* error */
+xfs_inobt_get_rec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_agino_t		*ino,	/* output: starting inode of chunk */
+	__int32_t		*fcnt,	/* output: number of free inodes */
+	xfs_inofree_t		*free,	/* output: free inode mask */
+	int			*stat,	/* output: success/failure */
+	xfs_arch_t		arch)	/* input: architecture */
+{
+	xfs_buf_t		*lbp;	/* buffer holding the leaf block */
+	xfs_inobt_block_t	*leaf;	/* leaf btree block */
+	int			recno;	/* cursor's record index in the leaf */
+	xfs_inobt_rec_t		*rp;	/* the record itself */
+#ifdef DEBUG
+	int			error;	/* error return value */
+#endif
+
+	lbp = cur->bc_bufs[0];
+	leaf = XFS_BUF_TO_INOBT_BLOCK(lbp);
+	recno = cur->bc_ptrs[0];
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, leaf, 0, lbp))
+		return error;
+#endif
+	/*
+	 * A cursor index outside 1..numrecs means there is no record here.
+	 */
+	if (recno > INT_GET(leaf->bb_numrecs, ARCH_CONVERT) || recno <= 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Locate the record, then hand back its three fields.
+	 */
+	rp = XFS_INOBT_REC_ADDR(leaf, recno, cur);
+	ASSERT(arch == ARCH_NOCONVERT || arch == ARCH_CONVERT);
+	if (arch != ARCH_NOCONVERT) {
+		INT_COPY(*ino, rp->ir_startino, ARCH_CONVERT);
+		INT_COPY(*fcnt, rp->ir_freecount, ARCH_CONVERT);
+		INT_COPY(*free, rp->ir_free, ARCH_CONVERT);
+	} else {
+		*ino = INT_GET(rp->ir_startino, ARCH_CONVERT);
+		*fcnt = INT_GET(rp->ir_freecount, ARCH_CONVERT);
+		*free = INT_GET(rp->ir_free, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ * *stat is set to 1 on success, or to 0 if we stepped off the right
+ * edge of the tree (the block at this level has no right sibling).
+ */
+int					/* error */
+xfs_inobt_increment(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree, 0 is leaf */
+	int			*stat)	/* success/failure */
+{
+	xfs_inobt_block_t	*block;	/* btree block */
+	xfs_buf_t		*bp;	/* buffer containing btree block */
+	int			error;	/* error return value */
+	int			lev;	/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/* Read-ahead to the right at this level. */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+#endif
+	/*
+	 * Increment the ptr at this level.  If we're still in the block
+	 * then we're done.
+	 */
+	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * Off the right edge of the tree: fail (the ptr stays incremented).
+	 */
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree incrementing pointers.
+	 * Stop when we don't go off the right edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		bp = cur->bc_bufs[lev];
+		block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+#endif
+		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			break;
+		/*
+		 * Read-ahead the right block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, reading each child block into the
+	 * cursor and pointing the cursor at its first entry.
+	 */
+	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
+	     lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+
+		agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.i.agno, agbno, 0, &bp,
+				XFS_INO_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_INOBT_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = 1;
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Insert the record held in cur->bc_rec.i at the cursor's position.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int					/* error */
+xfs_inobt_insert(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		lev;		/* level being inserted into */
+	xfs_agblock_t	newbno;		/* block created by a split, if any */
+	xfs_btree_cur_t	*newcur;	/* cursor created by a split, if any */
+	xfs_inobt_rec_t	newrec;		/* record being inserted this level */
+	int		ok;		/* result value, 0 for failure */
+	xfs_btree_cur_t	*wcur;		/* cursor used for this level */
+
+	INT_SET(newrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
+	INT_SET(newrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
+	INT_SET(newrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
+	newbno = NULLAGBLOCK;
+	newcur = (xfs_btree_cur_t *)0;
+	wcur = cur;
+	/*
+	 * Work upward from the leaf, one level per pass.  A pass that
+	 * leaves newbno null produced no split block, so nothing more
+	 * needs inserting and the loop ends.
+	 */
+	for (lev = 0; ; lev++) {
+		/*
+		 * Insert newrec/newbno into this level of the tree.
+		 * On error, drop any cursor that isn't the caller's.
+		 */
+		error = xfs_inobt_insrec(wcur, lev, &newbno, &newrec, &newcur,
+				&ok);
+		if (error) {
+			if (wcur != cur)
+				xfs_btree_del_cursor(wcur, XFS_BTREE_ERROR);
+			return error;
+		}
+		/*
+		 * A cursor we were handed by an earlier pass is finished
+		 * with once a replacement has come back or the insert is
+		 * complete; copy its level count to the caller's first.
+		 */
+		if (wcur != cur && (newcur || newbno == NULLAGBLOCK)) {
+			cur->bc_nlevels = wcur->bc_nlevels;
+			xfs_btree_del_cursor(wcur, XFS_BTREE_NOERROR);
+		}
+		/* Continue with the new cursor if one was returned. */
+		if (newcur) {
+			wcur = newcur;
+			newcur = (xfs_btree_cur_t *)0;
+		}
+		if (newbno == NULLAGBLOCK)
+			break;
+	}
+	*stat = ok;
+	return 0;
+}
+
+/*
+ * Search the btree given by cur for the record matching ino exactly.
+ */
+int					/* error */
+xfs_inobt_lookup_eq(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agino_t	ino,		/* starting inode of chunk */
+	__int32_t	fcnt,		/* free inode count */
+	xfs_inofree_t	free,		/* free inode mask */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.i.ir_free = free;
+	cur->bc_rec.i.ir_freecount = fcnt;
+	cur->bc_rec.i.ir_startino = ino;
+	return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Search the btree given by cur for the first record
+ * that is greater than or equal to ino.
+ */
+int					/* error */
+xfs_inobt_lookup_ge(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agino_t	ino,		/* starting inode of chunk */
+	__int32_t	fcnt,		/* free inode count */
+	xfs_inofree_t	free,		/* free inode mask */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.i.ir_free = free;
+	cur->bc_rec.i.ir_freecount = fcnt;
+	cur->bc_rec.i.ir_startino = ino;
+	return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Search the btree given by cur for the first record
+ * that is less than or equal to ino.
+ */
+int					/* error */
+xfs_inobt_lookup_le(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agino_t	ino,		/* starting inode of chunk */
+	__int32_t	fcnt,		/* free inode count */
+	xfs_inofree_t	free,		/* free inode mask */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.i.ir_free = free;
+	cur->bc_rec.i.ir_freecount = fcnt;
+	cur->bc_rec.i.ir_startino = ino;
+	return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Overwrite the leaf record the cursor points at with [ino, fcnt, free].
+ * If that record is the first one in its block, the new key is also
+ * passed up the tree.  Returns 0, or an error from a helper routine.
+ */
+int					/* error */
+xfs_inobt_update(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	xfs_agino_t		ino,	/* starting inode of chunk */
+	__int32_t		fcnt,	/* free inode count */
+	xfs_inofree_t		free)	/* free inode mask */
+{
+#ifdef DEBUG
+	int			error;	/* error return value */
+#endif
+	xfs_inobt_key_t		key;	/* key containing [ino] */
+	xfs_buf_t		*lbp;	/* buffer containing the leaf block */
+	xfs_inobt_block_t	*leaf;	/* leaf block being changed */
+	int			recno;	/* index of the record being changed */
+	xfs_inobt_rec_t		*slot;	/* the record being changed */
+
+	/*
+	 * Find the leaf block and the record within it from the cursor.
+	 */
+	lbp = cur->bc_bufs[0];
+	leaf = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, leaf, 0, lbp))
+		return error;
+#endif
+	recno = cur->bc_ptrs[0];
+	slot = XFS_INOBT_REC_ADDR(leaf, recno, cur);
+	/*
+	 * Store the new values and log the changed record.
+	 */
+	INT_SET(slot->ir_startino, ARCH_CONVERT, ino);
+	INT_SET(slot->ir_freecount, ARCH_CONVERT, fcnt);
+	INT_SET(slot->ir_free, ARCH_CONVERT, free);
+	xfs_inobt_log_recs(cur, lbp, recno, recno);
+	/*
+	 * Any record other than the first leaves the parent keys alone.
+	 */
+	if (recno != 1)
+		return 0;
+	/*
+	 * First record in the leaf changed: pass the new key value up
+	 * to our parent, starting at level 1.
+	 */
+	INT_SET(key.ir_startino, ARCH_CONVERT, ino);
+	return xfs_inobt_updkey(cur, &key, 1);
+}
diff --git a/libxfs/xfs_inode.c b/libxfs/xfs_inode.c
new file mode 100644
index 000000000..36bf1bd9e
--- /dev/null
+++ b/libxfs/xfs_inode.c
@@ -0,0 +1,1371 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+xfs_zone_t *xfs_ifork_zone;	/* zone the attribute fork (i_afp) is allocated from */
+xfs_zone_t *xfs_inode_zone;	/* zone in-core inodes are allocated from */
+
+#ifdef DEBUG
+/*
+ * DEBUG: alert and ASSERT if an inode in the buffer has zero next_unlinked.
+ */
+void
+xfs_inobp_check(
+	xfs_mount_t	*mp,
+	xfs_buf_t	*bp)
+{
+	xfs_dinode_t	*dip;
+	int		ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
+	int		n;
+
+	for (n = 0; n < ninodes; n++) {
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp, n * mp->m_sb.sb_inodesize);
+		if (!INT_ISZERO(dip->di_next_unlinked, ARCH_CONVERT))
+			continue;
+		xfs_fs_cmn_err(CE_ALERT, mp,
+			"Detected a bogus zero next_unlinked field in incore inode buffer 0x%p.  About to pop an ASSERT.",
+			bp);
+		ASSERT(!INT_ISZERO(dip->di_next_unlinked, ARCH_CONVERT));
+	}
+}
+#endif
+
+
+/*
+ * This routine is called to map an inode to the buffer containing
+ * the on-disk version of the inode.  It returns a pointer to the
+ * buffer containing the on-disk inode in the bpp parameter, and in
+ * the dipp parameter it returns a pointer to the on-disk inode within
+ * that buffer.  The return value is 0 on success or an error code.
+ *
+ * If a non-zero error is returned, then the contents of bpp and
+ * dipp are undefined.
+ *
+ * If the inode has not been mapped yet (its i_blkno is still 0), use
+ * xfs_imap() to determine the size and location of the buffer to read
+ * from disk and save that mapping in the inode.  Otherwise reuse the
+ * saved mapping rather than calling xfs_imap().  This allows us to
+ * avoid the overhead of looking at the inode btree for small block
+ * file systems (see xfs_dilocate()).
+ *
+ * A non-zero bno is handed to xfs_imap() through imap.im_blkno and is
+ * asserted to match the block address that is finally used.
+ */
+int
+xfs_itobp(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,	
+	xfs_dinode_t	**dipp,
+	xfs_buf_t	**bpp,
+	xfs_daddr_t	bno)
+{
+	xfs_buf_t	*bp;
+	int		error;
+	xfs_imap_t	imap;
+#ifdef __KERNEL__
+	int		i;
+	int		ni;
+#endif
+
+	if (ip->i_blkno == (xfs_daddr_t)0) {
+		/*
+		 * Call the space management code to find the location of the
+		 * inode on disk.
+		 */
+		imap.im_blkno = bno;
+		error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP);
+		if (error != 0) {
+			return error;
+		}
+
+		/*
+		 * If the inode number maps to a block outside the bounds
+		 * of the file system then return EINVAL rather than calling
+		 * read_buf and panicking when we get an error from the
+		 * driver.
+		 */
+		if ((imap.im_blkno + imap.im_len) >
+		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+			return XFS_ERROR(EINVAL);
+		}
+
+		/*
+		 * Fill in the fields in the inode that will be used to
+		 * map the inode to its buffer from now on.
+		 */
+		ip->i_blkno = imap.im_blkno;
+		ip->i_len = imap.im_len;
+		ip->i_boffset = imap.im_boffset;
+	} else {
+		/*
+		 * We've already mapped the inode once, so just use the
+		 * mapping that we saved the first time.
+		 */
+		imap.im_blkno = ip->i_blkno;
+		imap.im_len = ip->i_len;
+		imap.im_boffset = ip->i_boffset;
+	}
+	ASSERT(bno == 0 || bno == imap.im_blkno);
+
+	/*
+	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
+	 * default to just a read_buf() call.
+	 */
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
+				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
+
+	if (error) {
+		return error;
+	}
+#ifdef __KERNEL__
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+	 */
+#ifdef DEBUG
+	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
+#else
+	ni = 1;
+#endif
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
+			    XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
+		if (XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
+				 XFS_RANDOM_ITOBP_INOTOBP)) {
+#ifdef DEBUG
+			prdev("bad inode magic/vsn daddr 0x%Lx #%d (magic=%x)", 
+				mp->m_dev, imap.im_blkno, i,
+				INT_GET(dip->di_core.di_magic, ARCH_CONVERT));
+#endif
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+	}
+#endif	/* __KERNEL__ */
+
+	xfs_inobp_check(mp, bp);
+
+	/*
+	 * Mark the buffer as an inode buffer now that it looks good
+	 */
+	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+	/*
+	 * Set *dipp to point to the on-disk inode in the buffer.
+	 */
+	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Move inode type and inode format specific information from the
+ * on-disk inode to the in-core inode.  For fifos, devs, and sockets
+ * this sets if_rdev.  For files, directories, and symlinks it brings
+ * in the in-line data, the extent list or the B-tree root; the rest
+ * of a B-tree fork is read in later (see xfs_iread_extents()).  An
+ * attribute fork, if present, is set up the same way.  Returns 0 or
+ * EFSCORRUPTED (or a helper's error) when the dinode fails validation.
+ */
+STATIC int
+xfs_iformat(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip)
+{
+	xfs_attr_shortform_t	*atp;
+	xfs_fsize_t		di_size;
+	int			error = 0;
+	int			size;
+
+	ip->i_df.if_ext_max =
+		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+
+	if (INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
+	    INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
+	    INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt dinode %Lu, extent total = %d, nblocks = %Ld.  Unmount and run xfs_repair.",
+			ip->i_ino,
+			(int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
+			INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT));
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	if (INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt dinode %Lu, forkoff = 0x%x.  Unmount and run xfs_repair.",
+			ip->i_ino, (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	switch (ip->i_d.di_mode & IFMT) {
+	case IFIFO:
+	case IFCHR:
+	case IFBLK:
+	case IFSOCK:
+		if (INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)
+			return XFS_ERROR(EFSCORRUPTED);
+		ip->i_d.di_size = 0;
+		ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
+		break;
+
+	case IFREG:
+	case IFLNK:
+	case IFDIR:
+		switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) {
+		case XFS_DINODE_FMT_LOCAL:
+			/* no local regular files yet */
+			if ((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFREG) {
+				xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+					"corrupt inode (local format for regular file) %Lu.  Unmount and run xfs_repair.",
+					ip->i_ino);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			/*
+			 * di_size is signed, so a negative size must be rejected too.
+			 */
+			di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+			if (di_size < 0 || di_size >
+			    XFS_DFORK_DSIZE_ARCH(dip, ip->i_mount, ARCH_CONVERT)) {
+				xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+					"corrupt inode %Lu (bad size %Ld for local inode).  Unmount and run xfs_repair.",
+					ip->i_ino, di_size);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			size = (int)di_size;
+			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
+			break;
+		case XFS_DINODE_FMT_EXTENTS:
+			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
+			break;
+		case XFS_DINODE_FMT_BTREE:
+			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+			break;
+		default:
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+		break;
+
+	default:
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	if (error)
+		return error;
+	if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+		return 0;
+	ASSERT(ip->i_afp == NULL);
+	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
+	ip->i_afp->if_ext_max =
+		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+	switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
+	case XFS_DINODE_FMT_LOCAL:
+		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+		size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT);
+		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
+		break;
+	default:
+		error = XFS_ERROR(EFSCORRUPTED);
+		break;
+	}
+	if (error) {
+		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+		ip->i_afp = NULL;
+		xfs_idestroy_fork(ip, XFS_DATA_FORK);
+	}
+	return error;
+}
+
+/*
+ * The fork is in-lined in the on-disk inode.  If it fits into
+ * if_inline_data, then copy it there, otherwise allocate a buffer
+ * for it and copy the data there.  Either way, set if_data to point
+ * at the data (NULL when size is 0).  If we allocate a buffer for
+ * the data, make sure that its size is a multiple of 4 and record
+ * the real size in if_real_bytes.
+ * Returns 0, or EFSCORRUPTED if size is negative or larger than the
+ * space the on-disk inode has for this fork.
+ */
+STATIC int
+xfs_iformat_local(
+	xfs_inode_t	*ip,
+	xfs_dinode_t	*dip,
+	int		whichfork,
+	int		size)
+{
+	xfs_ifork_t	*ifp;
+	int		real_size;
+
+	/*
+	 * If the size is unreasonable (negative, or too big for the fork),
+	 * bail out rather than crash in kmem_alloc() or bcopy() below.
+	 */
+	if (size < 0 ||
+	    size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu (bad size %d for local fork, size = %d).  Unmount and run xfs_repair.",
+			ip->i_ino, size,
+			XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT));
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	real_size = 0;
+	if (size == 0)
+		ifp->if_u1.if_data = NULL;
+	else if (size <= sizeof(ifp->if_u2.if_inline_data))
+		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+	else {
+		real_size = roundup(size, 4);
+		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+	}
+	ifp->if_bytes = size;
+	ifp->if_real_bytes = real_size;
+	if (size)
+		bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_data, size);
+	ifp->if_flags &= ~XFS_IFEXTENTS;
+	ifp->if_flags |= XFS_IFINLINE;
+	return 0;
+}
+
+/*
+ * The fork consists of a set of extents all of which fit into the
+ * on-disk inode.  If there are few enough extents to fit into
+ * if_inline_ext (at most XFS_INLINE_EXTS), then copy them there.
+ * Otherwise allocate a buffer for them and copy them into it.
+ * Either way, set if_extents to point at the extents.  Returns 0
+ * or EFSCORRUPTED.  NOTE(review): the EFSCORRUPTED return after the
+ * copy does not free a buffer allocated here -- confirm callers do.
+ */
+STATIC int
+xfs_iformat_extents(
+	xfs_inode_t	*ip,
+	xfs_dinode_t	*dip,
+	int		whichfork)
+{
+	xfs_ifork_t	*ifp;
+	int		nex;
+	int		real_size;
+	int		size;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
+
+	/*
+	 * If the number of extents is unreasonable, then something
+	 * is wrong and we just bail out rather than crash in
+	 * kmem_alloc() or bcopy() below.
+	 */
+	if (size < 0 || size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu ((a)extents = %d).  Unmount and run xfs_repair.",
+			ip->i_ino, nex);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	real_size = 0;
+	if (nex == 0)
+		ifp->if_u1.if_extents = NULL;
+	else if (nex <= XFS_INLINE_EXTS)
+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+	else {
+		ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
+		ASSERT(ifp->if_u1.if_extents != NULL);
+		real_size = size;
+	}
+	ifp->if_bytes = size;
+	ifp->if_real_bytes = real_size;
+	if (size) {
+		xfs_validate_extents(
+			(xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
+			nex, XFS_EXTFMT_INODE(ip));
+		bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_extents,
+		      size);
+		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
+			whichfork);
+		if (whichfork != XFS_DATA_FORK ||
+			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
+				if (xfs_check_nostate_extents(
+				    ifp->if_u1.if_extents, nex))
+					return XFS_ERROR(EFSCORRUPTED);
+	}
+	ifp->if_flags |= XFS_IFEXTENTS;
+	return 0;
+}
+
+/*
+ * The fork has too many extents to fit into the inode, so they are
+ * in B-tree format.  Allocate a buffer for the root of the B-tree
+ * (if_broot) and convert the on-disk root into it.  XFS_IFEXTENTS is
+ * cleared and XFS_IFBROOT set; the extents themselves are read in
+ * later, when they are needed.  Returns 0, or EFSCORRUPTED if the
+ * fork fails the sanity checks below.
+ */
+STATIC int
+xfs_iformat_btree(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip,
+	int			whichfork)
+{
+	xfs_bmdr_block_t	*dfp;
+	xfs_ifork_t		*ifp;
+	/* REFERENCED */
+	int			nrecs;
+	int			size;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+	size = XFS_BMAP_BROOT_SPACE(dfp);
+	nrecs = XFS_BMAP_BROOT_NUMRECS(dfp);
+
+	/*
+	 * Bail out if -- the fork has no more extents than would fit
+	 * in the fork itself (so it shouldn't be in btree format), the
+	 * root btree block has more records than can fit into the
+	 * fork, or the number of extents is greater than the number of
+	 * blocks.
+	 */
+	if (XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
+	    || XFS_BMDR_SPACE_CALC(nrecs) >
+			XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)
+	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu (btree).  Unmount and run xfs_repair.",
+			ip->i_ino);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	ifp->if_broot_bytes = size;
+	ifp->if_broot = kmem_alloc(size, KM_SLEEP);
+	ASSERT(ifp->if_broot != NULL);
+	/*
+	 * Copy and convert from the on-disk structure
+	 * to the in-memory structure.
+	 */
+	xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT),
+		ifp->if_broot, size);
+	ifp->if_flags &= ~XFS_IFEXTENTS;
+	ifp->if_flags |= XFS_IFBROOT;
+
+	return 0;
+}
+
+/*
+ * xfs_xlate_dinode_core - convert the fields of an xfs_dinode_core_t
+ * between their on-disk and native representations
+ *
+ * buf  = on-disk representation
+ * dip  = native representation
+ * dir  = direction - +ve -> disk to native
+ *                    -ve -> native to disk
+ * arch = on-disk architecture
+ */
+
+void
+xfs_xlate_dinode_core(
+	xfs_caddr_t		buf,
+	xfs_dinode_core_t	*dip,
+	int			dir,
+	xfs_arch_t		arch)
+{
+	xfs_dinode_core_t	*disk = (xfs_dinode_core_t *)buf;
+
+	ASSERT(dir);
+
+	/*
+	 * Nothing to convert: copy the whole core in the requested direction.
+	 */
+	if (arch == ARCH_NOCONVERT) {
+		if (dir > 0)
+			bcopy(buf, (xfs_caddr_t)dip, sizeof(xfs_dinode_core_t));
+		else
+			bcopy((xfs_caddr_t)dip, buf, sizeof(xfs_dinode_core_t));
+		return;
+	}
+
+	INT_XLATE(disk->di_magic, dip->di_magic, dir, arch);
+	INT_XLATE(disk->di_mode, dip->di_mode, dir, arch);
+	INT_XLATE(disk->di_version, dip->di_version, dir, arch);
+	INT_XLATE(disk->di_format, dip->di_format, dir, arch);
+	INT_XLATE(disk->di_onlink, dip->di_onlink, dir, arch);
+	INT_XLATE(disk->di_uid, dip->di_uid, dir, arch);
+	INT_XLATE(disk->di_gid, dip->di_gid, dir, arch);
+	INT_XLATE(disk->di_nlink, dip->di_nlink, dir, arch);
+	INT_XLATE(disk->di_projid, dip->di_projid, dir, arch);
+
+	/* The pad bytes are copied as they are. */
+	if (dir > 0)
+		bcopy(disk->di_pad, dip->di_pad, sizeof(disk->di_pad));
+	else
+		bcopy(dip->di_pad, disk->di_pad, sizeof(disk->di_pad));
+
+	INT_XLATE(disk->di_atime.t_sec, dip->di_atime.t_sec, dir, arch);
+	INT_XLATE(disk->di_atime.t_nsec, dip->di_atime.t_nsec, dir, arch);
+
+	INT_XLATE(disk->di_mtime.t_sec, dip->di_mtime.t_sec, dir, arch);
+	INT_XLATE(disk->di_mtime.t_nsec, dip->di_mtime.t_nsec, dir, arch);
+
+	INT_XLATE(disk->di_ctime.t_sec, dip->di_ctime.t_sec, dir, arch);
+	INT_XLATE(disk->di_ctime.t_nsec, dip->di_ctime.t_nsec, dir, arch);
+
+	INT_XLATE(disk->di_size, dip->di_size, dir, arch);
+	INT_XLATE(disk->di_nblocks, dip->di_nblocks, dir, arch);
+	INT_XLATE(disk->di_extsize, dip->di_extsize, dir, arch);
+
+	INT_XLATE(disk->di_nextents, dip->di_nextents, dir, arch);
+	INT_XLATE(disk->di_anextents, dip->di_anextents, dir, arch);
+	INT_XLATE(disk->di_forkoff, dip->di_forkoff, dir, arch);
+	INT_XLATE(disk->di_aformat, dip->di_aformat, dir, arch);
+	INT_XLATE(disk->di_dmevmask, dip->di_dmevmask, dir, arch);
+	INT_XLATE(disk->di_dmstate, dip->di_dmstate, dir, arch);
+	INT_XLATE(disk->di_flags, dip->di_flags, dir, arch);
+	INT_XLATE(disk->di_gen, dip->di_gen, dir, arch);
+}
+
+/*
+ * Given a mount structure and an inode number, return in *ipp a
+ * newly allocated in-core inode corresponding to that inode number.
+ * Returns 0 on success; on error the in-core inode is freed again.
+ *
+ * Initialize the inode's attributes and extent pointers if it
+ * already has them (it will not if the on-disk di_mode is zero).
+ */
+int
+xfs_iread(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_ino_t	ino,
+	xfs_inode_t	**ipp,
+	xfs_daddr_t		bno)
+{
+	xfs_buf_t	*bp;
+	xfs_dinode_t	*dip;
+	xfs_inode_t	*ip;
+	int		error;
+
+	ASSERT(xfs_inode_zone != NULL);
+
+	ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
+	ip->i_ino = ino;
+	ip->i_dev = mp->m_dev;
+	ip->i_mount = mp;
+
+	/*
+	 * Get pointers to the on-disk inode and the buffer containing it.
+	 * If the inode number refers to a block outside the file system
+	 * then xfs_itobp() will return an error.  In this case we return
+	 * that error as well.  i_blkno is presumably still 0 from
+	 * kmem_zone_zalloc(), so xfs_itobp() treats this as a new inode.
+	 */
+	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno);
+
+	if (error != 0) {
+		kmem_zone_free(xfs_inode_zone, ip);
+		return error;
+	}
+
+	/*
+	 * Initialize inode's trace buffers before xfs_iformat in case it
+	 * adds entries.  NOTE(review): error paths below free only ip.
+	 */
+#ifdef XFS_BMAP_TRACE
+	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_BMBT_TRACE
+	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_RW_TRACE
+	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_STRAT_TRACE
+	ip->i_strat_trace = ktrace_alloc(XFS_STRAT_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_ILOCK_TRACE
+	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_DIR2_TRACE
+	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
+#endif
+
+	/*
+	 * If we got something that isn't an inode it means someone
+	 * (nfs or dmi) has a stale handle.
+	 */
+        if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
+		kmem_zone_free(xfs_inode_zone, ip);
+		xfs_trans_brelse(tp, bp);
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * If the on-disk inode has a non-zero mode (it is in use),
+	 * copy all of the inode core into the in-core inode.
+	 * xfs_iformat() handles copying in the inode format
+	 * specific information.
+	 * Otherwise, just get the truly permanent information.
+	 */
+	if (!INT_ISZERO(dip->di_core.di_mode, ARCH_CONVERT)) {
+                xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, 
+                     &(ip->i_d), 1, ARCH_CONVERT);
+		error = xfs_iformat(ip, dip);
+		if (error)  {
+			kmem_zone_free(xfs_inode_zone, ip);
+			xfs_trans_brelse(tp, bp);
+			return error;
+		}
+	} else {
+		ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT);
+		ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT);
+		ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT);
+		/*
+		 * Make sure to pull in the mode here as well in
+		 * case the inode is released without being used.
+		 * This ensures that xfs_inactive() will see that
+		 * the inode is already free and not try to mess
+		 * with the uninitialized part of it.
+		 */
+		ip->i_d.di_mode = 0;
+		/*
+		 * Initialize the per-fork minima and maxima for a new
+		 * inode here.  xfs_iformat will do it for old inodes.
+		 */
+		ip->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+	}	
+
+	/*
+	 * The inode format changed when we moved the link count and
+	 * made it 32 bits long.  If this is an old format inode,
+	 * convert it in memory to look like a new one.  If it gets
+	 * flushed to disk we will convert back before flushing or
+	 * logging it.  We zero out the new projid field and the old link
+	 * count field.  We'll handle clearing the pad field (the remains
+	 * of the old uuid field) when we actually convert the inode to
+	 * the new format. We don't change the version number so that we
+	 * can distinguish this from a real new format inode.
+	 */
+	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+		ip->i_d.di_nlink = ip->i_d.di_onlink;
+		ip->i_d.di_onlink = 0;
+		ip->i_d.di_projid = 0;
+	}
+
+	ip->i_delayed_blks = 0;
+
+	/*
+	 * Mark the buffer containing the inode as something to keep
+	 * around for a while.  This helps to keep recently accessed
+	 * meta-data in-core longer.
+	 */
+	 XFS_BUF_SET_REF(bp, XFS_INO_REF);
+
+	/*
+	 * Use xfs_trans_brelse() to release the buffer containing the
+	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
+	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
+	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
+	 * will only release the buffer if it is not dirty within the
+	 * transaction.  It will be OK to release the buffer in this case,
+	 * because inodes on disk are never destroyed and we will be
+	 * locking the new in-core inode before putting it in the hash
+	 * table where other processes can find it.  Thus we don't have
+	 * to worry about the inode being changed just because we released
+	 * the buffer.
+	 */
+	xfs_trans_brelse(tp, bp);
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Read in extents from a btree-format fork (EFSCORRUPTED otherwise).
+ * Allocate if_extents and fill it via xfs_bmap_read_extents().
+ */
+int
+xfs_iread_extents(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	int		whichfork)
+{
+	int		error;
+	xfs_ifork_t	*ifp;
+	size_t		size;
+
+	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		return XFS_ERROR(EFSCORRUPTED);
+	size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t);
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/*
+	 * We know that the size is legal (it's checked in iformat_btree)
+	 */
+	ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
+	ASSERT(ifp->if_u1.if_extents != NULL);
+	ifp->if_lastex = NULLEXTNUM;
+	ifp->if_bytes = ifp->if_real_bytes = (int)size;
+	ifp->if_flags |= XFS_IFEXTENTS;
+	error = xfs_bmap_read_extents(tp, ip, whichfork);
+	if (error) {
+		kmem_free(ifp->if_u1.if_extents, size);
+		ifp->if_u1.if_extents = NULL;
+		ifp->if_bytes = ifp->if_real_bytes = 0;
+		ifp->if_flags &= ~XFS_IFEXTENTS;
+		return error;
+	}
+	xfs_validate_extents((xfs_bmbt_rec_32_t *)ifp->if_u1.if_extents,
+		XFS_IFORK_NEXTENTS(ip, whichfork), XFS_EXTFMT_INODE(ip));
+	return 0;
+}
+
+/*
+ * Reallocate the space for if_broot based on the number of records
+ * being added or deleted as indicated in rec_diff.  Move the records
+ * and pointers in if_broot to fit the new size.  When shrinking this
+ * will eliminate holes between the records and pointers created by
+ * the caller.  When growing this will create holes to be filled in
+ * by the caller.
+ *
+ * The caller must not request to add more records than would fit in
+ * the on-disk inode root.  If if_broot is currently empty and we are
+ * adding records, one will be allocated.  The caller must also not
+ * request that the number of records go below zero (zero is fine).
+ *
+ * ip -- the inode whose if_broot area is changing
+ * rec_diff -- the change in the number of records, positive or negative,
+ *	 requested for the if_broot array.
+ * whichfork -- selects the fork (via XFS_IFORK_PTR) to operate on
+ */
+void
+xfs_iroot_realloc(
+	xfs_inode_t 		*ip,
+	int 			rec_diff,
+	int			whichfork)
+{
+	int			cur_max;
+	xfs_ifork_t		*ifp;
+	xfs_bmbt_block_t	*new_broot;
+	int			new_max;
+	size_t			new_size;
+	char			*np;
+	char			*op;
+
+	/*
+	 * Handle the degenerate case quietly.
+	 */
+	if (rec_diff == 0) {
+		return;
+	}
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (rec_diff > 0) {
+		/*
+		 * If there wasn't any memory allocated before, just
+		 * allocate it now and get out.
+		 */
+		if (ifp->if_broot_bytes == 0) {
+			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
+			ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size,
+								     KM_SLEEP);
+			ifp->if_broot_bytes = (int)new_size;
+			return;
+		}
+
+		/*
+		 * If there is already an existing if_broot, then we need
+		 * to realloc() it and shift the pointers to their new
+		 * location.  The records don't change location because
+		 * they are kept butted up against the btree block header.
+		 */
+		cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+		new_max = cur_max + rec_diff;
+		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
+		ifp->if_broot = (xfs_bmbt_block_t *) 
+		  kmem_realloc(ifp->if_broot,
+				new_size,
+				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
+				KM_SLEEP);
+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+						      ifp->if_broot_bytes);
+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+						      (int)new_size);
+		ifp->if_broot_bytes = (int)new_size;
+		ASSERT(ifp->if_broot_bytes <=
+			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
+		ovbcopy(op, np, cur_max * (uint)sizeof(xfs_dfsbno_t));
+		return;
+	}
+
+	/*
+	 * rec_diff is less than 0.  In this case, we are shrinking the
+	 * if_broot buffer.  It must already exist.  If we go to zero
+	 * records, just get rid of the root and clear the status bit.
+	 */
+	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
+	cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+	new_max = cur_max + rec_diff;
+	ASSERT(new_max >= 0);
+	if (new_max > 0)
+		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
+	else
+		new_size = 0;
+	if (new_size > 0) {
+		new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP);
+		/*
+		 * First copy over the btree block header.
+		 */
+		bcopy(ifp->if_broot, new_broot, sizeof(xfs_bmbt_block_t));
+	} else {
+		new_broot = NULL;
+		ifp->if_flags &= ~XFS_IFBROOT;
+	}
+
+	/*
+	 * Only copy the records and pointers if there are any.
+	 */
+	if (new_max > 0) {
+		/*
+		 * First copy the records.
+		 */
+		op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1,
+						     ifp->if_broot_bytes);
+		np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
+						     (int)new_size);
+		bcopy(op, np, new_max * (uint)sizeof(xfs_bmbt_rec_t));	
+
+		/*
+		 * Then copy the pointers.
+		 */
+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+						     ifp->if_broot_bytes);
+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
+						     (int)new_size);
+		bcopy(op, np, new_max * (uint)sizeof(xfs_dfsbno_t));
+	}
+	kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+	ifp->if_broot = new_broot;
+	ifp->if_broot_bytes = (int)new_size;
+	ASSERT(ifp->if_broot_bytes <=
+		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
+	return;
+}
+
+/*
+ * This is called when the amount of space needed for if_extents
+ * is increased or decreased.  The change in size is indicated by
+ * the number of extents to add or delete in the ext_diff parameter.
+ *
+ * If the new size fits in the inline buffer, then switch to using the
+ * inline buffer.  Otherwise, use kmem_realloc() or kmem_alloc() to
+ * adjust the size of the buffer; a size that is not a power of two is
+ * passed through xfs_iroundup() first.
+ *
+ * ip -- the inode whose if_extents area is changing
+ * ext_diff -- the change in the number of extents, positive or negative,
+ *	 requested for the if_extents array.
+ * whichfork -- selects the fork (via XFS_IFORK_PTR) to operate on
+ */
+void
+xfs_iext_realloc(
+	xfs_inode_t	*ip,
+	int		ext_diff,
+	int		whichfork)
+{
+	int		byte_diff;
+	xfs_ifork_t	*ifp;
+	int		new_size;
+	uint		rnew_size;
+
+	if (ext_diff == 0) {
+		return;
+	}
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t);
+	new_size = (int)ifp->if_bytes + byte_diff;
+	ASSERT(new_size >= 0);
+
+	if (new_size == 0) {
+		if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) {
+			ASSERT(ifp->if_real_bytes != 0);
+			kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+		}
+		ifp->if_u1.if_extents = NULL;
+		rnew_size = 0;
+	} else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) {
+		/*
+		 * If the valid extents can fit in if_inline_ext,
+		 * copy them from the malloc'd vector and free it.
+		 */
+		if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) {
+			/*
+			 * For now, empty files are format EXTENTS,
+			 * so the if_extents pointer is null.
+			 */
+			if (ifp->if_u1.if_extents) {
+				bcopy(ifp->if_u1.if_extents,
+				      ifp->if_u2.if_inline_ext, new_size);
+				kmem_free(ifp->if_u1.if_extents,
+					  ifp->if_real_bytes);
+			}
+			ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+		}
+		rnew_size = 0;
+	} else {
+		rnew_size = new_size;
+		if ((rnew_size & (rnew_size - 1)) != 0)
+			rnew_size = xfs_iroundup(rnew_size);
+		/*
+		 * Stuck with malloc/realloc.
+		 */
+		if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) {
+			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+				kmem_alloc(rnew_size, KM_SLEEP);
+			bcopy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+			      sizeof(ifp->if_u2.if_inline_ext));
+		} else if (rnew_size != ifp->if_real_bytes) {
+			ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+			  kmem_realloc(ifp->if_u1.if_extents,
+					rnew_size,
+					ifp->if_real_bytes,
+					KM_SLEEP);
+		}
+	}
+	ifp->if_real_bytes = rnew_size;
+	ifp->if_bytes = new_size;
+}
+
+
+/*
+ * Resize the if_data buffer of one fork of an inode.  The change in
+ * size is given as a signed byte count in byte_diff; a byte_diff of
+ * zero returns immediately.
+ *
+ * If the new size is zero, a heap buffer (if any) is freed and if_data
+ * is set to NULL.  If the new size fits in the fork's inline buffer,
+ * the inline buffer is used.  Otherwise kmem_alloc() or kmem_realloc()
+ * supplies a buffer of the new size rounded up to a multiple of 4.
+ *
+ * ip -- the inode whose if_data area is changing
+ * byte_diff -- the change in the number of bytes, positive or negative,
+ *	 requested for the if_data array.
+ * whichfork -- selects, via XFS_IFORK_PTR(), the fork to be resized
+ */
+void
+xfs_idata_realloc(
+	xfs_inode_t	*ip,
+	int		byte_diff,
+	int		whichfork)
+{
+	xfs_ifork_t	*ifp;
+	int		new_size;
+	int		real_size;
+
+	if (byte_diff == 0) {
+		return;
+	}
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	new_size = (int)ifp->if_bytes + byte_diff;
+	ASSERT(new_size >= 0);
+
+	if (new_size == 0) {
+		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+		}
+		ifp->if_u1.if_data = NULL;
+		real_size = 0;
+	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
+		/*
+		 * The data now fits in if_inline_data.  If it currently
+		 * lives in a malloc'd buffer, copy it out and free that.
+		 */
+		if (ifp->if_u1.if_data == NULL) {
+			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+			ASSERT(ifp->if_real_bytes != 0);
+			bcopy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+			      new_size);
+			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+		}
+		real_size = 0;
+	} else {
+		/*
+		 * Stuck with malloc/realloc.
+		 * For inline data, the underlying buffer must be
+		 * a multiple of 4 bytes in size so that it can be
+		 * logged and stay on word boundaries.  We enforce
+		 * that here.
+		 */
+		real_size = roundup(new_size, 4);
+		if (ifp->if_u1.if_data == NULL) {
+			ASSERT(ifp->if_real_bytes == 0);
+			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+			/*
+			 * Only do the realloc if the underlying size
+			 * is really changing.
+			 */
+			if (ifp->if_real_bytes != real_size) {
+				ifp->if_u1.if_data =
+					kmem_realloc(ifp->if_u1.if_data,
+							real_size,
+							ifp->if_real_bytes,
+							KM_SLEEP);
+			}
+		} else {
+			ASSERT(ifp->if_real_bytes == 0);
+			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+			bcopy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
+			      ifp->if_bytes);	/* still the old size here */
+		}
+	}
+	ifp->if_real_bytes = real_size;	/* 0 unless a heap buffer is in use */
+	ifp->if_bytes = new_size;
+	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+}
+
+
+/*
+ * Translate an inode number into the disk location of that inode.
+ *
+ * mp -- the mount point structure for the current file system
+ * tp -- the current transaction, handed through to xfs_dilocate()
+ * ino -- the inode number of the inode to be located
+ * imap -- in: a non-zero im_blkno seeds the fs block given to
+ *	 xfs_dilocate(); out: filled in only when 0 is returned
+ * flags -- passed through unchanged to xfs_dilocate()
+ */
+int
+xfs_imap(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_ino_t	ino,
+	xfs_imap_t	*imap,
+	uint		flags)
+{
+	int		err;		/* result of xfs_dilocate() */
+	xfs_fsblock_t	fsb;		/* fs block in, fs block out */
+	int		nblks;		/* length from xfs_dilocate() */
+	int		ioff;		/* offset from xfs_dilocate() */
+
+	if (imap->im_blkno)
+		fsb = XFS_DADDR_TO_FSB(mp, imap->im_blkno);
+	else
+		fsb = NULLFSBLOCK;
+	err = xfs_dilocate(mp, tp, ino, &fsb, &nblks, &ioff, flags);
+	if (err != 0)
+		return err;
+	imap->im_ioffset = (ushort)ioff;
+	imap->im_boffset = (ushort)(ioff << mp->m_sb.sb_inodelog);
+	imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsb);
+	imap->im_len = XFS_FSB_TO_BB(mp, nblks);
+	imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsb);
+	return 0;
+}
+
+/*
+ * Free the in-core memory of one fork: its btree root, any heap data or
+ * extent buffer, and for the attribute fork the xfs_ifork_t as well.
+ */
+void
+xfs_idestroy_fork(
+	xfs_inode_t	*ip,
+	int		whichfork)
+{
+	xfs_ifork_t	*fork = XFS_IFORK_PTR(ip, whichfork);
+
+	if (fork->if_broot != NULL) {
+		kmem_free(fork->if_broot, fork->if_broot_bytes);
+		fork->if_broot = NULL;
+	}
+	/*
+	 * Local format can only own a data buffer, other formats an extent
+	 * list; free whichever is heap-allocated (not NULL, not inline).
+	 */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+		if (fork->if_u1.if_data != NULL &&
+		    fork->if_u1.if_data != fork->if_u2.if_inline_data) {
+			ASSERT(fork->if_real_bytes != 0);
+			kmem_free(fork->if_u1.if_data, fork->if_real_bytes);
+			fork->if_u1.if_data = NULL;
+			fork->if_real_bytes = 0;
+		}
+	} else if ((fork->if_flags & XFS_IFEXTENTS) &&
+		   fork->if_u1.if_extents != NULL &&
+		   fork->if_u1.if_extents != fork->if_u2.if_inline_ext) {
+		ASSERT(fork->if_real_bytes != 0);
+		kmem_free(fork->if_u1.if_extents, fork->if_real_bytes);
+		fork->if_u1.if_extents = NULL;
+		fork->if_real_bytes = 0;
+	}
+	ASSERT(fork->if_u1.if_extents == NULL ||
+	       fork->if_u1.if_extents == fork->if_u2.if_inline_ext);
+	ASSERT(fork->if_real_bytes == 0);
+	if (whichfork != XFS_ATTR_FORK)
+		return;
+	kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+	ip->i_afp = NULL;
+}
+
+/*
+ * xfs_iroundup: round up argument to next power of two.
+ * Zero and exact powers of two come back unchanged.
+ */
+uint
+xfs_iroundup(
+	uint	v)
+{
+	uint	bit;
+
+	if ((v & (v - 1)) == 0)
+		return v;
+	ASSERT((v & 0x80000000) == 0);
+	/*
+	 * Fill in low-order bits until v is 2^k - 1; then v + 1 is 2^k.
+	 */
+	for (bit = 1; (v & (v + 1)) != 0; bit <<= 1) {
+		if (bit == 0x80000000) {
+			ASSERT(0);
+			return( 0 );
+		}
+		v |= bit;
+	}
+	return v + 1;
+}
+
+/*
+ * xfs_iextents_copy()
+ *
+ * Copy the REAL extents (as opposed to the delayed allocation extents)
+ * of the given fork (whichfork) of inode ip into buffer, and return
+ * the number of bytes copied into the buffer.
+ *
+ * If there are no delayed allocation extents, then we can just
+ * bcopy() the extents into the buffer.  Otherwise, we need to
+ * examine each extent in turn and skip those which are delayed.
+ */
+int
+xfs_iextents_copy(
+	xfs_inode_t		*ip,
+	xfs_bmbt_rec_32_t	*buffer,
+	int			whichfork)
+{
+	int			copied;
+	xfs_bmbt_rec_32_t	*dest_ep;
+	xfs_bmbt_rec_t		*ep;
+#ifdef DEBUG
+	xfs_exntfmt_t		fmt = XFS_EXTFMT_INODE(ip);
+#endif
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_iextents_copy";
+#endif
+	int			i;
+	xfs_ifork_t		*ifp;
+	int			nrecs;
+	xfs_fsblock_t		start_block;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+	ASSERT(ifp->if_bytes > 0);
+
+	/* Number of records held in core, delayed ones included. */
+	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
+	ASSERT(nrecs > 0);
+	if (nrecs == XFS_IFORK_NEXTENTS(ip, whichfork)) {
+		/*
+		 * There are no delayed allocation extents,
+		 * so just copy everything.
+		 */
+		ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+		ASSERT(ifp->if_bytes ==
+		       (XFS_IFORK_NEXTENTS(ip, whichfork) *
+		        (uint)sizeof(xfs_bmbt_rec_t)));
+		bcopy(ifp->if_u1.if_extents, buffer, ifp->if_bytes);
+		xfs_validate_extents(buffer, nrecs, fmt);
+		return ifp->if_bytes;
+	}
+
+	ASSERT(whichfork == XFS_DATA_FORK);
+	/*
+	 * There are some delayed allocation extents in the
+	 * inode, so copy the extents one at a time and skip
+	 * the delayed ones.  There must be at least one
+	 * non-delayed extent.
+	 */
+	ASSERT(nrecs > ip->i_d.di_nextents);
+	ep = ifp->if_u1.if_extents;
+	dest_ep = buffer;
+	copied = 0;
+	for (i = 0; i < nrecs; i++) {
+		start_block = xfs_bmbt_get_startblock(ep);
+		if (ISNULLSTARTBLOCK(start_block)) {
+			/*
+			 * It's a delayed allocation extent, so skip it.
+			 */
+			ep++;
+			continue;
+		}
+
+		*dest_ep = *(xfs_bmbt_rec_32_t *)ep;
+		dest_ep++;
+		ep++;
+		copied++;
+	}
+	ASSERT(copied != 0);
+	ASSERT(copied == ip->i_d.di_nextents);
+	ASSERT((copied * (uint)sizeof(xfs_bmbt_rec_t)) <= XFS_IFORK_DSIZE(ip));
+	xfs_validate_extents(buffer, copied, fmt);
+
+	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
+}		  
+
+/*
+ * Copy one in-core fork of ip into its place in the on-disk inode dip,
+ * according to the fork's format and the fields flagged in iip.  Each
+ * format stores into the same region of the on-disk inode, so only one
+ * can be valid at a time.  Format and log flags may conflict (e.g.
+ * XFS_ILOG_?DATA set while the fork is in EXTENTS format) only when the
+ * fork changed formats after being modified but before being flushed;
+ * the format then takes precedence, since it is the fork's current
+ * state.  Returns 0, or XFS_ERROR(EFSCORRUPTED) from the LOCAL case.
+ */
+STATIC int
+xfs_iflush_fork(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip,
+	xfs_inode_log_item_t	*iip,
+	int			whichfork,
+	xfs_buf_t		*bp)	/* not referenced in this function */
+{
+	char			*cp;
+	xfs_ifork_t		*ifp;
+	xfs_mount_t		*mp;
+#ifdef XFS_TRANS_DEBUG
+	int			first;
+#endif
+	/* Per-fork log flags, indexed by whichfork. */
+	static const short	brootflag[2] =
+		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
+	static const short	dataflag[2] =
+		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
+	static const short	extflag[2] =
+		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+
+	if (iip == NULL)	/* no log item: nothing is copied */
+		return 0;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/*
+	 * This can happen if we gave up in iformat in an error path,
+	 * for the attribute fork.
+	 */
+	if (ifp == NULL) {
+		ASSERT(whichfork == XFS_ATTR_FORK);
+		return 0;
+	}
+	cp = XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+	mp = ip->i_mount;
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
+		    (ifp->if_bytes > 0)) {
+			ASSERT(ifp->if_u1.if_data != NULL);
+			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+			bcopy(ifp->if_u1.if_data, cp, ifp->if_bytes);
+		}
+		if (whichfork == XFS_DATA_FORK) {
+			if (XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip)) {
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+		}
+		break;
+
+	case XFS_DINODE_FMT_EXTENTS:
+		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
+		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
+		ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0));
+		ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0));
+		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
+		    (ifp->if_bytes > 0)) {
+			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
+			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_32_t *)cp,
+				whichfork);
+		}
+		break;
+
+	case XFS_DINODE_FMT_BTREE:
+		if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
+		    (ifp->if_broot_bytes > 0)) {
+			ASSERT(ifp->if_broot != NULL);
+			ASSERT(ifp->if_broot_bytes <=
+			       (XFS_IFORK_SIZE(ip, whichfork) +
+				XFS_BROOT_SIZE_ADJ));
+			xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes,
+				(xfs_bmdr_block_t *)cp,
+				XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT));
+		}
+		break;
+
+	case XFS_DINODE_FMT_DEV:
+		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+			ASSERT(whichfork == XFS_DATA_FORK);
+			INT_SET(dip->di_u.di_dev, ARCH_CONVERT, ip->i_df.if_u2.if_rdev);
+		}
+		break;
+		
+	case XFS_DINODE_FMT_UUID:
+		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+			ASSERT(whichfork == XFS_DATA_FORK);
+			bcopy(&ip->i_df.if_u2.if_uuid, &dip->di_u.di_muuid,
+				sizeof(uuid_t));
+		}
+		break;
+
+	default:
+		ASSERT(0);
+		break;
+	}
+
+	return 0;
+}
diff --git a/libxfs/xfs_mount.c b/libxfs/xfs_mount.c
new file mode 100644
index 000000000..c4de3b9d2
--- /dev/null
+++ b/libxfs/xfs_mount.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Derive the mount structure fields that follow directly from the
+ * superblock: shift counts and masks, the default attribute fork
+ * offset, btree record limits and inode allocation sizes.
+ */
+void
+xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
+{
+	int	lvl;
+
+	mp->m_agirotor = 0;
+	mp->m_agfrotor = 0;
+	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
+	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
+	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+	mp->m_litino = sbp->sb_inodesize -
+		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
+	mp->m_blockmask = sbp->sb_blocksize - 1;
+	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
+	mp->m_blockwmask = mp->m_blockwsize - 1;
+
+	/*
+	 * Default attribute fork offset, used when an inode gets
+	 * attributes for the first time (inodes that already have
+	 * them carry their own per-inode value).  It is chosen by
+	 * inode size alone.
+	 */
+	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
+	if (sbp->sb_inodesize == 256) {
+		mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2);
+	} else if (sbp->sb_inodesize == 512 ||
+		   sbp->sb_inodesize == 1024 ||
+		   sbp->sb_inodesize == 2048) {
+		mp->m_attroffset = XFS_BMDR_SPACE_CALC(12);
+	} else {
+		ASSERT(0);
+	}
+	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
+
+	/*
+	 * Maximum and minimum record counts per block for the alloc,
+	 * bmap and inode btrees.  Slot 0 is computed with a true last
+	 * macro argument, slot 1 with a false one.
+	 */
+	for (lvl = 0; lvl < 2; lvl++) {
+		mp->m_alloc_mxr[lvl] = XFS_BTREE_BLOCK_MAXRECS(
+			sbp->sb_blocksize, xfs_alloc, lvl == 0);
+		mp->m_alloc_mnr[lvl] = XFS_BTREE_BLOCK_MINRECS(
+			sbp->sb_blocksize, xfs_alloc, lvl == 0);
+		mp->m_bmap_dmxr[lvl] = XFS_BTREE_BLOCK_MAXRECS(
+			sbp->sb_blocksize, xfs_bmbt, lvl == 0);
+		mp->m_bmap_dmnr[lvl] = XFS_BTREE_BLOCK_MINRECS(
+			sbp->sb_blocksize, xfs_bmbt, lvl == 0);
+		mp->m_inobt_mxr[lvl] = XFS_BTREE_BLOCK_MAXRECS(
+			sbp->sb_blocksize, xfs_inobt, lvl == 0);
+		mp->m_inobt_mnr[lvl] = XFS_BTREE_BLOCK_MINRECS(
+			sbp->sb_blocksize, xfs_inobt, lvl == 0);
+	}
+
+	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
+	mp->m_ialloc_inos = (int)MAX(XFS_INODES_PER_CHUNK, sbp->sb_inopblock);
+	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+}
+
+static struct {
+    short offset;   /* byte offset of the field within xfs_sb_t */
+    short type;     /* 0 = integer
+                     * 1 = binary / string (no translation)
+                     */
+} xfs_sb_info[] = {	/* indexed by field bit number, see xfs_xlatesb() */
+    { offsetof(xfs_sb_t, sb_magicnum),   0 },
+    { offsetof(xfs_sb_t, sb_blocksize),  0 },
+    { offsetof(xfs_sb_t, sb_dblocks),    0 },
+    { offsetof(xfs_sb_t, sb_rblocks),    0 },
+    { offsetof(xfs_sb_t, sb_rextents),   0 },
+    { offsetof(xfs_sb_t, sb_uuid),       1 },
+    { offsetof(xfs_sb_t, sb_logstart),   0 },
+    { offsetof(xfs_sb_t, sb_rootino),    0 },
+    { offsetof(xfs_sb_t, sb_rbmino),     0 },
+    { offsetof(xfs_sb_t, sb_rsumino),    0 },
+    { offsetof(xfs_sb_t, sb_rextsize),   0 },
+    { offsetof(xfs_sb_t, sb_agblocks),   0 },
+    { offsetof(xfs_sb_t, sb_agcount),    0 },
+    { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
+    { offsetof(xfs_sb_t, sb_logblocks),  0 },
+    { offsetof(xfs_sb_t, sb_versionnum), 0 },
+    { offsetof(xfs_sb_t, sb_sectsize),   0 },
+    { offsetof(xfs_sb_t, sb_inodesize),  0 },
+    { offsetof(xfs_sb_t, sb_inopblock),  0 },
+    { offsetof(xfs_sb_t, sb_fname[0]),   1 },
+    { offsetof(xfs_sb_t, sb_blocklog),   0 },
+    { offsetof(xfs_sb_t, sb_sectlog),    0 },
+    { offsetof(xfs_sb_t, sb_inodelog),   0 },
+    { offsetof(xfs_sb_t, sb_inopblog),   0 },
+    { offsetof(xfs_sb_t, sb_agblklog),   0 },
+    { offsetof(xfs_sb_t, sb_rextslog),   0 },
+    { offsetof(xfs_sb_t, sb_inprogress), 0 },
+    { offsetof(xfs_sb_t, sb_imax_pct),   0 },
+    { offsetof(xfs_sb_t, sb_icount),     0 },
+    { offsetof(xfs_sb_t, sb_ifree),      0 },
+    { offsetof(xfs_sb_t, sb_fdblocks),   0 },
+    { offsetof(xfs_sb_t, sb_frextents),  0 },
+    { offsetof(xfs_sb_t, sb_uquotino),   0 },
+    { offsetof(xfs_sb_t, sb_pquotino),   0 },
+    { offsetof(xfs_sb_t, sb_qflags),     0 },
+    { offsetof(xfs_sb_t, sb_flags),      0 },
+    { offsetof(xfs_sb_t, sb_shared_vn),  0 },
+    { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
+    { offsetof(xfs_sb_t, sb_unit),       0 },
+    { offsetof(xfs_sb_t, sb_width),      0 },
+    { offsetof(xfs_sb_t, sb_dirblklog),  0 },
+    { offsetof(xfs_sb_t, sb_dummy),      1 },
+    { sizeof(xfs_sb_t),                  0 }  /* end marker: bounds last field */
+};
+
+/*
+ * xfs_xlatesb - copy selected superblock fields between disk and memory
+ *     data       - on disk version of sb
+ *     sb         - a superblock
+ *     dir        - conversion direction: <0 - convert sb to buf
+ *                                        >0 - convert buf to sb
+ *     arch       - architecture to read/write from/to buf
+ *     fields     - which fields to copy (bitmask of xfs_sb_info indices)
+ */
+void
+xfs_xlatesb(void *data, xfs_sb_t *sb, int dir, xfs_arch_t arch, 
+            __int64_t fields)
+{
+    xfs_caddr_t     buf_ptr;
+    xfs_caddr_t     mem_ptr;
+           
+    ASSERT(dir);
+    ASSERT(fields);
+
+    if (!fields)
+        return;
+    
+    buf_ptr=(xfs_caddr_t)data;
+    mem_ptr=(xfs_caddr_t)sb;
+    
+    while (fields) {
+	xfs_sb_field_t	f;
+	int		first;
+	int		size;
+
+	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
+	first = xfs_sb_info[f].offset;
+	size = xfs_sb_info[f + 1].offset - first;	/* up to next entry */
+        
+        ASSERT(xfs_sb_info[f].type==0 || xfs_sb_info[f].type==1);
+        
+        /* Plain byte copy: no conversion, single byte, or binary field. */
+        if (arch == ARCH_NOCONVERT || size==1 || xfs_sb_info[f].type==1) {
+            if (dir>0) {
+   	        bcopy(buf_ptr + first, mem_ptr + first, size);
+            } else {
+   	        bcopy(mem_ptr + first, buf_ptr + first, size);
+            }
+        } else {
+            switch (size) {
+                case 2:  
+                    INT_XLATE(*(__uint16_t*)(buf_ptr+first),
+                             *(__uint16_t*)(mem_ptr+first), dir, arch);
+                    break;
+                case 4:  
+                    INT_XLATE(*(__uint32_t*)(buf_ptr+first),
+                             *(__uint32_t*)(mem_ptr+first), dir, arch);
+                    break;
+                case 8:  
+                    INT_XLATE(*(__uint64_t*)(buf_ptr+first),
+                             *(__uint64_t*)(mem_ptr+first), dir, arch);
+                    break;
+                default: 
+                    ASSERT(0);
+            }
+        }
+	fields &= ~(1LL << f);	/* this field is done */
+    }
+}
diff --git a/libxfs/xfs_rtalloc.c b/libxfs/xfs_rtalloc.c
new file mode 100644
index 000000000..8f0a447f9
--- /dev/null
+++ b/libxfs/xfs_rtalloc.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Free realtime space allocation for XFS.
+ */
+#include <xfs.h>
+
+
+/*
+ * Get a buffer for one block of the realtime bitmap file or, when
+ * issum is set, of the realtime summary file.  The file block is
+ * mapped with xfs_bmapi_single() and read with xfs_trans_read_buf().
+ * Returns 0 and stores the buffer in *bpp, or returns the error from
+ * either call and leaves *bpp untouched.
+ */
+STATIC int				/* error */
+xfs_rtbuf_get(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_rtblock_t	block,		/* block number in bitmap or summary */
+	int		issum,		/* is summary not bitmap */
+	xfs_buf_t		**bpp)		/* output: buffer for the block */
+{
+	xfs_buf_t	*buf;		/* buffer read for the block */
+	xfs_daddr_t	daddr;		/* disk address of the block */
+	int		err;		/* status of the calls below */
+	xfs_inode_t	*filep;		/* bitmap or summary inode */
+	xfs_fsblock_t	fsbno;		/* fs block backing "block" */
+
+	/* Pick the inode whose data fork holds the block. */
+	if (issum)
+		filep = mp->m_rsumip;
+	else
+		filep = mp->m_rbmip;
+	/*
+	 * Map the file offset (block) to a file system block.
+	 */
+	err = xfs_bmapi_single(tp, filep, XFS_DATA_FORK, &fsbno, block);
+	if (err)
+		return err;
+	ASSERT(fsbno != NULLFSBLOCK);
+	/* Convert to a disk address for the buffer cache. */
+	daddr = XFS_FSB_TO_DADDR(mp, fsbno);
+	/* Read the buffer. */
+	err = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, daddr,
+				 mp->m_bsize, 0, &buf);
+	if (err)
+		return err;
+	ASSERT(buf && !XFS_BUF_GETERROR(buf));
+	*bpp = buf;
+	return 0;
+}
+
+/*
+ * Search backward from start to limit for a block whose allocated/free state
+ * differs from start's; *rtblock gets start - (matching blocks scanned) + 1.
+ */
+STATIC int				/* error */
+xfs_rtfind_back(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_rtblock_t	start,		/* starting block to look at */
+	xfs_rtblock_t	limit,		/* last block to look at */
+	xfs_rtblock_t	*rtblock)	/* out: start block found */
+{
+	xfs_rtword_t	*b;		/* current word in buffer */
+	int		bit;		/* bit number in the word */
+	xfs_rtblock_t	block;		/* bitmap block number */
+	xfs_buf_t		*bp;		/* buf for the block */
+	xfs_rtword_t	*bufp;		/* starting word in buffer */
+	int		error;		/* error value */
+	xfs_rtblock_t	firstbit;	/* first useful bit in the word */
+	xfs_rtblock_t	i;		/* current bit number rel. to start */
+	xfs_rtblock_t	len;		/* length of inspected area */
+	xfs_rtword_t	mask;		/* mask of relevant bits for value */
+	xfs_rtword_t	want;		/* mask for "good" values */
+	xfs_rtword_t	wdiff;		/* difference from wanted value */
+	int		word;		/* word number in the buffer */
+
+	/*
+	 * Compute and read in starting bitmap block for starting block.
+	 */
+	block = XFS_BITTOBLOCK(mp, start);
+	error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+	if (error) {
+		return error;
+	}
+	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	/*
+	 * Get the first word's index & point to it.
+	 */
+	word = XFS_BITTOWORD(mp, start);
+	b = &bufp[word];
+	bit = (int)(start & (XFS_NBWORD - 1));
+	len = start - limit + 1;
+	/*
+	 * Compute match value, based on the bit at start: if 1 (free)
+	 * then all-ones, else all-zeroes.
+	 */
+	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+	/*
+	 * If start is not the highest bit of its word, deal with the
+	 * partial word.
+	 */
+	if (bit < XFS_NBWORD - 1) {
+		/*
+		 * Calculate first (leftmost) bit number to look at,
+		 * and mask for all the relevant bits in this word.
+		 */
+		firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
+		mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
+			firstbit;
+		/*
+		 * Calculate the difference between the value there
+		 * and what we're looking for.
+		 */
+		if (wdiff = (*b ^ want) & mask) {
+			/*
+			 * Different.  Mark where we are and return.
+			 */
+			xfs_trans_brelse(tp, bp);
+			i = bit - XFS_RTHIBIT(wdiff);
+			*rtblock = start - i + 1;
+			return 0;
+		}
+		i = bit - firstbit + 1;
+		/*
+		 * Go on to previous block if that's where the previous word is
+		 * and we need the previous word.
+		 */
+		if (--word == -1 && i < len) {
+			/*
+			 * If done with this block, get the previous one.
+			 */
+			xfs_trans_brelse(tp, bp);
+			error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+			if (error) {
+				return error;
+			}
+			bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			word = XFS_BLOCKWMASK(mp);
+			b = &bufp[word];
+		} else {
+			/*
+			 * Go on to the previous word in the buffer.
+			 */
+			b--;
+		}
+	} else {
+		/*
+		 * Starting on a word boundary, no partial word.
+		 */
+		i = 0;
+	}
+	/*
+	 * Loop over whole words in buffers.  When we use up one buffer
+	 * we move on to the previous one.
+	 */
+	while (len - i >= XFS_NBWORD) {
+		/*
+		 * Compute difference between actual and desired value.
+		 */
+		if (wdiff = *b ^ want) {
+			/*
+			 * Different, mark where we are and return.
+			 */
+			xfs_trans_brelse(tp, bp);
+			i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+			*rtblock = start - i + 1;
+			return 0;
+		}
+		i += XFS_NBWORD;
+		/*
+		 * Go on to previous block if that's where the previous word is
+		 * and we need the previous word.
+		 */
+		if (--word == -1 && i < len) {
+			/*
+			 * If done with this block, get the previous one.
+			 */
+			xfs_trans_brelse(tp, bp);
+			error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+			if (error) {
+				return error;
+			}
+			bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			word = XFS_BLOCKWMASK(mp);
+			b = &bufp[word];
+		} else {
+			/*
+			 * Go on to the previous word in the buffer.
+			 */
+			b--;
+		}
+	}
+	/*
+	 * If not ending on a word boundary, deal with the last
+	 * (partial) word.
+	 */
+	if (len - i) {
+		/*
+		 * Calculate first (leftmost) bit number to look at,
+		 * and mask for all the relevant bits in this word.
+		 */
+		firstbit = XFS_NBWORD - (len - i);
+		mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
+		/*
+		 * Compute difference between actual and desired value.
+		 */
+		if (wdiff = (*b ^ want) & mask) {
+			/*
+			 * Different, mark where we are and return.
+			 */
+			xfs_trans_brelse(tp, bp);
+			i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+			*rtblock = start - i + 1;
+			return 0;
+		} else
+			i = len;
+	}
+	/*
+	 * No match, return that we scanned the whole area.
+	 */
+	xfs_trans_brelse(tp, bp);
+	*rtblock = start - i + 1;
+	return 0;
+}
+
+/*
+ * Search forward from start to limit for a block whose allocated/free state
+ * differs from start's; *rtblock gets start + (matching blocks scanned) - 1.
+ */
+STATIC int				/* error */
+xfs_rtfind_forw(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_rtblock_t	start,		/* starting block to look at */
+	xfs_rtblock_t	limit,		/* last block to look at */
+	xfs_rtblock_t	*rtblock)	/* out: start block found */
+{
+	xfs_rtword_t	*b;		/* current word in buffer */
+	int		bit;		/* bit number in the word */
+	xfs_rtblock_t	block;		/* bitmap block number */
+	xfs_buf_t		*bp;		/* buf for the block */
+	xfs_rtword_t	*bufp;		/* starting word in buffer */
+	int		error;		/* error value */
+	xfs_rtblock_t	i;		/* current bit number rel. to start */
+	xfs_rtblock_t	lastbit;	/* last useful bit in the word */
+	xfs_rtblock_t	len;		/* length of inspected area */
+	xfs_rtword_t	mask;		/* mask of relevant bits for value */
+	xfs_rtword_t	want;		/* mask for "good" values */
+	xfs_rtword_t	wdiff;		/* difference from wanted value */
+	int		word;		/* word number in the buffer */
+
+	/*
+	 * Compute and read in starting bitmap block for starting block.
+	 */
+	block = XFS_BITTOBLOCK(mp, start);
+	error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+	if (error) {
+		return error;
+	}
+	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	/*
+	 * Get the first word's index & point to it.
+	 */
+	word = XFS_BITTOWORD(mp, start);
+	b = &bufp[word];
+	bit = (int)(start & (XFS_NBWORD - 1));
+	len = limit - start + 1;
+	/*
+	 * Compute match value, based on the bit at start: if 1 (free)
+	 * then all-ones, else all-zeroes.
+	 */
+	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+	/*
+	 * If the starting position is not word-aligned, deal with the
+	 * partial word.
+	 */
+	if (bit) {
+		/*
+		 * Calculate last (rightmost) bit number to look at,
+		 * and mask for all the relevant bits in this word.
+		 */
+		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+		/*
+		 * Calculate the difference between the value there
+		 * and what we're looking for.
+		 */
+		if (wdiff = (*b ^ want) & mask) {
+			/*
+			 * Different.  Mark where we are and return.
+			 */
+			xfs_trans_brelse(tp, bp);
+			i = XFS_RTLOBIT(wdiff) - bit;
+			*rtblock = start + i - 1;
+			return 0;
+		}
+		i = lastbit - bit;
+		/*
+		 * Go on to next block if that's where the next word is
+		 * and we need the next word.
+		 */
+		if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+			/*
+			 * If done with this block, get the next one.
+			 */
+			xfs_trans_brelse(tp, bp);
+			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+			if (error) {
+				return error;
+			}
+			b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			word = 0;
+		} else {
+			/*
+			 * Go on to the next word in the buffer.
+			 */
+			b++;
+		}
+	} else {
+		/*
+		 * Starting on a word boundary, no partial word.
+		 */
+		i = 0;
+	}
+	/*
+	 * Loop over whole words in buffers.  When we use up one buffer
+	 * we move on to the next one.
+	 */
+	while (len - i >= XFS_NBWORD) {
+		/*
+		 * Compute difference between actual and desired value.
+		 */
+		if (wdiff = *b ^ want) {
+			/*
+			 * Different, mark where we are and return.
+			 */
+			xfs_trans_brelse(tp, bp);
+			i += XFS_RTLOBIT(wdiff);
+			*rtblock = start + i - 1;
+			return 0;
+		}
+		i += XFS_NBWORD;
+		/*
+		 * Go on to next block if that's where the next word is
+		 * and we need the next word.
+		 */
+		if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+			/*
+			 * If done with this block, get the next one.
+			 */
+			xfs_trans_brelse(tp, bp);
+			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+			if (error) {
+				return error;
+			}
+			b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			word = 0;
+		} else {
+			/*
+			 * Go on to the next word in the buffer.
+			 */
+			b++;
+		}
+	}
+	/*
+	 * If not ending on a word boundary, deal with the last
+	 * (partial) word.
+	 */
+	if (lastbit = len - i) {
+		/*
+		 * Calculate mask for all the relevant bits in this word.
+		 */
+		mask = ((xfs_rtword_t)1 << lastbit) - 1;
+		/*
+		 * Compute difference between actual and desired value.
+		 */
+		if (wdiff = (*b ^ want) & mask) {
+			/*
+			 * Different, mark where we are and return.
+			 */
+			xfs_trans_brelse(tp, bp);
+			i += XFS_RTLOBIT(wdiff);
+			*rtblock = start + i - 1;
+			return 0;
+		} else
+			i = len;
+	}
+	/*
+	 * No match, return that we scanned the whole area.
+	 */
+	xfs_trans_brelse(tp, bp);
+	*rtblock = start + i - 1;
+	return 0;
+}
+
+/*
+ * Mark an extent specified by start and len freed.
+ * Updates all the summary information as well as the bitmap.
+ *
+ * Returns 0, or the first error reported by xfs_rtmodify_range(),
+ * xfs_rtfind_back(), xfs_rtfind_forw() or xfs_rtmodify_summary().
+ */
+STATIC int				/* error */
+xfs_rtfree_range(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_rtblock_t	start,		/* starting block to free */
+	xfs_extlen_t	len,		/* length to free */
+	xfs_buf_t		**rbpp,		/* in/out: summary block buffer */
+	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
+{
+	xfs_rtblock_t	end;		/* end of the freed extent */
+	int		error;		/* error value */
+	xfs_rtblock_t	postblock;	/* first block freed > end */
+	xfs_rtblock_t	preblock;	/* first block freed < start */
+
+	end = start + len - 1;
+	/* Modify the bitmap to mark this extent freed. */
+	error = xfs_rtmodify_range(mp, tp, start, len, 1);
+	if (error) {
+		return error;
+	}
+	/*
+	 * The freed blocks may now be contiguous with free blocks on
+	 * either side.  Find where the resulting free extent begins.
+	 */
+	error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+	if (error) {
+		return error;
+	}
+	/* Likewise find where it ends; postblock is not valid on error. */
+	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
+		&postblock);
+	if (error) {
+		return error;
+	}
+	/*
+	 * Blocks from preblock up to start were already in the free
+	 * state: apply a delta of -1 to the summary for that old extent.
+	 */
+	if (preblock < start) {
+		error = xfs_rtmodify_summary(mp, tp,
+			XFS_RTBLOCKLOG(start - preblock),
+			XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+		if (error) {
+			return error;
+		}
+	}
+	/*
+	 * Blocks after end up to postblock were already in the free
+	 * state: apply a delta of -1 to the summary for that old extent.
+	 */
+	if (postblock > end) {
+		error = xfs_rtmodify_summary(mp, tp,
+			XFS_RTBLOCKLOG(postblock - end),
+			XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
+		if (error) {
+			return error;
+		}
+	}
+	/*
+	 * Apply a delta of +1 to the summary for the entire (new) free
+	 * extent, which runs from preblock to postblock.
+	 */
+	return xfs_rtmodify_summary(mp, tp,
+		XFS_RTBLOCKLOG(postblock + 1 - preblock),
+		XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
+}
+
+/*
+ * Set len bitmap bits, starting at bit number start, to val (1 = free,
+ * 0 = allocated), logging the bitmap buffers that get modified.
+ */
+STATIC int				/* error */
+xfs_rtmodify_range(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_rtblock_t	start,		/* starting block to modify */
+	xfs_extlen_t	len,		/* length of extent to modify */
+	int		val)		/* 1 for free, 0 for allocated */
+{
+	xfs_rtword_t	*b;		/* current word in buffer */
+	int		bit;		/* bit number in the word */
+	xfs_rtblock_t	block;		/* bitmap block number */
+	xfs_buf_t		*bp;		/* buf for the block */
+	xfs_rtword_t	*bufp;		/* starting word in buffer */
+	int		error;		/* error value */
+	xfs_rtword_t	*first;		/* first used word in the buffer */
+	int		i;		/* current bit number rel. to start */
+	int		lastbit;	/* last useful bit in word */
+	xfs_rtword_t	mask;		/* mask of relevant bits for value */
+	int		word;		/* word number in the buffer */
+
+	/*
+	 * Compute starting bitmap block number.
+	 */
+	block = XFS_BITTOBLOCK(mp, start);
+	/*
+	 * Read the bitmap block, and point to its data.
+	 */
+	error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+	if (error) {
+		return error;
+	}
+	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	/*
+	 * Compute the starting word's address, and starting bit.
+	 */
+	word = XFS_BITTOWORD(mp, start);
+	first = b = &bufp[word];
+	bit = (int)(start & (XFS_NBWORD - 1));
+	/*
+	 * Make val a fill word: 0 (allocated) => 0; 1 (free) => -1, all ones.
+	 */
+	val = -val;
+	/*
+	 * If not starting on a word boundary, deal with the first
+	 * (partial) word.
+	 */
+	if (bit) {
+		/*
+		 * Compute first bit not changed and mask of relevant bits.
+		 */
+		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+		/*
+		 * Set/clear the active bits.
+		 */
+		if (val)
+			*b |= mask;
+		else
+			*b &= ~mask;
+		i = lastbit - bit;
+		/*
+		 * Go on to the next block if that's where the next word is
+		 * and we need the next word.
+		 */
+		if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+			/*
+			 * Log the changed part of this block (first..b).
+			 * Get the next one and restart at its first word.
+			 */
+			xfs_trans_log_buf(tp, bp,
+				(uint)((char *)first - (char *)bufp),
+				(uint)((char *)b - (char *)bufp));
+			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+			if (error) {
+				return error;
+			}
+			first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			word = 0;
+		} else {
+			/*
+			 * Go on to the next word in the buffer
+			 */
+			b++;
+		}
+	} else {
+		/*
+		 * Starting on a word boundary, no partial word.
+		 */
+		i = 0;
+	}
+	/*
+	 * Loop over whole words in buffers.  When we use up one buffer
+	 * we move on to the next one.
+	 */
+	while (len - i >= XFS_NBWORD) {
+		/*
+		 * Overwrite the whole word with the fill value.
+		 */
+		*b = val;
+		i += XFS_NBWORD;
+		/*
+		 * Go on to the next block if that's where the next word is
+		 * and we need the next word.
+		 */
+		if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+			/*
+			 * Log the changed part of this block (first..b).
+			 * Get the next one and restart at its first word.
+			 */
+			xfs_trans_log_buf(tp, bp,
+				(uint)((char *)first - (char *)bufp),
+				(uint)((char *)b - (char *)bufp));
+			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+			if (error) {
+				return error;
+			}
+			first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+			word = 0;
+		} else {
+			/*
+			 * Go on to the next word in the buffer
+			 */
+			b++;
+		}
+	}
+	/*
+	 * If not ending on a word boundary, deal with the last
+	 * (partial) word; lastbit is the number of bits still to change.
+	 */
+	if (lastbit = len - i) {
+		/*
+		 * Compute a mask of relevant bits.
+		 */
+		bit = 0;	/* not read again in this function */
+		mask = ((xfs_rtword_t)1 << lastbit) - 1;
+		/*
+		 * Set/clear the active bits.
+		 */
+		if (val)
+			*b |= mask;
+		else
+			*b &= ~mask;
+		b++;
+	}
+	/*
+	 * Log any remaining changed bytes: b is one word past the last change.
+	 */
+	if (b > first)
+		xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
+			(uint)((char *)b - (char *)bufp - 1));
+	return 0;
+}
+
+/*
+ * Read and modify the summary information for a given extent size,
+ * bitmap block combination.
+ * Keeps track of a current summary block, so we don't keep reading
+ * it from the buffer cache.
+ */
+STATIC int				/* error */
+xfs_rtmodify_summary(
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	int		log,		/* log2 of extent size */
+	xfs_rtblock_t	bbno,		/* bitmap block number */
+	int		delta,		/* change to make to summary info */
+	xfs_buf_t		**rbpp,		/* in/out: summary block buffer */
+	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
+{
+	int		sumoff;		/* entry index in the summary file */
+	xfs_fsblock_t	sumblk;		/* summary block holding that entry */
+	xfs_buf_t		*sumbp;		/* buffer for block sumblk */
+	xfs_suminfo_t	*entry;		/* the counter being adjusted */
+	char		*base;		/* start of the buffer's data */
+	int		error;		/* error value */
+
+	/*
+	 * Locate the entry: its index in the summary file, and from that
+	 * the summary block that contains it.
+	 */
+	sumoff = XFS_SUMOFFS(mp, log, bbno);
+	sumblk = XFS_SUMOFFSTOBLOCK(mp, sumoff);
+	/*
+	 * The caller's cached buffer is reused only when there is one and
+	 * it is for this very block; in every other case get the buffer.
+	 */
+	if (!rbpp || !*rbpp || *rsb != sumblk) {
+		/*
+		 * Release the stale cached buffer before replacing it.
+		 */
+		if (rbpp && *rbpp) {
+			xfs_trans_brelse(tp, *rbpp);
+		}
+		error = xfs_rtbuf_get(mp, tp, sumblk, 1, &sumbp);
+		if (error) {
+			return error;
+		}
+		/*
+		 * Hand the new buffer and block back for the next call.
+		 */
+		if (rbpp) {
+			*rbpp = sumbp;
+			*rsb = sumblk;
+		}
+	} else {
+		sumbp = *rbpp;
+	}
+	/*
+	 * Apply the delta, then log the entry, passing the buffer offsets
+	 * of its first and last bytes.
+	 */
+	entry = XFS_SUMPTR(mp, sumbp, sumoff);
+	*entry += delta;
+	base = (char *)XFS_BUF_PTR(sumbp);
+	xfs_trans_log_buf(tp, sumbp, (uint)((char *)entry - base),
+		(uint)((char *)entry - base + sizeof(*entry) - 1));
+	return 0;
+}
+
+/*
+ * Free an extent in the realtime subvolume.  Length is expressed in 
+ * realtime extents, as is the block number.
+ */
+int					/* error */
+xfs_rtfree_extent(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_rtblock_t	bno,		/* starting block number to free */
+	xfs_extlen_t	len)		/* length of extent freed */
+{
+	xfs_mount_t	*mp = tp->t_mountp;	/* file system mount structure */
+	xfs_inode_t	*rbmip;		/* bitmap file inode */
+	xfs_buf_t		*sumbp = NULL;	/* summary buffer cache, empty so far */
+	xfs_fsblock_t	sumblk;		/* summary block number for sumbp */
+	int		error;		/* error value */
+
+	/*
+	 * Synchronize by locking the bitmap inode (exclusive mode).
+	 */
+	error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, XFS_ILOCK_EXCL,
+		&rbmip);
+	if (error) {
+		return error;
+	}
+#if defined(__KERNEL__) && defined(DEBUG)
+	/*
+	 * Debug kernels assert that the whole range is currently allocated.
+	 */
+	{
+		int	stat;		/* result from checking range */
+
+		error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat);
+		if (error) {
+			return error;
+		}
+		ASSERT(stat);
+	}
+#endif
+	/*
+	 * Free the range of realtime blocks; sumbp/sumblk let the callee
+	 * keep hold of one summary buffer between its summary updates.
+	 */
+	error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sumblk);
+	if (error) {
+		return error;
+	}
+	/*
+	 * Mark more blocks free in the superblock (applied through the
+	 * transaction's XFS_TRANS_SB_FREXTENTS delta).
+	 */
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+	/*
+	 * If we've now freed all the blocks, reset the file sequence
+	 * number (kept in the bitmap inode's di_atime) to 0.
+	 */
+	if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+	    mp->m_sb.sb_rextents) {
+		rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
+		*(__uint64_t *)&rbmip->i_d.di_atime = 0;
+		xfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
+	}
+	return 0;
+}
+
+/*
+ * Initialize realtime fields in the mount structure.
+ */
+int				/* error */
+xfs_rtmount_init(
+	xfs_mount_t	*mp)	/* file system mount structure */
+{
+	xfs_sb_t	*sbp = &mp->m_sb; /* filesystem superblock copy in mount */
+	xfs_daddr_t	rtbbs;	/* realtime size in basic blocks */
+	xfs_buf_t	*bp;	/* buffer for last block of subvolume */
+	int		error;	/* error return value */
+
+	if (sbp->sb_rblocks == 0)
+		return 0;
+	if (!mp->m_rtdev) {
+		printk(KERN_WARNING
+		"XFS: This FS has an RT subvol - specify -o rtdev on mount\n");
+		return XFS_ERROR(ENODEV);
+	}
+	/* Summary geometry: levels, then byte size rounded to whole blocks. */
+	mp->m_rsumlevels = sbp->sb_rextslog + 1;
+	mp->m_rsumsize = (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
+		sbp->sb_rbmblocks;
+	mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize);
+	mp->m_rbmip = mp->m_rsumip = NULL;
+	/*
+	 * The size must survive conversion to basic blocks and back, and the
+	 * last basic block of the realtime device must be readable.
+	 */
+	rtbbs = (xfs_daddr_t)XFS_FSB_TO_BB(mp, sbp->sb_rblocks);
+	if (XFS_BB_TO_FSB(mp, rtbbs) != sbp->sb_rblocks) {
+		printk(KERN_WARNING "XFS: RT mount - %llu != %llu\n",
+			XFS_BB_TO_FSB(mp, rtbbs), sbp->sb_rblocks);
+		return XFS_ERROR(E2BIG);
+	}
+	error = xfs_read_buf(mp, &mp->m_rtdev_targ, rtbbs - 1, 1, 0, &bp);
+	if (error) {
+		printk(KERN_WARNING
+			"XFS: RT mount - xfs_read_buf returned %d\n", error);
+		if (error == ENOSPC)
+			return XFS_ERROR(E2BIG);
+		return error;
+	}
+	xfs_buf_relse(bp);
+	return 0;
+}
diff --git a/libxfs/xfs_rtbit.c b/libxfs/xfs_rtbit.c
new file mode 100644
index 000000000..c51cba34c
--- /dev/null
+++ b/libxfs/xfs_rtbit.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS bit manipulation routines, used only in realtime code.
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
+ */
+int
+xfs_lowbit32(
+	__uint32_t	v)
+{
+	int		shift;	/* bit offset of the lowest non-zero byte */
+
+	if (v == 0)
+		return -1;
+	/* v has a bit set, so one of the four bytes tested here is non-zero. */
+	if (v & 0x000000ff)
+		shift = 0;
+	else if (v & 0x0000ff00)
+		shift = 8;
+	else if (v & 0x00ff0000)
+		shift = 16;
+	else
+		shift = 24;
+	/* xfs_lowbit[] (defined elsewhere) is indexed with that byte's value. */
+	return shift + xfs_lowbit[(v >> shift) & 0xff];
+}
diff --git a/libxfs/xfs_trans.c b/libxfs/xfs_trans.c
new file mode 100644
index 000000000..a30ad8967
--- /dev/null
+++ b/libxfs/xfs_trans.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Initialize the precomputed transaction reservation values
+ * in the mount structure.
+ */
+void
+xfs_trans_init(
+	xfs_mount_t	*mp)
+{
+	xfs_trans_reservations_t	*res;
+	uint				dqres;
+
+	res = &mp->m_reservations;
+	/*
+	 * Several of the reservations below add XFS_DQUOT_LOGRES(mp) to
+	 * the computed value; evaluate it once here.
+	 */
+	dqres = (uint)XFS_DQUOT_LOGRES(mp);
+	/*
+	 * Store the value of each XFS_CALC_*_LOG_RES macro, as a uint, in
+	 * the mount structure field of the matching name.  The order of
+	 * the assignments is that of the original table.
+	 */
+	res->tr_write = (uint)(XFS_CALC_WRITE_LOG_RES(mp) + dqres);
+	res->tr_itruncate = (uint)(XFS_CALC_ITRUNCATE_LOG_RES(mp) + dqres);
+	res->tr_rename = (uint)(XFS_CALC_RENAME_LOG_RES(mp) + dqres);
+	res->tr_link = (uint)XFS_CALC_LINK_LOG_RES(mp);
+	res->tr_remove = (uint)(XFS_CALC_REMOVE_LOG_RES(mp) + dqres);
+	res->tr_symlink = (uint)(XFS_CALC_SYMLINK_LOG_RES(mp) + dqres);
+	res->tr_create = (uint)(XFS_CALC_CREATE_LOG_RES(mp) + dqres);
+	res->tr_mkdir = (uint)(XFS_CALC_MKDIR_LOG_RES(mp) + dqres);
+	res->tr_ifree = (uint)(XFS_CALC_IFREE_LOG_RES(mp) + dqres);
+	res->tr_ichange = (uint)(XFS_CALC_ICHANGE_LOG_RES(mp) + dqres);
+	res->tr_growdata = (uint)XFS_CALC_GROWDATA_LOG_RES(mp);
+	res->tr_swrite = (uint)XFS_CALC_SWRITE_LOG_RES(mp);
+	res->tr_writeid = (uint)XFS_CALC_WRITEID_LOG_RES(mp);
+	res->tr_addafork = (uint)(XFS_CALC_ADDAFORK_LOG_RES(mp) + dqres);
+	res->tr_attrinval = (uint)XFS_CALC_ATTRINVAL_LOG_RES(mp);
+	res->tr_attrset = (uint)(XFS_CALC_ATTRSET_LOG_RES(mp) + dqres);
+	res->tr_attrrm = (uint)(XFS_CALC_ATTRRM_LOG_RES(mp) + dqres);
+	res->tr_clearagi = (uint)XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+	res->tr_growrtalloc = (uint)XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+	res->tr_growrtzero = (uint)XFS_CALC_GROWRTZERO_LOG_RES(mp);
+	res->tr_growrtfree = (uint)XFS_CALC_GROWRTFREE_LOG_RES(mp);
+}
diff --git a/logprint/Makefile b/logprint/Makefile
new file mode 100644
index 000000000..4b878e297
--- /dev/null
+++ b/logprint/Makefile
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+# xfs_logprint is what "default" depends on and what "install" copies.
+CMDTARGET = xfs_logprint
+CMDDEPS = $(LIBXFS)
+
+CFILES = log_print_trans.c log_print_all.c log_misc.c logprint.c \
+	xfs_log_recover.c
+HFILES = logprint.h
+LLDLIBS	= $(LIBXFS) $(LIBUUID)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+# The first $(INSTALL) call targets the directory (-d), the second the binary.
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
diff --git a/logprint/log_misc.c b/logprint/log_misc.c
new file mode 100644
index 000000000..bc53bfa40
--- /dev/null
+++ b/logprint/log_misc.c
@@ -0,0 +1,1184 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+
+#define ZEROED_LOG	(-4)
+#define FULL_READ	(-3)
+#define PARTIAL_READ	(-2)
+#define BAD_HEADER	(-1)
+#define NO_ERROR	(0)
+
+static int logBBsize;
+char *trans_type[] = {	/* names indexed by th_type in xlog_print_trans_header */
+	"",
+	"SETATTR",
+	"SETATTR_SIZE",
+	"INACTIVE",
+	"CREATE",
+	"CREATE_TRUNC",
+	"TRUNCATE_FILE",
+	"REMOVE",
+	"LINK",
+	"RENAME",
+	"MKDIR",
+	"RMDIR",
+	"SYMLINK",
+	"SET_DMATTRS",
+	"GROWFS",
+	"STRAT_WRITE",
+	"DIOSTRAT",
+	"WRITE_SYNC",
+	"WRITEID",
+	"ADDAFORK",
+	"ATTRINVAL",
+	"ATRUNCATE",
+	"ATTR_SET",
+	"ATTR_RM",
+	"ATTR_FLAG",
+	"CLEAR_AGI_BUCKET",
+	"QM_SBCHANGE",
+	"DUMMY1",
+	"DUMMY2",
+	"QM_QUOTAOFF",
+	"QM_DQALLOC",
+	"QM_SETQLIM",
+	"QM_DQCLUSTER",
+	"QM_QINOCREATE",
+	"QM_QUOTAOFF_END",
+	"SB_UNIT",
+	"FSYNC_TS",
+	"GROWFSRT_ALLOC",
+	"GROWFSRT_ZERO",
+	"GROWFSRT_FREE",
+	"SWAPEXT",
+};
+
+typedef struct xlog_split_item {	/* one node of split_list */
+	struct xlog_split_item	*si_next;	/* towards the tail, or NULL */
+	struct xlog_split_item	*si_prev;	/* towards the head, NULL at head */
+	xlog_tid_t		si_tid;		/* tid matched by xlog_print_find_tid */
+	int			si_skip;	/* decremented per match; freed at 0 */
+} xlog_split_item_t;
+
+xlog_split_item_t *split_list = 0;	/* list head; new items are pushed here */
+
+void
+print_xlog_op_line(void)
+{
+    fputs("--------------------------------------"	/* rule between ops */
+	  "--------------------------------------\n", stdout);
+}	/* print_xlog_op_line */
+
+void
+print_xlog_record_line(void)
+{
+    fputs("======================================"	/* rule between records */
+	  "======================================\n", stdout);
+}	/* print_xlog_record_line */
+
+void
+print_stars(void)
+{
+    fputs("***********************************"	/* one line of asterisks */
+	  "***********************************\n", stdout);
+}	/* print_stars */
+
+/*
+ * Given a pointer to a data segment, print out the data as if it were
+ * a log operation header.
+ */
+void
+xlog_print_op_header(xlog_op_header_t	*op_head,
+		     int		i,
+		     xfs_caddr_t	*ptr)
+{
+    xlog_op_header_t hdr;	/* private copy of *op_head */
+    const char	     *client;	/* printable client id */
+
+    /*
+     * Work on a copy: on 64/n32, partial reads can hand us an op_head
+     * that points at an odd-numbered byte.
+     */
+    bcopy(op_head, &hdr, sizeof(hdr));
+    *ptr += sizeof(hdr);
+    client = hdr.oh_clientid == XFS_TRANSACTION ? "TRANS" :
+	     hdr.oh_clientid == XFS_LOG ? "LOG" : "ERROR";
+    printf("Oper (%d): tid: %x  len: %d  clientid: %s  ", i,
+	    INT_GET(hdr.oh_tid, ARCH_CONVERT),
+	    INT_GET(hdr.oh_len, ARCH_CONVERT), client);
+    printf("flags: ");
+    if (!hdr.oh_flags) {
+	printf("none");
+    } else {
+	if (hdr.oh_flags & XLOG_START_TRANS)
+	    printf("START ");
+	if (hdr.oh_flags & XLOG_COMMIT_TRANS)
+	    printf("COMMIT ");
+	if (hdr.oh_flags & XLOG_WAS_CONT_TRANS)
+	    printf("WAS_CONT ");
+	if (hdr.oh_flags & XLOG_UNMOUNT_TRANS)
+	    printf("UNMOUNT ");
+	if (hdr.oh_flags & XLOG_CONTINUE_TRANS)
+	    printf("CONTINUE ");
+	if (hdr.oh_flags & XLOG_END_TRANS)
+	    printf("END ");
+    }
+    printf("\n");
+}	/* xlog_print_op_header */
+
+
+void	/* Push a new (tid, skip) item on the head of split_list. */
+xlog_print_add_to_trans(xlog_tid_t tid, int skip)
+{
+    xlog_split_item_t *item = calloc(1, sizeof(*item));
+    if (!item) {	/* the result used to be dereferenced unchecked */
+	perror("xlog_print_add_to_trans: calloc");
+	exit(1);
+    }
+    item->si_tid  = tid;
+    item->si_skip = skip;
+    item->si_next = split_list;	/* si_prev stays zeroed by calloc: new head */
+    if (split_list)
+	split_list->si_prev = item;
+    split_list	  = item;
+}	/* xlog_print_add_to_trans */
+
+
+int
+xlog_print_find_tid(xlog_tid_t tid, uint was_cont)
+{
+    xlog_split_item_t *item;
+
+    /* Empty list: the result is simply whether was_cont is set */
+    /* (non-zero means this is not the first use of the tid). */
+    if (split_list == NULL)
+	return was_cont != 0;
+    for (item = split_list; item != NULL; item = item->si_next) {
+	if (item->si_tid == tid)
+	    break;
+    }
+    if (item == NULL)
+	return 0;
+    /*
+     * Found: use up one skip.  Once none are left, unlink the item from
+     * the doubly linked list and free it.
+     */
+    if (--item->si_skip != 0)
+	return 1;
+    if (item != split_list) {			/* delete past the head */
+	if (item->si_next)
+	    item->si_next->si_prev = item->si_prev;
+	item->si_prev->si_next = item->si_next;
+    } else {					/* delete at head */
+	split_list = item->si_next;
+	if (split_list)
+	    split_list->si_prev = NULL;
+    }
+    free(item);
+    return 1;
+}	/* xlog_print_find_tid */
+
+int	/* 0 if the header was decoded, 1 if len is not a header's size */
+xlog_print_trans_header(xfs_caddr_t *ptr, int len)
+{
+    xfs_trans_header_t  hbuf;	/* private copy, decoded below */
+    xfs_caddr_t		cptr = *ptr;
+    char                magic_c[4];	/* first four bytes, in memory order */
+    uint		ntypes = sizeof(trans_type) / sizeof(trans_type[0]);
+
+    *ptr += len;	/* the region is consumed even if it cannot be decoded */
+    if (len >= 4) {	/* touch the magic only when four bytes are present */
+	bcopy(cptr, magic_c, sizeof(magic_c));
+	printf("%c%c%c%c:",
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+                magic_c[3], magic_c[2], magic_c[1], magic_c[0]);
+#else
+	        magic_c[0], magic_c[1], magic_c[2], magic_c[3]);
+#endif
+    }
+    if (len != sizeof(xfs_trans_header_t)) {
+	printf("   Not enough data to decode further\n");
+	return 1;
+    }
+    bcopy(cptr, &hbuf, sizeof(hbuf));	/* cptr is not known to be aligned */
+    printf("    type: %s       tid: %x       num_items: %d\n",
+	   hbuf.th_type < ntypes ? trans_type[hbuf.th_type] : "UNKNOWN",
+	   hbuf.th_tid, hbuf.th_num_items);	/* type is bounds-checked */
+    return 0;
+}	/* xlog_print_trans_header */
+
+
+int	/* skip count (see the end of the function), or blf_size if short */
+xlog_print_trans_buffer(xfs_caddr_t *ptr, int len, int *i, int num_ops)
+{
+    xfs_buf_log_format_t *f;
+    xfs_buf_log_format_v1_t *old_f;
+    xfs_agi_t		 *agi;
+    xfs_agf_t		 *agf;
+    xfs_disk_dquot_t	 *dq;
+    xlog_op_header_t	 *head = 0;
+    int			 num, skip;	/* regions printed here / not available here */
+    int			 super_block = 0;	/* set when blkno is 0 */
+    int			 bucket, col, buckets;
+    __int64_t		 blkno;
+    xfs_buf_log_format_t lbuf;
+    int			 size, blen, map_size, struct_size;
+    long long		 x, y;
+    
+    /*
+     * bcopy to ensure 8-byte alignment for the long longs in
+     * buf_log_format_t structure.  NOTE(review): copies the full
+     * structure size before len has been compared with struct_size.
+     */
+    bcopy(*ptr, &lbuf, sizeof(xfs_buf_log_format_t));
+    f = &lbuf;
+    *ptr += len;
+
+    if (f->blf_type == XFS_LI_BUF) {
+	blkno = f->blf_blkno;
+	size = f->blf_size;
+	blen = f->blf_len;
+	map_size = f->blf_map_size;
+	struct_size = sizeof(xfs_buf_log_format_t);
+    } else {
+	old_f = (xfs_buf_log_format_v1_t*)f;	/* every other type: v1 layout */
+	blkno = old_f->blf_blkno;
+	size = old_f->blf_size;
+	blen = old_f->blf_len;
+	map_size = old_f->blf_map_size;
+	struct_size = sizeof(xfs_buf_log_format_v1_t);
+    }
+    switch (f->blf_type)  {
+    case XFS_LI_BUF:
+	printf("BUF:  ");
+	break;
+    case XFS_LI_6_1_BUF:
+	printf("6.1 BUF:  ");
+	break;
+    case XFS_LI_5_3_BUF:
+	printf("5.3 BUF:  ");
+	break;
+    case XFS_LI_DQUOT:
+	printf("DQUOT BUF:  ");
+	break;
+    default:
+	printf("UNKNOWN BUF:  ");
+	break;
+    }
+    if (len >= struct_size) {
+	ASSERT((len - sizeof(struct_size)) % sizeof(int) == 0);	/* NOTE(review): sizeof(struct_size) is sizeof(int) - was struct_size meant? */
+	printf("#regs: %d   start blkno: %lld (0x%llx)  len: %d  bmap size: %d\n",
+	       size, blkno, blkno, blen, map_size);
+	if (blkno == 0)
+	    super_block = 1;
+    } else {
+	ASSERT(len >= 4);	/* must have at least 4 bytes if != 0 */
+	printf("#regs: %d   Not printing rest of data\n", f->blf_size);
+	return size;	/* NOTE(review): a region count, not a skip count - confirm callers */
+    }
+    num = size-1;
+
+    /* Check if all regions in this log item were in the given LR ptr */
+    if (*i+num > num_ops-1) {
+	skip = num - (num_ops-1-*i);
+	num = num_ops-1-*i;
+    } else {
+	skip = 0;
+    }
+    while (num-- > 0) {
+	(*i)++;
+	head = (xlog_op_header_t *)*ptr;
+	xlog_print_op_header(head, *i, ptr);
+	if (super_block) {
+		printf("SUPER BLOCK Buffer: ");
+		if (INT_GET(head->oh_len, ARCH_CONVERT) < 4*8) {	/* four 8-byte values are read below */
+			printf("Out of space\n");
+		} else {
+			printf("\n");
+			/*
+			 * bcopy because *ptr may not be 8-byte aligned
+			 */
+			bcopy(*ptr, &x, sizeof(long long));
+			bcopy(*ptr+8, &y, sizeof(long long));
+			printf("icount: %lld  ifree: %lld  ", 
+                                INT_GET(x, ARCH_CONVERT), 
+                                INT_GET(y, ARCH_CONVERT));
+			bcopy(*ptr+16, &x, sizeof(long long));
+			bcopy(*ptr+24, &y, sizeof(long long));
+			printf("fdblks: %lld  frext: %lld\n", 
+                                INT_GET(x, ARCH_CONVERT), 
+                                INT_GET(y, ARCH_CONVERT));
+		}
+		super_block = 0;	/* only the first region gets this treatment */
+	} else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_AGI_MAGIC) {
+		agi = (xfs_agi_t *)(*ptr);
+		printf("AGI Buffer: XAGI  ");
+		if (INT_GET(head->oh_len, ARCH_CONVERT) <
+		    sizeof(xfs_agi_t) -
+		    XFS_AGI_UNLINKED_BUCKETS*sizeof(xfs_agino_t)) {
+			printf("out of space\n");
+		} else {
+			printf("\n");
+			printf("ver: %d  ",
+				INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+			printf("seq#: %d  len: %d  cnt: %d  root: %d\n",
+				INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				INT_GET(agi->agi_length, ARCH_CONVERT),
+				INT_GET(agi->agi_count, ARCH_CONVERT),
+				INT_GET(agi->agi_root, ARCH_CONVERT));
+			printf("level: %d  free#: 0x%x  newino: 0x%x\n",
+				INT_GET(agi->agi_level, ARCH_CONVERT),
+				INT_GET(agi->agi_freecount, ARCH_CONVERT),
+				INT_GET(agi->agi_newino, ARCH_CONVERT));
+			if (INT_GET(head->oh_len, ARCH_CONVERT) == 128) {
+				buckets = 17;
+			} else if (INT_GET(head->oh_len, ARCH_CONVERT) == 256) {
+				buckets = 32 + 17;
+			} else {
+				buckets = XFS_AGI_UNLINKED_BUCKETS;
+			}
+			for (bucket = 0; bucket < buckets;) {	/* four buckets per output line */
+				printf("bucket[%d - %d]: ", bucket, bucket+3);
+				for (col = 0; col < 4; col++, bucket++) {
+					if (bucket < buckets) {
+						printf("0x%x ",
+			INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT));
+					}
+				}
+				printf("\n");
+			}
+		}
+	} else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_AGF_MAGIC) {
+		agf = (xfs_agf_t *)(*ptr);
+		printf("AGF Buffer: XAGF  ");
+		if (INT_GET(head->oh_len, ARCH_CONVERT) < sizeof(xfs_agf_t)) {
+			printf("Out of space\n");
+		} else {
+			printf("\n");
+			printf("ver: %d  seq#: %d  len: %d  \n",
+				INT_GET(agf->agf_versionnum, ARCH_CONVERT),
+				INT_GET(agf->agf_seqno, ARCH_CONVERT),
+				INT_GET(agf->agf_length, ARCH_CONVERT));
+			printf("root BNO: %d  CNT: %d\n",
+				INT_GET(agf->agf_roots[XFS_BTNUM_BNOi],
+					ARCH_CONVERT),
+				INT_GET(agf->agf_roots[XFS_BTNUM_CNTi],
+					ARCH_CONVERT));
+			printf("level BNO: %d  CNT: %d\n",
+				INT_GET(agf->agf_levels[XFS_BTNUM_BNOi],
+					ARCH_CONVERT),
+				INT_GET(agf->agf_levels[XFS_BTNUM_CNTi],
+					ARCH_CONVERT));
+			printf("1st: %d  last: %d  cnt: %d  "
+			       "freeblks: %d  longest: %d\n",
+				INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+				INT_GET(agf->agf_fllast, ARCH_CONVERT),
+				INT_GET(agf->agf_flcount, ARCH_CONVERT),
+				INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+				INT_GET(agf->agf_longest, ARCH_CONVERT));
+		}
+	} else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_DQUOT_MAGIC) {
+		dq = (xfs_disk_dquot_t *)(*ptr);
+		printf("DQUOT Buffer: DQ  ");
+		if (INT_GET(head->oh_len, ARCH_CONVERT) <
+				sizeof(xfs_disk_dquot_t)) {
+			printf("Out of space\n");
+		}
+		else {
+			printf("\n");
+			printf("ver: %d  flags: 0x%x  id: %d  \n",
+				INT_GET(dq->d_version, ARCH_CONVERT),
+				INT_GET(dq->d_flags, ARCH_CONVERT),
+				INT_GET(dq->d_id, ARCH_CONVERT));
+			printf("blk limits  hard: %llu  soft: %llu\n",
+				INT_GET(dq->d_blk_hardlimit, ARCH_CONVERT),
+				INT_GET(dq->d_blk_softlimit, ARCH_CONVERT));
+			printf("blk  count: %llu  warns: %d  timer: %d\n",
+				INT_GET(dq->d_bcount, ARCH_CONVERT),
+				INT_GET(dq->d_bwarns, ARCH_CONVERT),
+				INT_GET(dq->d_btimer, ARCH_CONVERT));
+			printf("ino limits  hard: %llu  soft: %llu\n",
+				INT_GET(dq->d_ino_hardlimit, ARCH_CONVERT),
+				INT_GET(dq->d_ino_softlimit, ARCH_CONVERT));
+			printf("ino  count: %llu  warns: %d  timer: %d\n",
+				INT_GET(dq->d_icount, ARCH_CONVERT),
+				INT_GET(dq->d_iwarns, ARCH_CONVERT),
+				INT_GET(dq->d_itimer, ARCH_CONVERT));
+		}
+	} else {
+		printf("BUF DATA\n");
+		if (print_data) {	/* hex dump, eight 32-bit words per line */
+			uint *dp  = (uint *)*ptr;
+			int  nums = INT_GET(head->oh_len, ARCH_CONVERT) >> 2;
+			int  i = 0;	/* shadows the parameter i inside this block */
+
+			while (i < nums) {
+				if ((i % 8) == 0)
+					printf("%2x ", i);
+				printf("%8x ", *dp);
+				dp++;
+				i++;
+				if ((i % 8) == 0)
+					printf("\n");
+			}
+			printf("\n");
+		}
+	}
+	*ptr += INT_GET(head->oh_len, ARCH_CONVERT);	/* step over this region's data */
+    }
+    if (head && head->oh_flags & XLOG_CONTINUE_TRANS)	/* last region printed is continued */
+	skip++;
+    return skip;
+}	/* xlog_print_trans_buffer */
+
+
+int	/* 0 if decoded, 1 if the region is too short */
+xlog_print_trans_efd(xfs_caddr_t *ptr, uint len)
+{
+    xfs_efd_log_format_t lbuf;	/* aligned copy of the fixed-size part */
+    xfs_extent_t	 ext;	/* aligned copy of one extent */
+    xfs_caddr_t		 src = *ptr;
+    uint		 i, n, off;
+
+    *ptr += len;
+    if (len < sizeof(xfs_efd_log_format_t)) {
+	printf("EFD: Not enough data to decode further\n");
+	return 1;
+    }
+    /* bcopy for 8-byte alignment of the long longs; len covers it now. */
+    bcopy(src, &lbuf, sizeof(lbuf));
+    printf("EFD:  #regs: %d    num_extents: %d  id: 0x%llx\n",
+	   lbuf.efd_size, lbuf.efd_nextents, lbuf.efd_efi_id);
+    /*
+     * Walk efd_nextents extents (efd_size counts regions), within len.
+     */
+    off = (uint)((char *)lbuf.efd_extents - (char *)&lbuf);
+    n = (len - off) / sizeof(ext);
+    if (lbuf.efd_nextents < n) n = lbuf.efd_nextents;
+    for (i = 0; i < n; i++) {
+	bcopy(src + off + i * sizeof(ext), &ext, sizeof(ext));
+	printf("(s: 0x%llx, l: %d) ", ext.ext_start, ext.ext_len);
+	if (i % 4 == 3) printf("\n");
+    }
+    if (i % 4 != 0) printf("\n");
+    return 0;
+}	/* xlog_print_trans_efd */
+
+
+int	/* 0 if decoded, 1 if the region is too short */
+xlog_print_trans_efi(xfs_caddr_t *ptr, uint len)
+{
+    xfs_efi_log_format_t lbuf;	/* aligned copy of the fixed-size part */
+    xfs_extent_t	 ext;	/* aligned copy of one extent */
+    xfs_caddr_t		 src = *ptr;
+    uint		 i, n, off;
+
+    *ptr += len;
+    if (len < sizeof(xfs_efi_log_format_t)) {
+	printf("EFI: Not enough data to decode further\n");
+	return 1;
+    }
+    /* bcopy for 8-byte alignment of the long longs; len covers it now. */
+    bcopy(src, &lbuf, sizeof(lbuf));
+    printf("EFI:  #regs: %d    num_extents: %d  id: 0x%llx\n",
+	   lbuf.efi_size, lbuf.efi_nextents, lbuf.efi_id);
+    /*
+     * Walk efi_nextents extents (efi_size counts regions), within len.
+     */
+    off = (uint)((char *)lbuf.efi_extents - (char *)&lbuf);
+    n = (len - off) / sizeof(ext);
+    if (lbuf.efi_nextents < n) n = lbuf.efi_nextents;
+    for (i = 0; i < n; i++) {
+	bcopy(src + off + i * sizeof(ext), &ext, sizeof(ext));
+	printf("(s: 0x%llx, l: %d) ", ext.ext_start, ext.ext_len);
+	if (i % 4 == 3) printf("\n");
+    }
+    if (i % 4 != 0) printf("\n");
+    return 0;
+}	/* xlog_print_trans_efi */
+
+
+/* ARGSUSED */
+void	/* Dump an inode core; fields are printed as stored, without INT_GET. */
+xlog_print_trans_inode_core(xfs_dinode_core_t *ip)
+{
+    printf("INODE CORE\n");
+    printf("magic 0x%hx mode 0%ho version %d format %d\n",
+	   ip->di_magic, ip->di_mode, (int)ip->di_version,
+	   (int)ip->di_format);
+    printf("nlink %hd uid %d gid %d\n",
+	   ip->di_nlink, ip->di_uid, ip->di_gid);
+    printf("atime 0x%x mtime 0x%x ctime 0x%x\n",	/* seconds part only */
+	   ip->di_atime.t_sec, ip->di_mtime.t_sec, ip->di_ctime.t_sec);
+    printf("size 0x%llx nblocks 0x%llx extsize 0x%x nextents 0x%x\n",
+	   ip->di_size, ip->di_nblocks, ip->di_extsize, ip->di_nextents);
+    printf("naextents 0x%x forkoff %d dmevmask 0x%x dmstate 0x%hx\n",
+	   ip->di_anextents, (int)ip->di_forkoff, ip->di_dmevmask,
+	   ip->di_dmstate);
+    printf("flags 0x%x gen 0x%x\n",
+	   ip->di_flags, ip->di_gen);
+}
+
+void
+xlog_print_dir_sf(xfs_dir_shortform_t *sfp, int size)
+{
+	xfs_ino_t	ino;
+	int		count;
+	int		i;
+	char		namebuf[257];
+	xfs_dir_sf_entry_t	*sfep;
+	/* Currently prints only a one-line summary carrying the given size. */
+        /* XXX need to determine whether this is v1 or v2, then
+           print appropriate structure */
+        
+	printf("SHORTFORM DIRECTORY size %d\n",
+                size);
+        /* bail out for now: everything after this return is unreachable */
+        
+        return;
+        /* dead code below, kept until the XXX above is resolved */
+	printf("SHORTFORM DIRECTORY size %d count %d\n",
+	       size, sfp->hdr.count);
+	bcopy(&(sfp->hdr.parent), &ino, sizeof(ino));
+	printf(".. ino 0x%llx\n", INT_GET(ino, ARCH_CONVERT));
+
+	count = (uint)(sfp->hdr.count);
+	sfep = &(sfp->list[0]);
+	for (i = 0; i < count; i++) {
+		bcopy(&(sfep->inumber), &ino, sizeof(ino));
+		bcopy((sfep->name), namebuf, sfep->namelen);
+		namebuf[sfep->namelen] = '\0';
+		printf("%s ino 0x%llx namelen %d\n",
+		       namebuf, ino, sfep->namelen);
+		sfep = XFS_DIR_SF_NEXTENTRY(sfep);
+	}
+}
+
+int
+xlog_print_trans_inode(xfs_caddr_t *ptr, int len, int *i, int num_ops)
+{
+    xfs_inode_log_format_t *f;
+    xfs_inode_log_format_t_v1 *old_f;
+    xfs_dinode_core_t	   dino;	/* aligned copy of the logged core */
+    xlog_op_header_t	   *op_head;
+    int			   version;
+    xfs_inode_log_format_t lbuf = {0};	/* zeroed: the copy below may be short */
+    int			   mode;
+    int			   size;
+
+    /*
+     * Print an inode log item: the format header region at *ptr (len
+     * bytes), then the inode core and optional data-fork regions that
+     * follow.  *ptr and *i are advanced past each region consumed.  The
+     * return value is used by xlog_print_record as a count for
+     * xlog_print_add_to_trans when non-zero.  The header is bcopy'd for
+     * 8-byte alignment; len can be smaller than the structure (?).
+     */
+    bcopy(*ptr, &lbuf, MIN(sizeof(xfs_inode_log_format_t), len));
+    version = lbuf.ilf_type;
+    f = &lbuf;
+    (*i)++;					/* bump index */
+    *ptr += len;
+    if (version == XFS_LI_5_3_INODE) {
+	old_f = (xfs_inode_log_format_t_v1 *)f;	/* same bytes, v1 layout */
+	if (len == sizeof(xfs_inode_log_format_t_v1)) {
+	    printf("5.3 INODE: #regs: %d   ino: 0x%llx  flags: 0x%x   dsize: %d\n",
+		   old_f->ilf_size, old_f->ilf_ino,
+		   old_f->ilf_fields, old_f->ilf_dsize);
+	} else {
+	    ASSERT(len >= 4);	/* must have at least 4 bytes if != 0 */
+	    printf("5.3 INODE: #regs: %d   Not printing rest of data\n",
+		   old_f->ilf_size);
+	    return old_f->ilf_size;
+	}
+    } else {
+	if (len == sizeof(xfs_inode_log_format_t)) {
+	    if (version == XFS_LI_6_1_INODE)
+		printf("6.1 INODE: ");
+	    else printf("INODE: ");
+	    printf("#regs: %d   ino: 0x%llx  flags: 0x%x   dsize: %d\n",
+		   f->ilf_size, f->ilf_ino, f->ilf_fields, f->ilf_dsize);
+	    printf("        blkno: %lld  len: %d  boff: %d\n",
+		   f->ilf_blkno, f->ilf_len, f->ilf_boffset);
+	} else {
+	    ASSERT(len >= 4);	/* must have at least 4 bytes if != 0 */
+	    printf("INODE: #regs: %d   Not printing rest of data\n",
+		   f->ilf_size);
+	    return f->ilf_size;
+	}
+    }
+
+    if (*i >= num_ops)			/* end of LR */
+	    return f->ilf_size-1;
+
+    /* core inode comes 2nd */
+    op_head = (xlog_op_header_t *)*ptr;
+    xlog_print_op_header(op_head, *i, ptr);
+    
+    if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))  {
+	return f->ilf_size-1;	/* op flagged as continued: stop decoding here */
+    }
+    
+    bcopy(*ptr, &dino, sizeof(dino));
+    mode = dino.di_mode & IFMT;
+    size = (int)dino.di_size;
+    xlog_print_trans_inode_core(&dino);
+    *ptr += sizeof(xfs_dinode_core_t);
+
+    if (*i == num_ops-1 && f->ilf_size == 3)  {
+	    return 1;	/* a third region is expected but no ops remain */
+    }
+
+    /* does anything come next */
+    op_head = (xlog_op_header_t *)*ptr;
+    switch (f->ilf_fields & XFS_ILOG_NONCORE) {
+	case XFS_ILOG_DEXT: {
+	    ASSERT(f->ilf_size == 3);
+	    (*i)++;
+	    xlog_print_op_header(op_head, *i, ptr);
+	    printf("EXTENTS inode data\n");
+	    *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);	/* skip payload */
+	    if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))  {
+		return 1;
+	    }
+	    break;
+	}
+	case XFS_ILOG_DBROOT: {
+	    ASSERT(f->ilf_size == 3);
+	    (*i)++;
+	    xlog_print_op_header(op_head, *i, ptr);
+	    printf("BTREE inode data\n");
+	    *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);	/* skip payload */
+	    if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))  {
+		return 1;
+	    }
+	    break;
+	}
+	case XFS_ILOG_DDATA: {
+	    ASSERT(f->ilf_size == 3);
+	    (*i)++;
+	    xlog_print_op_header(op_head, *i, ptr);
+	    printf("LOCAL inode data\n");
+	    if (mode == IFDIR) {	/* mode/size come from the core copied above */
+		xlog_print_dir_sf((xfs_dir_shortform_t*)*ptr, size);
+	    }
+	    *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);	/* skip payload */
+	    if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))
+		return 1;
+	    break;
+	}
+	case XFS_ILOG_DEV: {
+	    ASSERT(f->ilf_size == 2);
+	    printf("DEV inode: no extra region\n");
+	    break;
+	}
+	case XFS_ILOG_UUID: {
+	    ASSERT(f->ilf_size == 2);
+	    printf("UUID inode: no extra region\n");
+	    break;
+	}
+	case 0: {
+	    ASSERT(f->ilf_size == 2);
+	    break;
+	}
+	default: {	/* more than one non-core flag set, or an unknown one */
+	    xlog_panic("xlog_print_trans_inode: illegal inode type");
+	}
+    }
+    return 0;
+}	/* xlog_print_trans_inode */
+
+
+
+/******************************************************************************
+ *
+ *		Log print routines
+ *
+ ******************************************************************************
+ */
+
+void
+xlog_print_lseek(xlog_t *log, int fd, xfs_daddr_t blkno, int whence)
+{
+#define BBTOOFF64(bbs)	(((xfs_off_t)(bbs)) << BBSHIFT)
+	xfs_off_t offset;
+	/* Seek fd by blkno basic blocks (log-relative if SEEK_SET) or exit. */
+	if (whence == SEEK_SET)
+		offset = BBTOOFF64(blkno+log->l_logBBstart);
+	else
+		offset = BBTOOFF64(blkno);	/* log is not dereferenced here */
+	if (lseek64(fd, offset, whence) < 0) {
+		fprintf(stderr, "%s: lseek64 to %lld failed: %s\n",
+			progname, (long long)offset, strerror(errno));
+		exit(1);
+	}
+}	/* xlog_print_lseek */
+
+
+/* Print "<string>: <cycle>,<block>" for *lsn; no trailing newline. */
+void
+print_lsn(xfs_caddr_t string, xfs_lsn_t *lsn, xfs_arch_t arch)
+{
+    printf("%s: %u,%u",
+	   string,
+	   CYCLE_LSN(*lsn, arch), BLOCK_LSN(*lsn, arch));
+}
+
+
+int
+xlog_print_record(int		  fd,		/* descriptor the record body is read from */
+		 int		  num_ops,	/* number of op headers to decode */
+		 int		  len,		/* record body length in bytes */
+		 int		  *read_type,	/* FULL_READ, or bytes already in *partial_buf */
+		 xfs_caddr_t	  *partial_buf,	/* buffer carried across a PARTIAL_READ */
+		 xlog_rec_header_t *rhead)	/* header of this record */
+{
+    xlog_op_header_t	*op_head;
+    xlog_rec_header_t	*rechead;
+    xfs_caddr_t		buf, ptr;
+    int			read_len, skip;
+    int			ret, n, i;
+    /* Returns NO_ERROR, PARTIAL_READ (buffer handed back to caller) or -1. */
+    if (print_no_print)
+	    return NO_ERROR;
+    
+    if (!len) {
+        printf("\n");
+        return NO_ERROR;
+    }
+
+    /* read_len must read up to some block boundary */
+    read_len = (int) BBTOB(BTOBB(len));
+
+    /* read_type => don't malloc() new buffer, use old one */
+    if (*read_type == FULL_READ) {
+	if ((ptr = buf = (xfs_caddr_t)malloc(read_len)) == NULL) {
+	    fprintf(stderr, "xlog_print_record: malloc failed\n");
+	    exit(1);
+	}
+    } else {
+	read_len -= *read_type;	/* only the missing remainder is read */
+	buf = (xfs_caddr_t)((__psint_t)(*partial_buf) + (__psint_t)(*read_type));
+	ptr = *partial_buf;	/* ptr keeps the start of the whole buffer */
+    }
+    if ((ret = (int) read(fd, buf, read_len)) == -1) {
+	fprintf(stderr, "xlog_print_record: read error\n");
+	exit(1);
+    }
+    /* Did we overflow the end? */
+    if (*read_type == FULL_READ &&
+	BLOCK_LSN(rhead->h_lsn, ARCH_CONVERT)+BTOBB(read_len) >= logBBsize) {
+	*read_type = BBTOB(logBBsize-BLOCK_LSN(rhead->h_lsn, ARCH_CONVERT)-1);
+	*partial_buf = buf;
+	return PARTIAL_READ;
+    }
+    
+    /* Did we read everything? */
+    if ((ret == 0 && read_len != 0) || ret != read_len) {
+	*read_type = ret;
+	*partial_buf = buf;
+	return PARTIAL_READ;
+    }
+    if (*read_type != FULL_READ)
+	read_len += *read_type;	/* back to the full, block-rounded length */
+    
+    /* Everything read in.  Start from beginning of buffer */
+    buf = ptr;
+    for (i = 0; ptr < buf + read_len; ptr += BBSIZE, i++) {
+	rechead = (xlog_rec_header_t *)ptr;
+	if (INT_GET(rechead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
+	    xlog_print_lseek(0, fd, -read_len+i*BBSIZE, SEEK_CUR);
+	    free(buf);
+	    return -1;
+	} else {
+	    if (INT_GET(rhead->h_cycle, ARCH_CONVERT) !=
+			INT_GET(*(uint *)ptr, ARCH_CONVERT)) {
+		if (*read_type == FULL_READ)
+		    return -1;	/* NOTE(review): buf is not freed on this path */
+		else if (INT_GET(rhead->h_cycle, ARCH_CONVERT) + 1 !=
+			INT_GET(*(uint *)ptr, ARCH_CONVERT))
+		    return -1;	/* NOTE(review): buf is not freed on this path */
+	    }
+	}
+	INT_SET(*(uint *)ptr, ARCH_CONVERT,	/* put back block i's saved first word */
+		INT_GET(rhead->h_cycle_data[i], ARCH_CONVERT));
+    }
+    ptr = buf;
+    for (i=0; i<num_ops; i++) {	/* callees may advance i past extra regions */
+	print_xlog_op_line();
+	op_head = (xlog_op_header_t *)ptr;
+	xlog_print_op_header(op_head, i, &ptr);
+
+	/* print transaction data */
+	if (print_no_data ||
+	    ((XLOG_SET(op_head->oh_flags, XLOG_WAS_CONT_TRANS) ||
+	      XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) && 
+	     INT_GET(op_head->oh_len, ARCH_CONVERT) == 0)) {
+	    for (n = 0; n < INT_GET(op_head->oh_len, ARCH_CONVERT); n++) {
+		printf("%c", *ptr);	/* payload emitted as raw characters */
+		ptr++;
+	    }
+	    printf("\n");
+	    continue;
+	}
+	if (xlog_print_find_tid(INT_GET(op_head->oh_tid, ARCH_CONVERT),
+				op_head->oh_flags & XLOG_WAS_CONT_TRANS)) {
+	    printf("Left over region from split log item\n");
+	    ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+	    continue;
+	}
+	if (INT_GET(op_head->oh_len, ARCH_CONVERT) != 0) {
+	    if (*(uint *)ptr == XFS_TRANS_HEADER_MAGIC) {
+		skip = xlog_print_trans_header(&ptr,
+					INT_GET(op_head->oh_len, ARCH_CONVERT));
+	    } else {
+		switch (*(unsigned short *)ptr) {	/* leading 16-bit item type */
+		    case XFS_LI_5_3_BUF:
+		    case XFS_LI_6_1_BUF:
+		    case XFS_LI_DQUOT:
+		    case XFS_LI_BUF: {
+			skip = xlog_print_trans_buffer(&ptr,
+					INT_GET(op_head->oh_len, ARCH_CONVERT),
+					&i, num_ops);
+			break;
+		    }
+		    case XFS_LI_5_3_INODE:
+		    case XFS_LI_6_1_INODE:
+		    case XFS_LI_INODE: {
+			skip = xlog_print_trans_inode(&ptr,
+					INT_GET(op_head->oh_len, ARCH_CONVERT),
+					&i, num_ops);
+			break;
+		    }
+		    case XFS_LI_EFI: {
+			skip = xlog_print_trans_efi(&ptr,
+					INT_GET(op_head->oh_len, ARCH_CONVERT));
+			break;
+		    }
+		    case XFS_LI_EFD: {
+			skip = xlog_print_trans_efd(&ptr,
+					INT_GET(op_head->oh_len, ARCH_CONVERT));
+			break;
+		    }
+		    case XLOG_UNMOUNT_TYPE: {
+			printf("Unmount filesystem\n");
+			skip = 0;
+			break;
+		    }
+		    default: {
+			fprintf(stderr, "%s: unknown log operation type (%x)\n",
+                                progname, *(unsigned short *)ptr);
+			skip = 0;
+			ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+		    }
+		} /* switch */
+	    } /* else */
+	    if (skip != 0)	/* record the non-zero count against this tid */
+		xlog_print_add_to_trans(INT_GET(op_head->oh_tid, ARCH_CONVERT), skip);
+	}
+    }
+    printf("\n");
+    free(buf);
+    return NO_ERROR;
+}	/* xlog_print_record */
+
+
+int
+xlog_print_rec_head(xlog_rec_header_t *head, int *len)
+{
+    int i;
+    char uub[64];
+    int datalen,bbs;
+    /* Prints one record header; returns h_num_logops, ZEROED_LOG or BAD_HEADER. */
+    if (print_no_print)
+	    return INT_GET(head->h_num_logops, ARCH_CONVERT);
+    /* NOTE: *len is only stored on the fully-printed path at the bottom. */
+    if (INT_ISZERO(head->h_magicno, ARCH_CONVERT))
+        return ZEROED_LOG;
+
+    if (INT_GET(head->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) {
+	printf("Header 0x%x wanted 0x%x\n", 
+                INT_GET(head->h_magicno, ARCH_CONVERT), 
+                XLOG_HEADER_MAGIC_NUM);
+	return BAD_HEADER;
+    }
+    /* basic blocks in the body, rounded up; ?: binds looser than +, so parenthesize */
+    datalen=INT_GET(head->h_len, ARCH_CONVERT);
+    bbs=(datalen/BBSIZE)+((datalen%BBSIZE)?1:0);
+    bbs=MIN(bbs, (int)(sizeof(head->h_cycle_data)/sizeof(head->h_cycle_data[0])));
+    printf("cycle: %d	version: %d	", 
+            INT_GET(head->h_cycle, ARCH_CONVERT), 
+            INT_GET(head->h_version, ARCH_CONVERT));
+    print_lsn("	lsn", &head->h_lsn, ARCH_CONVERT);
+    print_lsn("	tail_lsn", &head->h_tail_lsn, ARCH_CONVERT);
+    printf("\n");
+    printf("length of Log Record: %d	prev offset: %d		num ops: %d\n",
+	   datalen, 
+            INT_GET(head->h_prev_block, ARCH_CONVERT), 
+            INT_GET(head->h_num_logops, ARCH_CONVERT));
+    
+    if (print_overwrite) {
+        printf("cycle num overwrites: ");
+        for (i=0; i< bbs; i++)	/* bbs is capped above so a bad h_len cannot overrun */
+	    printf("%d - 0x%x  ",
+                    i,
+                    INT_GET(head->h_cycle_data[i], ARCH_CONVERT));
+        printf("\n");
+    }
+    
+    uuid_unparse(head->h_fs_uuid, uub);
+    printf("uuid: %s   format: ", uub);
+    switch (INT_GET(head->h_fmt, ARCH_CONVERT)) {
+        case XLOG_FMT_UNKNOWN:  
+            printf("unknown\n"); 
+            break;
+        case XLOG_FMT_LINUX_LE: 
+            printf("little endian linux\n"); 
+            break;
+        case XLOG_FMT_LINUX_BE: 
+            printf("big endian linux\n"); 
+            break;
+        case XLOG_FMT_IRIX_BE: 
+            printf("big endian irix\n"); 
+            break;
+        default:                
+            printf("? (%d)\n", INT_GET(head->h_fmt, ARCH_CONVERT)); 
+            break;
+    }
+    
+    *len = INT_GET(head->h_len, ARCH_CONVERT);
+    return(INT_GET(head->h_num_logops, ARCH_CONVERT));
+}	/* xlog_print_rec_head */
+
+static void
+print_xlog_bad_zeroed(xfs_daddr_t blkno)
+{
+    /* starred banner naming the block; xlog_exit() only if print_exit */
+    print_stars();
+    printf("* ERROR: found data after zeroed blocks block=%-21lld  *\n",
+	   (__int64_t)blkno);
+    print_stars();
+    if (print_exit) xlog_exit("Bad log - data after zeroed blocks");
+}	/* print_xlog_bad_zeroed */
+
+static void
+print_xlog_bad_header(xfs_daddr_t blkno, xfs_caddr_t buf)
+{
+    /* starred banner with buf's cycle word and the block number */
+    print_stars();
+    printf("* ERROR: header cycle=%-11d block=%-21lld        *\n",
+	   GET_CYCLE(buf, ARCH_CONVERT), (__int64_t)blkno);
+    print_stars();
+    if (print_exit) xlog_exit("Bad log record header");
+}	/* print_xlog_bad_header */
+
+void
+print_xlog_bad_data(xfs_daddr_t blkno)
+{
+    /* starred banner naming the block; xlog_exit() only if print_exit */
+    print_stars();
+    printf("* ERROR: data block=%-21lld                             *\n",
+	   (__int64_t)blkno);
+    print_stars();
+    if (print_exit) xlog_exit("Bad data in log");
+}	/* print_xlog_bad_data */
+
+
+/*
+ * Walks the log from block_start to the physical end, then wraps to
+ * block 0 and stops at block_end.  This code is gross; rewrite wanted. */
+void xfs_log_print(xlog_t       *log,
+                   int          fd,
+		   int		print_block_start)	/* -1 => start at oldest record */
+{
+    char	hbuf[XLOG_HEADER_SIZE];
+    int		num_ops, len;
+    xfs_daddr_t	block_end = 0, block_start, blkno, error;
+    int		read_type = FULL_READ;
+    xfs_caddr_t	partial_buf;
+    int         zeroed = 0;	/* run length of consecutive zeroed headers */
+
+    logBBsize = log->l_logBBsize;
+              
+    /*
+     * Normally, block_start and block_end are the same value since we
+     * are printing the entire log.  However, if the start block is given,
+     * we still end at the end of the logical log.
+     */
+    if (error = xlog_print_find_oldest(log, &block_end)) {
+	    fprintf(stderr, "%s: problem finding oldest LR\n", progname);
+	    return;
+    }
+    if (print_block_start == -1)
+	    block_start = block_end;
+    else
+	    block_start = print_block_start;
+    xlog_print_lseek(log, fd, block_start, SEEK_SET);
+    blkno    = block_start;
+    
+    for (;;) {	/* pass 1: block_start up to the physical end of the log */
+	if (read(fd, hbuf, 512) == 0) {	/* NOTE(review): a -1 return is not checked */
+	    printf("%s: physical end of log\n", progname);
+	    print_xlog_record_line();
+	    break;
+        }
+	if (print_only_data) {
+		printf("BLKNO: %lld\n", (__int64_t)blkno);
+		xlog_recover_print_data(hbuf, 512);
+		blkno++;
+		goto loop;
+	}
+	num_ops = xlog_print_rec_head((xlog_rec_header_t *)hbuf, &len);
+	blkno++;
+        
+        if (zeroed && num_ops != ZEROED_LOG) {
+            printf("%s: after %d zeroed blocks\n", progname, zeroed);
+            /* once we find zeroed blocks - that's all we expect */
+            print_xlog_bad_zeroed(blkno-1);
+            /* reset count since we're assuming previous zeroed blocks
+             * were bad
+             */
+            zeroed = 0;
+        }
+        
+        if (num_ops == ZEROED_LOG || num_ops == BAD_HEADER) {
+            if (num_ops == ZEROED_LOG) {
+                zeroed++; 
+            } else {
+	        print_xlog_bad_header(blkno-1, hbuf);
+            }
+            
+	    goto loop;
+	}
+        
+	error =	xlog_print_record(fd, num_ops, len, &read_type, &partial_buf,
+				  (xlog_rec_header_t *)hbuf);
+	switch (error) {
+	    case 0: {
+		blkno += BTOBB(len);
+		if (print_block_start != -1 &&
+		    blkno >= block_end)		/* If start specified, we */
+			goto end;		/* end early */
+		break;
+	    }
+	    case -1: {
+		print_xlog_bad_data(blkno-1);
+		if (print_block_start != -1 &&
+		    blkno >= block_end)		/* If start specified, */
+			goto end;		/* we end early */
+		xlog_print_lseek(log, fd, blkno, SEEK_SET);
+		goto loop;
+	    }
+	    case PARTIAL_READ: {
+                print_xlog_record_line();
+		printf("%s: physical end of log\n", progname);
+                print_xlog_record_line();
+		blkno = 0;
+		xlog_print_lseek(log, fd, 0, SEEK_SET);
+		/*
+		 * We may have hit the end of the log when we started at 0.
+		 * In this case, just end.
+		 */
+		if (block_start == 0)
+			goto end;
+		goto partial_log_read;	/* jumps into the middle of the pass-2 loop */
+	    }
+	    default: xlog_panic("illegal value");
+	}
+	print_xlog_record_line();
+loop:
+	if (blkno >= logBBsize) {
+                if (zeroed) {
+                    printf("%s: skipped %d zeroed blocks\n", progname, zeroed);
+                    if (zeroed == logBBsize)
+                        printf("%s: totally zeroed log\n", progname);
+                    
+                    zeroed=0;
+                }
+		printf("%s: physical end of log\n", progname);
+		print_xlog_record_line();
+		break;
+	}
+    }
+
+    /* Do we need to print the first part of physical log? */
+    if (block_start != 0) {
+	blkno = 0;
+	xlog_print_lseek(log, fd, 0, SEEK_SET);
+	for (;;) {	/* pass 2: block 0 up to block_end */
+	    if (read(fd, hbuf, 512) == 0) {	/* NOTE(review): a -1 return is not checked */
+		xlog_panic("xlog_find_head: bad read");
+	    }
+	    if (print_only_data) {
+		printf("BLKNO: %lld\n", (__int64_t)blkno);
+		xlog_recover_print_data(hbuf, 512);
+		blkno++;
+		goto loop2;
+	    }
+	    num_ops = xlog_print_rec_head((xlog_rec_header_t *)hbuf, &len);
+	    blkno++;
+        
+	    if (num_ops == ZEROED_LOG || num_ops == BAD_HEADER) {
+                /* we only expect zeroed log entries at the end
+                 * of the _physical_ log, so treat them the same
+                 * as bad blocks here
+                 */
+		print_xlog_bad_header(blkno-1, hbuf);
+                
+		if (blkno >= block_end)
+		    break;
+		continue;
+	    }
+partial_log_read:	/* also entered by goto from the PARTIAL_READ case above */
+	    error= xlog_print_record(fd, num_ops, len, &read_type,
+				    &partial_buf, (xlog_rec_header_t *)hbuf);
+	    if (read_type != FULL_READ)
+		len -= read_type;
+	    read_type = FULL_READ;
+	    if (!error)
+		blkno += BTOBB(len);
+	    else {
+		print_xlog_bad_data(blkno-1);
+		xlog_print_lseek(log, fd, blkno, SEEK_SET);
+		goto loop2;
+	    }
+	    print_xlog_record_line();
+loop2:
+	    if (blkno >= block_end)
+		break;
+        }
+    }
+    
+end:
+    printf("%s: logical end of log\n", progname);
+    print_xlog_record_line();
+}
diff --git a/logprint/log_print_all.c b/logprint/log_print_all.c
new file mode 100644
index 000000000..a1a81cc99
--- /dev/null
+++ b/logprint/log_print_all.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+
+
+/*
+ * Start is defined to be the block pointing to the oldest valid log record.
+ * Used by log print code.  Don't put in cmd/xfs/logprint/xfs_log_print.c
+ * since most of the bread routines live in kern/fs/xfs/xfs_log_recover only.
+ */
+int
+xlog_print_find_oldest(
+	struct log  *log,
+	xfs_daddr_t *last_blk)
+{
+	xfs_buf_t	*bp;
+	xfs_daddr_t	first_blk;
+	uint	first_half_cycle, last_half_cycle;
+	int	error = 0;
+	/* non-zero from xlog_find_zeroed: report success, *last_blk not set */
+	if (xlog_find_zeroed(log, &first_blk))
+		return 0;
+
+	first_blk = 0;		/* read first block */
+	bp = xlog_get_bp(1, log->l_mp);
+	xlog_bread(log, 0, 1, bp);
+	first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+	*last_blk = log->l_logBBsize-1;	/* read last block */
+	xlog_bread(log, *last_blk, 1, bp);
+	last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+	ASSERT(last_half_cycle != 0);
+
+	if (first_half_cycle == last_half_cycle) { /* all cycle nos are same */
+		*last_blk = 0;
+	} else {		/* have 1st and last; look for middle cycle */
+		error = xlog_find_cycle_start(log, bp, first_blk,
+					      last_blk, last_half_cycle);
+		/* fall through on failure too, so that bp is released
+		 * below rather than leaked by an early return */
+	}
+
+	xlog_put_bp(bp);
+	return error;
+} /* xlog_print_find_oldest */
+
+
+void
+xlog_recover_print_data(
+	xfs_caddr_t 	p, 
+	int 		len)
+{
+	uint	*words = (uint *)p;
+	int	nwords = len >> 2;	/* number of 4-byte units in the region */
+	int	idx;
+
+	/* Hex-dump the region, eight words per row, only if print_data. */
+	if (!print_data)
+		return;
+
+	for (idx = 0; idx < nwords; idx++) {
+		if ((idx % 8) == 0)
+			printf("%2x ", idx);	/* row label: index of first word */
+		printf("%8x ", words[idx]);
+		if (((idx + 1) % 8) == 0)
+			printf("\n");
+	}
+	printf("\n");
+} /* xlog_recover_print_data */
+
+
+STATIC void
+xlog_recover_print_buffer(
+	xlog_recover_item_t *item)
+{
+	xfs_agi_t		*agi;
+	xfs_agf_t		*agf;
+	xfs_buf_log_format_v1_t	*old_f;
+	xfs_buf_log_format_t	*f;
+	xfs_caddr_t		p;
+	int			len, num, i;
+	xfs_daddr_t		blkno;
+	xfs_disk_dquot_t	*ddq;
+	/* Print a buffer log item: format header (region 0), then each data region. */
+	f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
+	old_f = (xfs_buf_log_format_v1_t *)f;	/* same bytes, v1 layout */
+	len = item->ri_buf[0].i_len;
+	printf("	");
+	switch (f->blf_type)  {
+	    case XFS_LI_BUF: {
+		printf("BUF:  ");
+		break;
+	    }
+	    case XFS_LI_6_1_BUF: {
+		printf("6.1 BUF:  ");
+		break;
+	    }
+	    case XFS_LI_5_3_BUF: {
+		printf("5.3 BUF:  ");
+		break;
+	    }
+	} 
+	if (f->blf_type == XFS_LI_BUF) {
+		printf("#regs:%d   start blkno:0x%Lx   len:%d   bmap size:%d\n",
+		       f->blf_size, f->blf_blkno, f->blf_len, f->blf_map_size);
+		blkno = (xfs_daddr_t)f->blf_blkno;
+	} else {
+		printf("#regs:%d   start blkno:0x%x   len:%d   bmap size:%d\n",
+		       old_f->blf_size, old_f->blf_blkno, old_f->blf_len,
+		       old_f->blf_map_size);
+		blkno = (xfs_daddr_t)old_f->blf_blkno;
+	}
+	num = f->blf_size-1;	/* regions that follow the format header */
+	i = 1;
+	while (num-- > 0) {
+		p = item->ri_buf[i].i_addr;
+		len = item->ri_buf[i].i_len;
+		i++;
+		if (blkno == 0) { /* super block */
+			printf("	SUPER Block Buffer:\n");
+			if (!print_buffer) continue;
+			printf("		icount:%Ld  ifree:%Ld  ",
+			       INT_GET(*(long long *)(p), ARCH_CONVERT), 
+                               INT_GET(*(long long *)(p+8), ARCH_CONVERT));
+			printf("fdblks:%Ld  frext:%Ld\n",
+			       INT_GET(*(long long *)(p+16), ARCH_CONVERT),
+			       INT_GET(*(long long *)(p+24), ARCH_CONVERT));
+			printf("		sunit:%u  swidth:%u\n", 
+			       INT_GET(*(uint *)(p+56), ARCH_CONVERT),
+			       INT_GET(*(uint *)(p+60), ARCH_CONVERT));
+		} else if (INT_GET(*(uint *)p, ARCH_CONVERT) == XFS_AGI_MAGIC) {
+			agi = (xfs_agi_t *)p;
+			printf("	AGI Buffer: (XAGI)\n");
+			if (!print_buffer) continue;
+			printf("		ver:%d  ",
+				INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+			printf("seq#:%d  len:%d  cnt:%d  root:%d\n",
+				INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				INT_GET(agi->agi_length, ARCH_CONVERT),
+				INT_GET(agi->agi_count, ARCH_CONVERT),
+				INT_GET(agi->agi_root, ARCH_CONVERT));
+			printf("		level:%d  free#:0x%x  newino:0x%x\n",
+				INT_GET(agi->agi_level, ARCH_CONVERT),
+				INT_GET(agi->agi_freecount, ARCH_CONVERT),
+				INT_GET(agi->agi_newino, ARCH_CONVERT));
+		} else if (INT_GET(*(uint *)p, ARCH_CONVERT) == XFS_AGF_MAGIC) {
+			agf = (xfs_agf_t *)p;
+			printf("	AGF Buffer: (XAGF)\n");
+			if (!print_buffer) continue;
+			printf("		ver:%d  seq#:%d  len:%d  \n",
+				INT_GET(agf->agf_versionnum, ARCH_CONVERT),
+				INT_GET(agf->agf_seqno, ARCH_CONVERT),
+				INT_GET(agf->agf_length, ARCH_CONVERT));
+			printf("		root BNO:%d  CNT:%d\n",
+				INT_GET(agf->agf_roots[XFS_BTNUM_BNOi],
+					ARCH_CONVERT),
+				INT_GET(agf->agf_roots[XFS_BTNUM_CNTi],
+					ARCH_CONVERT));
+			printf("		level BNO:%d  CNT:%d\n",
+				INT_GET(agf->agf_levels[XFS_BTNUM_BNOi],
+					ARCH_CONVERT),
+				INT_GET(agf->agf_levels[XFS_BTNUM_CNTi],
+					ARCH_CONVERT));
+			printf("		1st:%d  last:%d  cnt:%d  "
+				"freeblks:%d  longest:%d\n",
+				INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+				INT_GET(agf->agf_fllast, ARCH_CONVERT),
+				INT_GET(agf->agf_flcount, ARCH_CONVERT),
+				INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+				INT_GET(agf->agf_longest, ARCH_CONVERT));
+		} else if (INT_GET(((xfs_disk_dquot_t *)p)->d_magic, ARCH_CONVERT) == XFS_DQUOT_MAGIC) { /* test the d_magic field, converted like the AGI/AGF checks */
+			ddq = (xfs_disk_dquot_t *)p;
+			printf("	DQUOT Buffer:\n");
+			if (!print_buffer) continue;
+			printf("		UIDs 0x%x-0x%x\n", 
+			       INT_GET(ddq->d_id, ARCH_CONVERT),
+			       INT_GET(ddq->d_id, ARCH_CONVERT) +
+			       (BBTOB(f->blf_len) / sizeof(xfs_dqblk_t)) - 1);
+		} else {
+			printf("	BUF DATA\n");
+			if (!print_buffer) continue;
+			xlog_recover_print_data(p, len);
+		}
+	}
+} /* xlog_recover_print_buffer */
+
+STATIC void
+xlog_recover_print_quotaoff(
+	xlog_recover_item_t *item)
+{
+	xfs_qoff_logformat_t *qoff_f;
+	char str[32] = "";	/* fits both names; stays empty if no flag is set */
+	/* Print a QUOTAOFF item, naming each quota type whose _ACCT flag is set. */
+	qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr;
+	ASSERT(qoff_f);
+	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 
+		strcat(str, "USER QUOTA");
+	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+		strcat(str, "PROJ QUOTA");
+	printf("\tQUOTAOFF: #regs:%d   type:%s\n",
+	       qoff_f->qf_size, str);
+}
+
+
+STATIC void
+xlog_recover_print_dquot(
+	xlog_recover_item_t *item)
+{
+	xfs_dq_logformat_t	*f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr;
+	xfs_disk_dquot_t	*d;
+	int			bcount, icount;
+
+	/* Region 0 is read as the dquot log format, region 1 as the dquot. */
+	ASSERT(f);
+	ASSERT(f->qlf_len == 1);
+	d = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
+	printf("\tDQUOT: #regs:%d  blkno:%Ld  boffset:%u id: %d\n",
+	       f->qlf_size, f->qlf_blkno, f->qlf_boffset, f->qlf_id);
+	if (print_quota) {
+		bcount = (int)INT_GET(d->d_bcount, ARCH_CONVERT);
+		icount = (int)INT_GET(d->d_icount, ARCH_CONVERT);
+		printf("\t\tmagic 0x%x\tversion 0x%x\tID 0x%x (%d)\t\n",
+		       INT_GET(d->d_magic, ARCH_CONVERT),
+		       INT_GET(d->d_version, ARCH_CONVERT),
+		       INT_GET(d->d_id, ARCH_CONVERT),
+		       INT_GET(d->d_id, ARCH_CONVERT));
+		printf("\t\tblk_hard 0x%x\tblk_soft 0x%x\tino_hard 0x%x"
+		       "\tino_soft 0x%x\n",
+		       (int)INT_GET(d->d_blk_hardlimit, ARCH_CONVERT),
+		       (int)INT_GET(d->d_blk_softlimit, ARCH_CONVERT),
+		       (int)INT_GET(d->d_ino_hardlimit, ARCH_CONVERT),
+		       (int)INT_GET(d->d_ino_softlimit, ARCH_CONVERT));
+		printf("\t\tbcount 0x%x (%d) icount 0x%x (%d)\n",
+		       bcount, bcount, icount, icount);
+		printf("\t\tbtimer 0x%x itimer 0x%x \n",
+		       (int)INT_GET(d->d_btimer, ARCH_CONVERT),
+		       (int)INT_GET(d->d_itimer, ARCH_CONVERT));
+	}
+}
+
+STATIC void
+xlog_recover_print_inode_core(
+	xfs_dinode_core_t *di)
+{
+	printf("	CORE inode:\n");
+	if (print_inode) {	/* heading always; field detail only on request */
+		int	magic_hi = (di->di_magic>>8) & 0xff;
+		int	magic_lo = di->di_magic & 0xff;
+
+		printf("		magic:%c%c  mode:0x%x  ver:%d  format:%d  onlink:%d\n",
+		       magic_hi, magic_lo, di->di_mode,
+		       di->di_version, di->di_format, di->di_onlink);
+		printf("		uid:%d  gid:%d  nlink:%d projid:%d\n",
+		       di->di_uid, di->di_gid, di->di_nlink, (uint)di->di_projid);
+		printf("		atime:%d  mtime:%d  ctime:%d\n",
+		       di->di_atime.t_sec, di->di_mtime.t_sec, di->di_ctime.t_sec);
+		printf("		size:0x%Lx  nblks:0x%Lx  exsize:%d  nextents:%d"
+		       "  anextents:%d\n", di->di_size, di->di_nblocks,
+		       di->di_extsize, di->di_nextents, (int)di->di_anextents);
+		printf("		forkoff:%d  dmevmask:0x%x  dmstate:%d  flags:0x%x  gen:%d\n",
+		       (int)di->di_forkoff, di->di_dmevmask,
+		       (int)di->di_dmstate, (int)di->di_flags, di->di_gen);
+	}
+} /* xlog_recover_print_inode_core */
+
+
+STATIC void
+xlog_recover_print_inode(
+	xlog_recover_item_t *item)
+{
+	xfs_inode_log_format_t	*f;
+	int			attr_index;	/* ri_buf[] slot of the attr fork region */
+	int			hasdata;	/* 1 if any XFS_ILOG_DFORK bit is set */
+	int			hasattr;	/* 1 if any XFS_ILOG_AFORK bit is set */
+	/* Print an inode item: format header, core, then data/attr fork regions. */
+	f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
+	ASSERT(item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t));
+	printf("	INODE: #regs:%d   ino:0x%Lx  flags:0x%x   dsize:%d\n",
+	       f->ilf_size, f->ilf_ino, f->ilf_fields, f->ilf_dsize);
+
+	/* core inode comes 2nd */
+	ASSERT(item->ri_buf[1].i_len == sizeof(xfs_dinode_core_t));
+	xlog_recover_print_inode_core((xfs_dinode_core_t *)
+				      item->ri_buf[1].i_addr);
+
+	hasdata = (f->ilf_fields & XFS_ILOG_DFORK) != 0;
+	hasattr = (f->ilf_fields & XFS_ILOG_AFORK) != 0;
+	/* does anything come next */
+	switch (f->ilf_fields & (XFS_ILOG_DFORK | XFS_ILOG_DEV | XFS_ILOG_UUID)) {
+	      case XFS_ILOG_DEXT: {
+		      ASSERT(f->ilf_size == 3 + hasattr);
+		      printf("		DATA FORK EXTENTS inode data:\n");
+		      if (print_inode && print_data) {
+			      xlog_recover_print_data(item->ri_buf[2].i_addr,
+						      item->ri_buf[2].i_len);
+		      }
+		      break;
+	      }
+	      case XFS_ILOG_DBROOT: {
+		      ASSERT(f->ilf_size == 3 + hasattr);
+		      printf("		DATA FORK BTREE inode data:\n");
+		      if (print_inode && print_data) {
+			      xlog_recover_print_data(item->ri_buf[2].i_addr,
+						      item->ri_buf[2].i_len);
+		      }
+		      break;
+	      }
+	      case XFS_ILOG_DDATA: {
+		      ASSERT(f->ilf_size == 3 + hasattr);
+		      printf("		DATA FORK LOCAL inode data:\n");
+		      if (print_inode && print_data) {
+			      xlog_recover_print_data(item->ri_buf[2].i_addr,
+						      item->ri_buf[2].i_len);
+		      }
+		      break;
+	      }
+	      case XFS_ILOG_DEV: {
+		      ASSERT(f->ilf_size == 2 + hasattr);
+		      printf("		DEV inode: no extra region\n");
+		      break;
+	      }
+	      case XFS_ILOG_UUID: {
+		      ASSERT(f->ilf_size == 2 + hasattr);
+		      printf("		UUID inode: no extra region\n");
+		      break;
+	      }
+
+
+	      case 0: {
+		      ASSERT(f->ilf_size == 2 + hasattr);
+		      break;
+	      }
+	      default: {	/* NOTE(review): message names xlog_print_trans_inode, not this function */
+		      xlog_panic("xlog_print_trans_inode: illegal inode type");
+	      }
+	}
+
+	if (hasattr) {
+		attr_index = 2 + hasdata;	/* attr region follows the data region, if any */
+		switch (f->ilf_fields & XFS_ILOG_AFORK) {
+		      case XFS_ILOG_AEXT: {
+			      ASSERT(f->ilf_size == 3 + hasdata);
+			      printf("		ATTR FORK EXTENTS inode data:\n");
+			      if (print_inode && print_data) {
+				      xlog_recover_print_data(
+						item->ri_buf[attr_index].i_addr,
+						item->ri_buf[attr_index].i_len);
+			      }
+			      break;
+		      }
+		      case XFS_ILOG_ABROOT: {
+			      ASSERT(f->ilf_size == 3 + hasdata);
+			      printf("		ATTR FORK BTREE inode data:\n");
+			      if (print_inode && print_data) {
+				      xlog_recover_print_data(
+						item->ri_buf[attr_index].i_addr,
+						item->ri_buf[attr_index].i_len);
+			      }
+			      break;
+		      }
+		      case XFS_ILOG_ADATA: {
+			      ASSERT(f->ilf_size == 3 + hasdata);
+			      printf("		ATTR FORK LOCAL inode data:\n");
+			      if (print_inode && print_data) {
+				      xlog_recover_print_data(
+						item->ri_buf[attr_index].i_addr,
+						item->ri_buf[attr_index].i_len);
+			      }
+			      break;
+		      }
+		      default: {	/* more than one attr-fork flag set */
+			      xlog_panic("xlog_print_trans_inode: "
+					 "illegal inode log flag");
+		      }
+		}
+	}
+    
+} /* xlog_recover_print_inode */
+
+
+/* Print an EFD log item: a summary line, then its extents four to a line. */
+STATIC void
+xlog_recover_print_efd(
+	xlog_recover_item_t *item)
+{
+	xfs_efd_log_format_t *f;
+	xfs_extent_t	 *ex;
+	int			 i;
+
+	f = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
+	/*
+	 * An xfs_efd_log_format structure contains a variable length array
+	 * as the last field.  Each element is of size xfs_extent_t.
+	 */
+	ASSERT(item->ri_buf[0].i_len == 
+	       sizeof(xfs_efd_log_format_t) + sizeof(xfs_extent_t) *
+	       (f->efd_nextents-1));
+	printf("	EFD:  #regs: %d    num_extents: %d  id: 0x%Lx\n",
+	       f->efd_size, f->efd_nextents, f->efd_efi_id);
+	ex = f->efd_extents;
+	printf("	");
+	/* efd_nextents sizes the array (see ASSERT); efd_size is the #regs */
+	for (i=0; i < f->efd_nextents; i++) {
+		printf("(s: 0x%Lx, l: %d) ", ex->ext_start, ex->ext_len);
+		if (i % 4 == 3) printf("\n");
+		ex++;
+	}
+	if (i % 4 != 0) printf("\n");
+} /* xlog_recover_print_efd */
+
+
+/* Print an EFI log item: a summary line, then its extents four to a line. */
+STATIC void
+xlog_recover_print_efi(
+	xlog_recover_item_t *item)
+{
+	xfs_efi_log_format_t	*efi;
+	xfs_extent_t		*ext;
+	int			n;
+
+	efi = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;
+	/*
+	 * The format structure ends in a variable length array of
+	 * xfs_extent_t, so the region length depends on efi_nextents.
+	 */
+	ASSERT(item->ri_buf[0].i_len ==
+	       sizeof(xfs_efi_log_format_t) +
+	       sizeof(xfs_extent_t) * (efi->efi_nextents - 1));
+	printf("	EFI:  #regs:%d    num_extents:%d  id:0x%Lx\n",
+	       efi->efi_size, efi->efi_nextents, efi->efi_id);
+	printf("	");
+	ext = efi->efi_extents;
+	for (n = 0; n < efi->efi_nextents; n++, ext++) {
+		printf("(s: 0x%Lx, l: %d) ", ext->ext_start, ext->ext_len);
+		if (n % 4 == 3)
+			printf("\n");
+	}
+	if (n % 4 != 0)
+		printf("\n");
+} /* xlog_recover_print_efi */
+
+/*
+ * Hand a recovered log item to the printer matching its item type.
+ * Unknown types are reported on stdout and otherwise ignored.
+ */
+void
+xlog_recover_print_logitem(
+	xlog_recover_item_t *item)
+{
+	switch (ITEM_TYPE(item)) {
+	/* every buffer item type code shares one printer */
+	case XFS_LI_BUF:
+	case XFS_LI_6_1_BUF:
+	case XFS_LI_5_3_BUF:
+		xlog_recover_print_buffer(item);
+		return;
+	/* likewise for the inode item type codes */
+	case XFS_LI_INODE:
+	case XFS_LI_6_1_INODE:
+	case XFS_LI_5_3_INODE:
+		xlog_recover_print_inode(item);
+		return;
+	/* the remaining types each have a dedicated printer */
+	case XFS_LI_EFD:
+		xlog_recover_print_efd(item);
+		return;
+	case XFS_LI_EFI:
+		xlog_recover_print_efi(item);
+		return;
+	case XFS_LI_DQUOT:
+		xlog_recover_print_dquot(item);
+		return;
+	case XFS_LI_QUOTAOFF:
+		xlog_recover_print_quotaoff(item);
+		return;
+	default:
+		printf("xlog_recover_print_logitem: illegal type\n");
+		return;
+	}
+} /* xlog_recover_print_logitem */
+
+/*
+ * Print one recovered log item: a short tag naming its type, the region
+ * count and total, the address and length of each of its ri_cnt
+ * regions, and then the type-specific decode from
+ * xlog_recover_print_logitem().
+ */
+void
+xlog_recover_print_item(xlog_recover_item_t *item)
+{
+	const char	*tag = NULL;
+	int		region;
+
+	switch (ITEM_TYPE(item)) {
+	case XFS_LI_BUF:
+		tag = "BUF";
+		break;
+	case XFS_LI_INODE:
+		tag = "INO";
+		break;
+	case XFS_LI_EFD:
+		tag = "EFD";
+		break;
+	case XFS_LI_EFI:
+		tag = "EFI";
+		break;
+	case XFS_LI_6_1_BUF:
+		tag = "6.1 BUF";
+		break;
+	case XFS_LI_5_3_BUF:
+		tag = "5.3 BUF";
+		break;
+	case XFS_LI_6_1_INODE:
+		tag = "6.1 INO";
+		break;
+	case XFS_LI_5_3_INODE:
+		tag = "5.3 INO";
+		break;
+	case XFS_LI_DQUOT:
+		tag = "DQ ";
+		break;
+	case XFS_LI_QUOTAOFF:
+		tag = "QOFF";
+		break;
+	default:
+		/* cmn_err() as defined in logprint.h only warns; we carry on */
+		cmn_err(CE_PANIC, "xlog_recover_print_item: illegal type");
+		break;
+	}
+	if (tag != NULL)
+		printf("%s", tag);
+
+	/*
+	 * The item's ri_type isn't filled in yet, so the type is not
+	 * printed numerically here - only the region counts are.
+	 */
+	printf(": cnt:%d total:%d ", item->ri_cnt, item->ri_total);
+	for (region = 0; region < item->ri_cnt; region++) {
+		printf("a:%p len:%d ",
+		       item->ri_buf[region].i_addr,
+		       item->ri_buf[region].i_len);
+	}
+	printf("\n");
+	xlog_recover_print_logitem(item);
+}	/* xlog_recover_print_item */
+
+/* Print a transaction header followed by every item on its item queue. */
+void
+xlog_recover_print_trans(xlog_recover_t	     *trans,
+			 xlog_recover_item_t *itemq,
+			 int		     print)
+{
+	xlog_recover_item_t *cur = itemq;
+
+	if (print < 3)		/* levels below 3 print nothing */
+		return;
+	print_xlog_record_line();
+	xlog_recover_print_trans_head(trans);
+	/* the walk ends when ri_next leads back to the first item */
+	do {
+		xlog_recover_print_item(cur);
+		cur = cur->ri_next;
+	} while (cur != itemq);
+}	/* xlog_recover_print_trans */
diff --git a/logprint/log_print_trans.c b/logprint/log_print_trans.c
new file mode 100644
index 000000000..9b830468a
--- /dev/null
+++ b/logprint/log_print_trans.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+
+/* Print the one-line summary of a recovered transaction. */
+void
+xlog_recover_print_trans_head(xlog_recover_t *tr)
+{
+	printf("TRANS: tid:0x%x  type:%s",
+	       tr->r_log_tid, trans_type[tr->r_theader.th_type]);
+	printf("  #items:%d  trans:0x%x  q:%p\n",
+	       tr->r_theader.th_num_items, tr->r_theader.th_tid, tr->r_itemq);
+}       /* xlog_recover_print_trans_head */
+
+/* Print the transaction and its item queue at print level 3; returns 0. */
+int
+xlog_recover_do_trans(xlog_t *log, xlog_recover_t *trans, int pass)
+{
+	xlog_recover_item_t *queue = trans->r_itemq;
+	xlog_recover_print_trans(trans, queue, 3);
+	return 0;
+}	/* xlog_recover_do_trans */
+
+static int print_record_header=0;
+
+/*
+ * Transactional view: find the log head and tail, report them, then run
+ * recovery pass 1 over that range.  A print_block_start other than -1
+ * replaces the tail block.  Exits with status 1 if either step fails.
+ */
+void
+xfs_log_print_trans(xlog_t      *log,
+		    int		print_block_start)
+{
+	xfs_daddr_t	head, tail;
+
+	if (xlog_find_tail(log, &head, &tail, 0) != 0)
+		exit(1);
+
+	printf("    log tail: %lld head: %lld state: %s\n",
+	       (__int64_t)tail, (__int64_t)head,
+	       (tail == head) ? "<CLEAN>" : "<DIRTY>");
+	if (print_block_start != -1) {
+		printf("    override tail: %lld\n", (__int64_t)print_block_start);
+		tail = print_block_start;
+	}
+	printf("\n");
+	print_record_header = 1;	/* header check now prints each LSN */
+	if (xlog_do_recovery_pass(log, head, tail, XLOG_RECOVER_PASS1) != 0)
+		exit(1);
+}	/* xfs_log_print_trans */
+
+/* Compare superblock and log record UUIDs: 0 if equal, else report and 1. */
+static int
+header_check_uuid(xfs_mount_t *mp, xlog_rec_header_t *head)
+{
+    char sb_text[64], log_text[64];
+
+    if (uuid_compare(mp->m_sb.sb_uuid, head->h_fs_uuid) == 0)
+        return 0;
+    /* mismatch: show both UUIDs in text form */
+    uuid_unparse(mp->m_sb.sb_uuid, sb_text);
+    uuid_unparse(head->h_fs_uuid, log_text);
+    printf("* ERROR: mismatched uuid in log\n"
+           "*            SB : %s\n*            log: %s\n",
+            sb_text, log_text);
+    return 1;
+}
+
+/*
+ * Sanity-check a log record header: magic number, filesystem UUID and
+ * log format, in that order, reporting the first problem found.  A bad
+ * header ends the program when print_exit is set; otherwise 0 is
+ * returned regardless, so the caller carries on.
+ */
+int
+xlog_header_check_recover(xfs_mount_t *mp, xlog_rec_header_t *head)
+{
+    int bad = 1;
+
+    if (print_record_header)
+        printf("\nLOG REC AT LSN cycle %d block %d (0x%x, 0x%x)\n",
+               CYCLE_LSN(head->h_lsn, ARCH_CONVERT),
+               BLOCK_LSN(head->h_lsn, ARCH_CONVERT),
+               CYCLE_LSN(head->h_lsn, ARCH_CONVERT),
+               BLOCK_LSN(head->h_lsn, ARCH_CONVERT));
+
+    if (INT_GET(head->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM)
+        printf("* ERROR: bad magic number in log header: 0x%x\n",
+               INT_GET(head->h_magicno, ARCH_CONVERT));
+    else if (header_check_uuid(mp, head))
+        ;       /* mismatch already reported by header_check_uuid() */
+    else if (INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)
+        printf("* ERROR: log format incompatible (log=%d, ours=%d)\n",
+               INT_GET(head->h_fmt, ARCH_CONVERT), XLOG_FMT);
+    else
+        bad = 0;
+
+    /* bail out now or just carry on regardless */
+    if (bad && print_exit)
+        xlog_exit("Bad log");
+
+    return 0;
+}
+
+/* Check the log record UUID against the superblock; a null UUID passes. */
+int
+xlog_header_check_mount(xfs_mount_t *mp, xlog_rec_header_t *head)
+{
+    if (uuid_is_null(head->h_fs_uuid))
+        return 0;
+    /* bail out now or just carry on regardless */
+    if (header_check_uuid(mp, head) && print_exit)
+        xlog_exit("Bad log");
+    return 0;
+}
diff --git a/logprint/logprint.c b/logprint/logprint.c
new file mode 100644
index 000000000..16a652d56
--- /dev/null
+++ b/logprint/logprint.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+#include <errno.h>
+#include <fcntl.h>
+
+int	print_data;	/* bumped by -o and by -D */
+int	print_only_data;	/* bumped by -D */
+int	print_inode;	/* bumped by -i */
+int	print_quota;	/* bumped by -q */
+int	print_buffer;	/* bumped by -b */
+int	print_transactions;	/* bumped by -t; selects xfs_log_print_trans() */
+int	print_overwrite;	/* bumped by -v */
+int     print_no_data;	/* bumped by -n */
+int     print_no_print;	/* no option in main() sets this */
+int     print_exit = 1; /* -e is now default. specify -c to override */
+
+libxfs_init_t	x;	/* libxfs init parameters, filled in by main() */
+xfs_mount_t	mp;	/* mount structure set up by logstat() */
+
+/* Print the option summary for progname on stderr and exit with status 1. */
+void
+usage(void)
+{
+	fprintf(stderr, "Usage: %s [options...] <device>\n\n"
+		"Options:\n"
+		"    -c\t            try to continue if error found in log\n"
+		"    -l <device>     filename of external log\n"
+		"    -n\t            don't try and interpret log data\n"
+		"    -o\t            print buffer data in hex\n"
+		"    -s <start blk>  block # to start printing\n"
+		"    -v              print \"overwrite\" data\n"
+		"    -t\t            print out transactional view\n"
+		"        -b          in transactional view, extract buffer info\n"
+		"        -i          in transactional view, extract inode info\n"
+		"        -q          in transactional view, extract quota info\n"
+		"    -D              print only data; no decoding\n"
+		"    -V              print version information\n", progname);
+	exit(1);
+}
+
+/*
+ * Read the superblock from the data device to find where the log starts
+ * and how long it is; the results go into the global mp and into
+ * x->logBBstart / x->logBBsize.  Failures print a message and exit.
+ */
+int
+logstat(libxfs_init_t *x)
+{
+	char		sbbuf[BBSIZE];
+	xfs_sb_t	*sbp = &mp.m_sb;
+	int		fd;
+
+	/* On Linux we always read the superblock: it gives the log length;
+	 * otherwise we end up seeking forever. -- mkp */
+	fd = open(x->dname, O_RDONLY);
+	if (fd == -1) {
+		fprintf(stderr, "    Can't open device %s: %s\n",
+			x->dname, strerror(errno));
+		exit(1);
+	}
+	lseek64(fd, 0, SEEK_SET);
+	if (read(fd, sbbuf, sizeof(sbbuf)) != sizeof(sbbuf)) {
+		fprintf(stderr, "    read of XFS superblock failed\n");
+		exit(1);
+	}
+	close(fd);
+
+	/* Conjure up a mount structure */
+	libxfs_xlate_sb(sbbuf, sbp, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+	mp.m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+	x->logBBsize = XFS_FSB_TO_BB(&mp, sbp->sb_logblocks);
+	x->logBBstart = XFS_FSB_TO_DADDR(&mp, sbp->sb_logstart);
+
+	if (x->logname == NULL && sbp->sb_logstart == 0) {
+		fprintf(stderr, "    external log device not specified\n\n");
+		usage();
+		/*NOTREACHED*/
+	}
+	if (x->logname == NULL || *x->logname == '\0') {
+		x->logdev = x->ddev;		/* Internal log */
+		return 0;
+	}
+	/* External log: only check that the named file can be opened */
+	fd = open(x->logname, O_RDONLY);
+	if (fd == -1) {
+		fprintf(stderr, "Can't open file %s: %s\n",
+			x->logname, strerror(errno));
+		exit(1);
+	}
+	close(fd);
+	return 0;
+}
+
+/*
+ * xfs_logprint: parse the options, initialise libxfs, size the log from
+ * the superblock (logstat) and print it with xfs_log_print(), or with
+ * xfs_log_print_trans() when -t was given.
+ */
+int
+main(int argc, char **argv)
+{
+	int		print_start = -1;
+	int		c;
+	int		logfd;
+	xlog_t		log = {0};
+
+	progname = basename(argv[0]);
+	while ((c = getopt(argc, argv, "bel:iqnors:tDVvc")) != EOF) {
+		switch (c) {
+			case 'D': {
+				print_only_data++;
+				print_data++;
+				break;
+			}
+			case 'b': {
+				print_buffer++;
+				break;
+			}
+			case 'l': {
+				x.logname = optarg;
+				x.lisfile = 1;
+				break;
+			}
+			case 'c': {
+				/* default is to stop on error; -c turns this off */
+				print_exit=0;
+				break;
+			}
+			case 'e': {
+				/* -e is now default */
+				print_exit++;
+				break;
+			}
+			case 'i': {
+				print_inode++;
+				break;
+			}
+			case 'q': {
+				print_quota++;
+				break;
+			}
+			case 'n': {
+				print_no_data++;
+				break;
+			}
+			case 'o': {
+				print_data++;
+				break;
+			}
+			case 's': {
+				print_start = atoi(optarg);
+				break;
+			}
+			case 't': {
+				print_transactions++;
+				break;
+			}
+			case 'V': {
+				printf("%s version %s\n", progname, VERSION);
+				exit(0);	/* version only: no device needed */
+			}
+			case 'v': {
+				print_overwrite++;
+				break;
+			}
+			case '?': {
+				usage();
+			}
+		}
+	}
+
+	if (argc - optind != 1)
+		usage();
+
+	x.dname = argv[optind];
+
+	if (x.dname == NULL)
+		usage();
+
+	x.notvolok = 1;
+	x.isreadonly = LIBXFS_ISINACTIVE;
+	x.notvolmsg = "You should never see this message.\n";
+
+	printf("xfs_logprint:\n");
+	if (!libxfs_init(&x))
+		exit(1);
+
+	logstat(&x);
+
+	/* fall back to the data device descriptor when x.logfd is negative */
+	logfd=(x.logfd<0)?(x.dfd):(x.logfd);
+	printf("    data device: 0x%Lx\n", x.ddev);
+	if (x.logname) {
+		printf("    log file: \"%s\" ", x.logname);
+	} else {
+		printf("    log device: 0x%Lx ", x.logdev);
+	}
+
+	printf("daddr: %Ld length: %Ld\n\n",
+		(__int64_t)x.logBBstart, (__int64_t)x.logBBsize);
+
+	ASSERT(x.logBBstart <= INT_MAX);
+
+	/* init log structure */
+	log.l_dev	   = x.logdev;
+	log.l_logsize     = BBTOB(x.logBBsize);
+	log.l_logBBstart  = x.logBBstart;
+	log.l_logBBsize   = x.logBBsize;
+	log.l_mp          = &mp;
+
+	if (print_transactions)
+		xfs_log_print_trans(&log, print_start);
+	else
+		xfs_log_print(&log, logfd, print_start);
+
+	exit(0);
+}
diff --git a/logprint/logprint.h b/logprint/logprint.h
new file mode 100644
index 000000000..17eb3ba46
--- /dev/null
+++ b/logprint/logprint.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef XFS_LOGPRINT_H
+#define XFS_LOGPRINT_H
+
+#include <libxfs.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * define the userlevel xlog_t to be the subset of the kernel's
+ * xlog_t that we actually need to get our work done, avoiding
+ * the need to define any exotic kernel types in userland.
+ */
+typedef struct log {
+	xfs_lsn_t	l_tail_lsn;     /* lsn of 1st log record with unflushed buffers */
+	xfs_lsn_t	l_last_sync_lsn;/* lsn of last log record on disk */
+	xfs_mount_t	*l_mp;	        /* mount point */
+	dev_t		l_dev;	        /* dev_t of log */
+	xfs_daddr_t	l_logBBstart;   /* start block of log */
+	int		l_logsize;      /* size of log in bytes */
+	int		l_logBBsize;    /* size of log in 512 byte chunks */
+	int		l_roundoff;	/* round off error of all iclogs */
+	int		l_curr_cycle;   /* Cycle number of log writes */
+	int		l_prev_cycle;   /* Cycle # before last block increment */
+	int		l_curr_block;   /* current logical block of log */
+	int		l_prev_block;   /* previous logical block of log */
+	int		l_iclog_size;	 /* presumably size of one iclog in bytes - TODO confirm */
+	int		l_iclog_size_log;/* presumably log2 of l_iclog_size - TODO confirm */
+	int		l_iclog_bufs;	 /* number of iclog buffers */
+	int		l_grant_reserve_cycle;	/* meaning not documented here */
+	int		l_grant_reserve_bytes;	/* meaning not documented here */
+	int		l_grant_write_cycle;	/* meaning not documented here */
+	int		l_grant_write_bytes;	/* meaning not documented here */
+} xlog_t;
+
+#include <xfs_log_recover.h>
+#include <xfs_buf_item.h>
+#include <xfs_inode_item.h>
+#include <xfs_extfree_item.h>
+#include <xfs_dquot_item.h>
+
+
+/*
+ * macros mapping kernel code to user code (stderr messages, libxfs I/O)
+ */
+#define STATIC			static
+#define EFSCORRUPTED            EIO
+#define XFS_ERROR(e)		(e)
+/* cmn_err() ignores sev and only warns; xlog_exit/xlog_panic warn, then exit(1) */
+#define xlog_warn(fmt,args...) \
+	( fprintf(stderr,fmt,## args), fputc('\n', stderr) )
+#define cmn_err(sev,fmt,args...) \
+        xlog_warn(fmt,## args)
+#define xlog_exit(fmt,args...) \
+	( xlog_warn(fmt,## args), exit(1) )
+#define xlog_panic(fmt,args...) \
+	xlog_exit(fmt,## args)
+/* log buffer I/O goes through libxfs on x.logdev; xlog_bread() always yields 0 */
+#define xlog_get_bp(nbblks, mp)	libxfs_getbuf(x.logdev, 0, (nbblks))
+#define xlog_put_bp(bp)		libxfs_putbuf(bp)
+#define xlog_bread(log,blkno,nbblks,bp)	\
+	(libxfs_readbufr(x.logdev,	\
+			(log)->l_logBBstart+(blkno), bp, (nbblks), 1), 0)
+                         
+#define kmem_zalloc(size, foo)			calloc(size,1)
+#define kmem_free(ptr, foo)			free(ptr)
+#define kmem_realloc(ptr, len, old, foo)	realloc(ptr, len)
+
+/* command line flags */
+extern int	print_data;
+extern int	print_only_data;
+extern int	print_inode;
+extern int	print_quota;
+extern int	print_buffer;
+extern int	print_transactions;
+extern int	print_overwrite;
+
+extern int	print_exit;
+extern int	print_no_data;
+extern int	print_no_print;
+
+/* exports */
+
+extern char *trans_type[];
+
+/* libxfs parameters */
+extern libxfs_init_t	x;
+
+extern void xfs_log_print_trans(xlog_t          *log,
+				int		print_block_start);
+
+extern void xfs_log_print(      xlog_t          *log,
+                                int             fd,
+				int		print_block_start);
+
+extern int  xlog_find_zeroed(xlog_t *log, xfs_daddr_t *blk_no);
+extern int  xlog_find_cycle_start(xlog_t *log, xfs_buf_t *bp,
+		xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle);
+extern int  xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk,
+		xfs_daddr_t *tail_blk, int readonly);
+
+extern int  xlog_test_footer(xlog_t *log);
+extern int  xlog_recover(xlog_t *log, int readonly);
+extern void xlog_recover_print_data(xfs_caddr_t p, int len);
+extern void xlog_recover_print_logitem(xlog_recover_item_t *item);
+extern void xlog_recover_print_trans_head(xlog_recover_t *tr);
+extern int  xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk);
+
+extern void print_xlog_op_line(void);
+extern void print_xlog_record_line(void);
+extern void print_stars(void);
+
+/* for transactional view */
+extern void xlog_recover_print_trans_head(xlog_recover_t *tr);
+
+extern void xlog_recover_print_trans(	xlog_recover_t		*trans,
+					xlog_recover_item_t	*itemq,
+					int			print);
+
+extern int  xlog_do_recovery_pass(	xlog_t		*log,
+					xfs_daddr_t	head_blk,
+					xfs_daddr_t	tail_blk,
+					int		pass);
+extern int  xlog_recover_do_trans(	xlog_t		*log,
+					xlog_recover_t	*trans,
+					int		pass);
+extern int  xlog_header_check_recover(  xfs_mount_t         *mp, 
+                                        xlog_rec_header_t   *head);
+extern int  xlog_header_check_mount(    xfs_mount_t         *mp, 
+                                        xlog_rec_header_t   *head);
+
+#endif	/* XFS_LOGPRINT_H */
diff --git a/man/Makefile b/man/Makefile
new file mode 100644
index 000000000..139d5ae85
--- /dev/null
+++ b/man/Makefile
@@ -0,0 +1,41 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+# one subdirectory per manual section that is built and installed
+SUBDIRS = man1 man2 man3 man5 man8
+# both targets run the same recursive rule over $(SUBDIRS)
+default install : $(SUBDIRS)
+	$(SUBDIRS_MAKERULE)
+
+include $(BUILDRULES)
diff --git a/man/man5/Makefile b/man/man5/Makefile
new file mode 100644
index 000000000..8602606f0
--- /dev/null
+++ b/man/man5/Makefile
@@ -0,0 +1,49 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/include/builddefs
+
+MAN_SECTION	= 5
+# every file in this directory named *.$(MAN_SECTION) is treated as a page
+MAN_PAGES	= $(shell echo *.$(MAN_SECTION))
+MAN_DEST	= $(XFS_CMDS_MAN_DIR)/man$(MAN_SECTION)
+LSRCFILES	= $(MAN_PAGES)
+
+# default only depends on the pages; there is no recipe here
+default : $(MAN_PAGES)
+
+include $(BUILDRULES)
+# install: create the destination directory, then run $(INSTALL_MAN)
+install : default
+	$(INSTALL) -m 755 -d $(MAN_DEST)
+	$(INSTALL_MAN)
diff --git a/man/man5/xfs.5 b/man/man5/xfs.5
new file mode 100644
index 000000000..a358f4aaf
--- /dev/null
+++ b/man/man5/xfs.5
@@ -0,0 +1,114 @@
+.TH xfs 5
+.SH NAME
+xfs \- layout of the XFS filesystem
+.SH DESCRIPTION
+An XFS filesystem can reside on a regular disk partition or on a
+logical volume (see
+.IR lvm (8)).
+An XFS filesystem has up to three parts:
+a data section, a log section, and a real-time section.
+For disk partition filesystems,
+the real-time section is absent, and
+the log area is contained within the data section.
+For logical volume filesystems,
+the real-time section is optional,
+and the log section can be separate from the data section
+or contained within it.
+The filesystem sections are divided into a certain number of
+.IR blocks ,
+whose size is specified at
+.IR mkfs (8)
+time with the
+.B \-b
+option.
+.PP
+The data section contains all the filesystem metadata
+(inodes, directories, indirect blocks)
+as well as the user file data for ordinary (non-real-time) files
+and the log area if the log is
+.I internal
+to the data section.
+The data section is divided into a number of
+\f2allocation groups\f1.
+The number and size of the allocation groups are chosen by
+.I mkfs
+so that there is normally a small number of equal-sized groups.
+The number of allocation groups controls the amount of parallelism
+available in file and block allocation.
+It should be increased from
+the default if there is sufficient memory and a lot of allocation
+activity.
+The number of allocation groups should not be set very high,
+since this can cause large amounts of CPU time to be used by
+the filesystem, especially when the filesystem is nearly full.
+More allocation groups are added (of the original size) when
+.IR xfs_growfs (8)
+is run.
+.PP
+The log section (or area, if it is internal to the data section)
+is used to store changes to filesystem metadata while the
+filesystem is running until those changes are made to the data
+section.
+It is written sequentially during normal operation and read only
+during mount.
+When mounting a filesystem after a crash, the log
+is read to complete operations that were
+in progress at the time of the crash.
+.PP
+The real-time section is used to store the data of real-time files.
+These files had an attribute bit set through
+.IR fcntl (2)
+after file creation, before any data was written to the file.
+The real-time section is divided into a number of
+.I extents
+of fixed size (specified at
+.I mkfs
+time).
+Each file in the real-time section has an extent size that
+is a multiple of the real-time section extent size.
+.PP
+Each allocation group contains several data structures.
+The first sector contains the superblock.
+For allocation groups after the first,
+the superblock is just a copy and is not updated after
+.IR mkfs .
+The next three sectors contain information for block and inode
+allocation within the allocation group.
+Also contained within each allocation group are data structures
+to locate free blocks and inodes;
+these are located through the header structures.
+.PP
+Each XFS filesystem is labeled with a unique
+universal identifier (UUID).
+The UUID is stored in every allocation group header and
+is used to help distinguish one XFS filesystem from another,
+therefore you should avoid using
+.I dd
+or other block-by-block copying programs to copy XFS filesystems.
+If two XFS filesystems on the same machine have the same UUID,
+.I xfsdump
+may become confused when doing incremental and resumed dumps.
+(See
+.IR xfsdump (8)
+for more details.)
+.I xfs_copy
+or
+.IR xfsdump / xfsrestore
+are recommended for making copies of XFS filesystems.
+.PP
+All these data structures are subject to change, and the
+headers that specify their layout on disk are not provided.
+.SH SEE ALSO
+fs(5),
+mkfs.xfs(8),
+xfs_bmap(8),
+xfs_check(8),
+xfs_copy(8),
+xfs_estimate(8),
+xfs_growfs(8),
+xfs_logprint(8),
+xfs_repair(8),
+xfsdump(8),
+xfsrestore(8),
+fcntl(2),
+lvm(8).
diff --git a/man/man8/Makefile b/man/man8/Makefile
new file mode 100644
index 000000000..9ccd9c446
--- /dev/null
+++ b/man/man8/Makefile
@@ -0,0 +1,49 @@
+#! gmake
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/include/builddefs
+
+MAN_SECTION	= 8
+# every file in this directory named *.$(MAN_SECTION) is treated as a page
+MAN_PAGES	= $(shell echo *.$(MAN_SECTION))
+MAN_DEST	= $(XFS_CMDS_MAN_DIR)/man$(MAN_SECTION)
+LSRCFILES	= $(MAN_PAGES)
+# default only depends on the pages; there is no recipe here
+default : $(MAN_PAGES)
+
+include $(BUILDRULES)
+# install: create the destination directory, then run $(INSTALL_MAN)
+install : default
+	$(INSTALL) -m 755 -d $(MAN_DEST)
+	$(INSTALL_MAN)
diff --git a/man/man8/fsck.xfs.8 b/man/man8/fsck.xfs.8
new file mode 100644
index 000000000..128691bee
--- /dev/null
+++ b/man/man8/fsck.xfs.8
@@ -0,0 +1,23 @@
+.TH fsck.xfs 8
+.SH NAME
+fsck.xfs \- do nothing, successfully
+.SH SYNOPSIS
+.nf
+\f3fsck.xfs\f1 [ \f3...\f1]
+.fi
+.SH DESCRIPTION
+.I fsck.xfs
+is called by the generic Linux
+.IR fsck (8)
+program at startup to check and repair an XFS filesystem.
+XFS is a journalled filesystem and performs recovery at
+.IR mount (8)
+time if necessary, so
+.I fsck.xfs
+simply exits with a zero exit status.
+.SH FILES
+.IR /etc/fstab .
+.SH SEE ALSO
+fsck(8),
+fstab(5),
+xfs(5).
diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
new file mode 100644
index 000000000..7366bcf6d
--- /dev/null
+++ b/man/man8/mkfs.xfs.8
@@ -0,0 +1,485 @@
+.TH mkfs.xfs 8
+.SH NAME
+mkfs.xfs \- construct an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3mkfs.xfs\f1 [ \f3\-b\f1 subopt=value ] \c
+[ \f3\-d\f1 subopt[=value] ] [ \f3\-i\f1 subopt=value ]
+	[ \f3\-l\f1 subopt[=value] ] \c
+[ \f3\-n\f1 subopt[=value] ] \c
+[ \f3\-p\f1 protofile ] [ \f3\-q\f1 ] 
+	[ \f3\-r\f1 subopt[=value] ] [ \f3\-C\f1 ] device
+.fi
+.SH DESCRIPTION
+.I mkfs.xfs
+constructs an XFS filesystem by writing on a special
+file using the values found in the arguments of the command line.
+It is invoked automatically by \f2mkfs\f1(8) when \f2mkfs\f1 is
+given the \f3\-t xfs\f1 option.
+.PP
+In its simplest (and most commonly used) form, the size of the
+filesystem is determined from the disk driver.  As an example, to make
+a filesystem with an internal log on the first partition on the first
+SCSI disk, use:
+.PP
+.nf
+	mkfs.xfs /dev/sda1
+.fi
+.PP
+The metadata log can be placed on another device to reduce the number
+of disk seeks.  To create a filesystem on the first partition on the
+first SCSI disk with a 10000 block log located on the first partition
+on the second SCSI disk, use:
+.PP
+.nf
+	mkfs.xfs -l logdev=/dev/sdb1,size=10000b /dev/sda1
+.fi
+.PP
+Each of the
+.I subopt=value
+elements in the argument list above can be given as multiple comma-separated
+.I subopt=value
+suboptions if multiple suboptions apply to the same option.
+Equivalently, each main option can be given multiple times with
+different suboptions.
+For example,
+.B \-l internal,size=10000b
+and
+.B \-l internal \-l size=10000b
+are equivalent.
+.PP
+In the descriptions below, sizes are given in bytes, blocks, kilobytes,
+megabytes, or gigabytes.
+Sizes are treated as hexadecimal if prefixed by 0x or 0X,
+octal if prefixed by 0, or decimal otherwise.
+If suffixed with \f3b\f1 then the size is converted by multiplying it
+by the filesystem's block size.
+If suffixed with \f3k\f1 then the size is converted by multiplying it by 1024.
+If suffixed with \f3m\f1 then the size is converted by multiplying it by 1048576 (1024 * 1024).
+If suffixed with \f3g\f1 then the size is converted by multiplying it by
+1073741824 (1024 * 1024 * 1024).
+.TP
+.B \-b
+Block size options.
+.IP
+This option specifies the fundamental block size of the filesystem.
+The valid suboptions are:
+.BI log= value
+and
+\f3size=\f1\f2value\f1;
+only one can be supplied.
+The block size is specified either as a base two logarithm value with
+.BR log= ,
+or in bytes with
+.BR size= .
+The default value is 4096 bytes (4 KB).  The minimum value for block
+size is 512; the maximum is 65536 (64 KB).  XFS on Linux currently
+only supports 4KB blocks.
+.TP
+.B \-d
+Data section options.
+.IP
+These options specify the location, size, and other parameters of the
+data section of the filesystem.
+The valid suboptions are:
+\f3agcount=\f1\f2value\f1,
+\f3file\f1[\f3=\f1\f2value\f1],
+\f3name=\f1\f2value\f1,
+\f3size=\f1\f2value\f1,
+\f3sunit=\f1\f2value\f1,
+\f3swidth=\f1\f2value\f1,
+and
+\f3unwritten\f1[\f3=\f1\f2value\f1].
+.IP
+The
+.B agcount
+suboption is used to specify the number of allocation groups.
+The data section of the filesystem is divided into allocation groups
+to improve the performance of XFS.
+More allocation groups imply that more parallelism can be achieved
+when allocating blocks and inodes.
+The minimum allocation group size is 16 MB;
+the maximum size is just under 4 GB.
+The data section of the filesystem is divided into
+.I agcount
+allocation groups (default value 8, unless the filesystem is smaller
+than 128 MB or larger than 8 GB).
+Setting
+.I agcount
+to a very large number should be avoided, since this causes an unreasonable
+amount of CPU time to be used when the filesystem is close to full.
+.IP
+The
+.B name
+suboption can be used to specify the name of the special file containing
+the filesystem.
+In this case, the log section must be specified as
+.B internal
+(with a size, see the
+.B \-l
+option below) and there can be no real-time section.
+Note that the default log in this case is an internal log with
+at least 1000 blocks, actual size depending on the filesystem block
+size and the directory block size.
+.IP
+The
+.B file
+suboption is used to specify that the file given by the
+.B name
+suboption is a regular file.
+The suboption value is either 0 or 1,
+with 1 signifying that the file is regular.
+This suboption is used only to make a filesystem image
+(for instance, a miniroot image).
+If the value is omitted then 1 is assumed.
+.IP
+The
+.B size
+suboption is used to specify the size of the data section.
+This suboption is required if
+.B \-d file[=1]
+is given.
+Otherwise, it is only needed if the filesystem should occupy
+less space than the size of the special file.
+.IP
+The
+.B sunit
+suboption is used to specify the stripe unit for a RAID device or a
+logical volume.  The suboption value has to be specified in 512-byte
+block units.  This suboption ensures that data allocations will be
+stripe unit aligned when the current end of file is being extended and
+the file size is larger than 512KB.  Also inode allocations and the
+internal log will be stripe unit aligned.
+.IP
+The
+.B swidth
+suboption is used to specify the stripe width for a RAID device or a
+striped logical volume.
+The suboption value has to be specified in 512-byte block units.
+This suboption is required if
+.B \-d sunit
+has been specified and it has to be a multiple of the 
+.B \-d sunit 
+suboption.
+The stripe width will be the preferred iosize returned in the 
+.IR stat (2)
+system call.
+.IP
+The
+.B unwritten
+suboption is used to specify whether unwritten extents are flagged as such,
+or not.
+The suboption value is either 0 or 1, with 1 signifying that unwritten
+extent flagging should occur.
+If the suboption is omitted, unwritten extent flagging is enabled.
+If unwritten extents are flagged, filesystem write performance
+will be negatively affected for preallocated file extents, since
+extra filesystem transactions are required to convert extent flags 
+for the range of the file written.
+This suboption should be disabled if the filesystem
+needs to be used on operating system versions which do not support the
+flagging capability.
+.TP
+.B \-i
+Inode options.
+.IP
+This option specifies the inode size of the filesystem, and other
+inode allocation parameters.
+The XFS inode contains a fixed-size part and a variable-size part.
+The variable-size part, whose size is affected by this option, can contain:
+directory data, for small directories;
+attribute data, for small attribute sets;
+symbolic link data, for small symbolic links;
+the extent list for the file, for files with a small number of extents;
+and the root of a tree describing the location of extents for the file,
+for files with a large number of extents.
+.IP
+The valid suboptions for specifying inode size are:
+\f3log=\f1\f2value\f1,
+\f3perblock=\f1\f2value\f1,
+and
+\f3size=\f1\f2value\f1;
+only one can be supplied.
+The inode size is specified either as a base two logarithm value with
+.BR log= ,
+in bytes with
+.BR size= ,
+or as the number fitting in a filesystem block with
+.BR perblock= .
+The minimum (and default) value is 256 bytes.
+The maximum value is 2048 (2 KB) subject to the restriction that
+the inode size cannot exceed one half of the filesystem block size.
+.IP
+The option \f3maxpct=\f1\f2value\f1 specifies the maximum percentage
+of space in the filesystem that can be allocated to inodes.
+The default value is 25%.
+Setting the value to 0 means that
+essentially all of the filesystem can become inode blocks.
+.IP
+The option
+.BI align[= value ]
+is used to specify that inode allocation is or is not aligned.
+The value is either 0 or 1,
+with 1 signifying that inodes are allocated aligned.
+If the value is omitted, 1 is assumed.
+The default is that inodes are aligned.
+Aligned inode access is normally more efficient than unaligned access;
+alignment must be established at the time the filesystem is created,
+since inodes are allocated at that time.
+This option can be used to turn off inode alignment when the
+filesystem needs to be mountable by a version of IRIX
+that does not have the inode alignment feature
+(any release of IRIX before 6.2, and IRIX 6.2 without XFS patches).
+.TP
+.B \-l
+Log section options.
+.IP
+These options specify the location, size, and other parameters of the
+log section of the filesystem.
+The valid suboptions are:
+.BI internal[= value ]
+and
+\f3size=\f1\f2value\f1.
+.IP
+The
+.B internal
+suboption is used to specify that the log section is a piece of
+the data section instead of being another device or logical volume.
+The suboption value is either 0 or 1,
+with 1 signifying that the log is internal.
+If the value is omitted, 1 is assumed.
+.IP
+The
+.B size
+suboption is used to specify the size of the log section.
+This suboption is required if
+.B \-l internal[=1]
+is given.
+Otherwise, it is only needed if the log section of the filesystem
+should occupy less space than the size of the special file.
+The size is specified in bytes or blocks, with a \f3b\f1 suffix 
+meaning multiplication by the filesystem block size, as described above.
+The overriding minimum value for size is 512 blocks.
+With some combinations of filesystem block size, inode size,
+and directory block size, the minimum log size is larger than 512 blocks.
+.TP
+.B \-n
+Naming options.
+.IP
+These options specify the version and size parameters for the naming
+(directory) area of the filesystem.
+The valid suboptions are:
+\f3log=\f1\f2value\f1,
+\f3size=\f1\f2value\f1,
+and
+\f3version=\f1\f2value\f1.
+The naming (directory) version is 1 or 2,
+defaulting to 1 if unspecified.
+With version 2 directories,
+the directory block size can be any power of 2 size
+from the filesystem block size up to 65536.
+The block size is specified either as a base two logarithm value with
+.BR log= ,
+or in bytes with
+.BR size= .
+The default size value for version 2 directories is 4096 bytes (4 KB), 
+unless the filesystem block size is larger than 4096,
+in which case the default value is the filesystem block size.
+For version 1 directories the block size is the same as the 
+filesystem block size.
+.TP
+\f3\-p\f1 \f2protofile\f1
+If the optional
+.B \-p
+.I protofile
+argument is given,
+.I mkfs.xfs
+uses
+.I protofile
+as a prototype file
+and takes its directions from that file.
+The blocks and inodes
+specifiers in the
+.I protofile
+are provided for backwards compatibility, but are otherwise unused.
+The prototype file
+contains tokens separated by spaces or
+newlines.
+A sample prototype specification follows (line numbers have been added to
+aid in the explanation):
+.nf
+.sp .8v
+.in +5
+\f71       /stand/\f1\f2diskboot\f1\f7
+2       4872 110
+3       d--777 3 1
+4       usr     d--777 3 1
+5       sh      ---755 3 1 /bin/sh
+6       ken     d--755 6 1
+7               $
+8       b0      b--644 3 1 0 0
+9       c0      c--644 3 1 0 0
+10      fifo    p--644 3 1
+11      slink   l--644 3 1 /a/symbolic/link
+12      :  This is a comment line
+13      $
+14      $\f1
+.in -5
+.fi
+.IP
+Line 1 is a dummy string.
+(It was formerly the bootfilename.)
+It is present for backward
+compatibility; boot blocks are not used on SGI systems.
+.IP
+Note that some string of characters must be present as the first line of
+the proto file to cause it to be parsed correctly; the value
+of this string is immaterial since it is ignored.
+.IP
+Line 2 contains two numeric values (formerly the numbers of blocks and inodes).
+These are also merely for backward compatibility: two numeric values must
+appear at this point for the proto file to be correctly parsed,
+but their values are immaterial since they are ignored.
+.IP
+Lines 3-11 tell
+.I mkfs.xfs
+about files and directories to
+be included in this filesystem.
+Line 3 specifies the root directory.
+Lines 4-6 and 8-10 specify other directories and files.
+Note the special symbolic link syntax on line 11.
+.IP
+The
+.B $
+on line 7 tells
+.I mkfs.xfs
+to end the branch of the filesystem it is on, and continue
+from the next higher directory.
+It must be the last character
+on a line.
+The colon
+on line 12 introduces a comment; all characters up until the
+following newline are ignored.
+Note that this means you cannot
+have a file in a prototype file whose name contains a colon.
+The
+.B $
+on lines 13 and 14 end the process, since no additional
+specifications follow.
+.IP
+File specifications give the mode,
+the user ID,
+the group ID,
+and the initial contents of the file.
+Valid syntax for the contents field
+depends on the first character of the mode.
+.IP
+The mode for a file is specified by a 6-character string.
+The first character
+specifies the type of the file.
+The character range is
+.B \-bcdpl
+to specify regular, block special,
+character special, directory files, named pipes (fifos), and symbolic
+links, respectively.
+The second character of the mode
+is either
+.B u
+or
+.B \-
+to specify setuserID mode or not.
+The third is
+.B g
+or
+.B \-
+for the setgroupID mode.
+The rest of the mode
+is a three digit octal number giving the
+owner, group, and other read, write, execute
+permissions (see
+.IR chmod (1)).
+.IP
+Two decimal number
+tokens come after the mode; they specify the
+user and group IDs of the owner of the file.
+.IP
+If the file is a regular file,
+the next token of the specification can be a pathname
+from which the contents and size are copied.
+If the file is a block or character special file,
+two decimal numbers
+follow that give the major and minor device numbers.
+If the file is a symbolic link, the next token of the specification
+is used as the contents of the link.
+If the file is a directory,
+.I mkfs.xfs
+makes the entries
+.BR . ""
+and
+.B  ..
+and then
+reads a list of names and
+(recursively)
+file specifications for the entries
+in the directory.
+As noted above, the scan is terminated with the
+token
+.BR $ .
+.TP
+.B \-q
+Quiet option.
+.IP
+Normally
+.I mkfs.xfs
+prints the parameters of the filesystem
+to be constructed;
+the
+.B \-q
+flag suppresses this.
+.TP
+.B \-r
+Real-time section options.
+.IP
+These options specify the location, size, and other parameters of the
+real-time section of the filesystem.
+The valid suboptions are:
+.BI extsize= value
+and
+\f3size=\f1\f2value\f1.
+.IP
+The
+.B extsize
+suboption is used to specify the size of the blocks in the real-time
+section of the filesystem.
+This size must be a multiple of the filesystem block size.
+The minimum allowed value is the filesystem block size
+or 4 KB (whichever is larger);
+the default value is the stripe width for striped volumes or 64 KB for
+non-striped volumes;
+the maximum allowed value is 1 GB.
+The real-time extent size should be carefully chosen to match the
+parameters of the physical media used.
+.IP
+The
+.B size
+suboption is used to specify the size of the real-time section.
+This suboption is only needed if the real-time section of the
+filesystem should occupy
+less space than the size of the partition or logical volume containing the section.
+.TP
+.B \-C
+Disable overlapping partition/volume checks.
+.IP
+By default \f2mkfs.xfs\f1 checks to see if the destination partition or logical
+volume overlaps any mounted or reserved partitions in the system.  If an
+overlap or mount conflict is found, the user will be notified and prevented
+from potentially corrupting the existing data.  For systems with
+a large number of disks, this additional checking may add noticeable overhead
+to the command's execution time.  For situations where command performance is
+necessary, this switch may be used to disable the safeguards.  Due to the
+potential for user-error causing corrupted filesystems or other on-disk
+data corruption, we strongly discourage use of this switch in normal operation.
+.SH SEE ALSO
+mkfs(8).
+.SH BUGS
+With a prototype file, it is not possible to specify hard links.
diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
new file mode 100644
index 000000000..50cfc3e6d
--- /dev/null
+++ b/man/man8/xfs_admin.8
@@ -0,0 +1,68 @@
+.TH xfs_admin 8
+.SH NAME
+xfs_admin \- change parameters of an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_admin\f1 [ \f3\-lu\f1 ] [ \f3\-L \f2label\f1 ] [ \f3\-U \f2uuid\f1 ] device
+\f3xfs_admin \-f\f1 [ \f3\-lu\f1 ] [ \f3\-L \f2label\f1 ] [ \f3\-U \f2uuid\f1 ] filename
+.fi
+.SH DESCRIPTION
+.I xfs_admin
+uses the
+.IR xfs_db (8)
+command to modify various parameters of a filesystem.
+.PP
+Devices that are mounted cannot be modified.
+Administrators must unmount filesystems before
+.I xfs_admin
+or
+.I xfs_db
+can convert parameters.
+A number of parameters of a mounted filesystem can be examined
+and modified using the
+.IR xfs_growfs (8)
+command.
+.SH OPTIONS
+.TP 5
+\f3\-f\f1
+Specifies that the filesystem image to be processed is stored in a
+regular file (see the \f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+.TP 5
+\f3\-l\f1
+Print the current filesystem label.
+.TP 5
+\f3\-u\f1
+Print the current filesystem UUID (Universally Unique IDentifier).
+.TP 5
+\f3\-L\f1 \f2label\f1
+Set the filesystem label.
+XFS filesystem labels can be at most 12 characters long; if
+.I label
+is longer than 12 characters,
+.I xfs_admin
+will truncate it and print a warning message.
+The filesystem label can be cleared using the special ``\c
+.BR \-\- ''
+value for
+.IR label .
+.TP 5
+\f3\-U\f1 \f2UUID\f1
+Set the UUID of the filesystem.
+A sample UUID looks like this: "c1b9d5a2-f162-11cf-9ece-0020afc76f16".
+The uuid may also be
+.IR null ,
+which will set the filesystem UUID to the null UUID.
+The uuid may also be
+.IR generate ,
+which will generate a new UUID for the filesystem.
+.PP
+The
+.IR mount (8)
+manual entry describes how to mount a filesystem using its label or UUID,
+rather than its block special device name.
+.SH SEE ALSO
+mkfs.xfs(8),
+mount(8),
+xfs_db(8),
+xfs_growfs(8),
+xfs(5).
diff --git a/man/man8/xfs_bmap.8 b/man/man8/xfs_bmap.8
new file mode 100644
index 000000000..0d08ec362
--- /dev/null
+++ b/man/man8/xfs_bmap.8
@@ -0,0 +1,54 @@
+.TH xfs_bmap 8
+.SH NAME
+xfs_bmap \- print block mapping for an XFS file
+.SH SYNOPSIS
+.nf
+\f3xfs_bmap\f1 [ \f3\-a\f1 ] [ \f3\-l\f1 ] [ \f3\-d\f1 ] [ \f3\-n \f2nnn\f1 ] file ...
+.fi
+.SH DESCRIPTION
+.I xfs_bmap
+prints the map of disk blocks used by files in an XFS filesystem.
+The map lists each \f2extent\fP used by the file, as well as regions
+in the file that do not have any corresponding blocks (\f2hole\f1s).
+Each line of the listings takes the following form:
+
+.Ex
+\f2extent\f1\f7: [\f1\f2startoffset\f1\f7..\f1\f2endoffset\f1\f7]: \c
+\f1\f2startblock\f1\f7..\f1\f2endblock\f1
+.Ee
+
+Holes are marked by replacing the \f2startblock..endblock\f1 with \f2hole\fP.
+All the file offsets and disk blocks are in units of 512-byte blocks,
+no matter what the filesystem's block size is.
+.PP
+If portions of the file have been migrated offline by
+a DMAPI application, a DMAPI read event will be generated to
+bring those portions back online before the disk block map is
+printed.  However if the \f3\-d\f1 option is used, no DMAPI read event
+will be generated for a DMAPI file and offline portions will be reported as holes.
+.PP
+If the \f3\-l\f1 option is used, then
+
+.Ex
+\f1\f2<nblocks>\f1\f7 \f1\f2blocks\f1\f7
+.Ee
+
+will be appended to each line.  \f2Nblocks\f1 is the length
+of the extent described on the line in units of 512-byte blocks.
+.PP
+If the \f3\-a\f1 option is given, information about the file's
+attribute fork is printed instead of the default data fork.
+.PP
+If the \f3\-n \f2nnn\f1 option is given, \f3xfs_bmap\f1 obtains the extent
+list of the file in groups of \f2nnn\f1 extents.
+In the absence of \f3\-n\f1, \f3xfs_bmap\f1 queries the system for
+the number of extents in the file and uses that value to compute 
+the group size.
+.SH DIAGNOSTICS
+.TP 10
+\f7fcntl(F_GETBMAPX) \f1\f2filename\f1\f7: Invalid argument\f1
+The file \f2filename\f1 is not in an XFS filesystem.
+.SH SEE ALSO
+fcntl(2),
+lvm(8).
+
diff --git a/man/man8/xfs_check.8 b/man/man8/xfs_check.8
new file mode 100644
index 000000000..96480f4da
--- /dev/null
+++ b/man/man8/xfs_check.8
@@ -0,0 +1,177 @@
+.TH xfs_check 8
+.SH NAME
+xfs_check \- check XFS filesystem consistency
+.SH SYNOPSIS
+.nf
+\f3xfs_check\f1 [ \f3\-i\f1 ino ] ... [ \f3\-b\f1 bno ] ... \c
+[ \f3\-s\f1 ] [ \f3\-v\f1 ] xfs_special
+.sp .8v
+\f3xfs_check\f1 \f3\-f\f1 [ \f3\-i\f1 ino ] ... [ \f3\-b\f1 bno ] ... \c
+[ \f3\-s\f1 ] [ \f3\-v\f1 ] file
+.fi
+.SH DESCRIPTION
+.I xfs_check
+checks whether an XFS filesystem is consistent.
+It is normally run only when there is reason to believe that the
+filesystem has a consistency problem.
+The filesystem to be checked is specified by the
+.I xfs_special
+argument, which should be the disk or volume device for the filesystem.
+Filesystems stored in files can also be checked, using the \f3\-f\f1 flag.
+The filesystem should normally be unmounted or read-only
+during the execution of
+.IR xfs_check .
+Otherwise, spurious problems are reported.
+.PP
+The options to \f2xfs_check\f1 are:
+.TP 9
+.B \-f
+Specifies that the special device is actually a file (see the
+\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been made into an ordinary file.
+.TP
+.B \-s
+Specifies that only serious errors should be reported.
+Serious errors are those that make it impossible to find major data
+structures in the filesystem.
+This option can be used to cut down the
+amount of output when there is a serious problem, when the output might make it
+difficult to see what the real problem is.
+.TP
+.B \-v
+Specifies verbose output; it is impossibly long for a
+reasonably-sized filesystem.
+This option is intended for internal use only.
+.TP
+.BI \-i " ino"
+Specifies verbose behavior for a
+specific inode.
+For instance, it can be used to locate all the blocks
+associated with a given inode.
+.TP
+.BI \-b " bno"
+Specifies verbose behavior for a specific filesystem block.
+For instance, it can be used to determine what a specific block
+is used for.
+The block number is a "file system block number".
+Conversion between disk addresses (i.e. addresses reported by
+.IR xfs_bmap )
+and file system blocks may be accomplished using
+.IR xfs_db 's
+.B convert
+command.
+.PP
+Any non-verbose output from
+.I xfs_check
+means that the filesystem has an inconsistency.
+The filesystem can be repaired using either
+.IR xfs_repair (8)
+to fix the filesystem in place,
+or by using
+.IR xfsdump (8)
+and
+.IR mkfs.xfs (8)
+to dump the filesystem,
+make a new filesystem,
+then use
+.IR xfsrestore (8)
+to restore the data onto the new filesystem.
+Note that xfsdump may fail on a corrupt filesystem.
+However, if the filesystem is mountable, xfsdump can
+be used to try and save important data before
+repairing the filesystem with xfs_repair.
+If the filesystem is not mountable though, xfs_repair is
+the only viable option.
+.SH DIAGNOSTICS
+Under one circumstance,
+.I xfs_check
+unfortunately might dump core
+rather than produce useful output.
+If the filesystem is completely corrupt, a core dump might
+be produced instead of the message
+.Ex
+\f2xxx\f1\f7 is not a valid filesystem\f1
+.Ee
+.PP
+If the filesystem is very large (has many files) then
+.I xfs_check
+might run out of memory.
+In this case the message
+.Ex
+out of memory
+.Ee
+is printed.
+.PP
+The following is a description of the most likely problems and the associated
+messages.
+Most of the diagnostics produced are only meaningful with an understanding
+of the structure of the filesystem.
+.TP
+\f7agf_freeblks \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The freeblocks count in the allocation group header for allocation group
+.I a
+doesn't match the number of blocks counted free.
+.TP
+\f7agf_longest \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The longest free extent in the allocation group header for allocation group
+.I a
+doesn't match the longest free extent found in the allocation group.
+.TP
+\f7agi_count \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The allocated inode count in the allocation group header for allocation group
+.I a
+doesn't match the number of inodes counted in the allocation group.
+.TP
+\f7agi_freecount \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The free inode count in the allocation group header for allocation group
+.I a
+doesn't match the number of inodes counted free in the allocation group.
+.TP
+\f7block \f1\f2a/b\f1\f7 expected inum 0 got \f1\f2i\f1
+The block number is specified as a pair
+(allocation group number, block in the allocation group).
+The block is used multiple times (shared), between multiple inodes.
+This message usually follows a message of the next type.
+.TP
+\f7block \f1\f2a/b\f1\f7 expected type unknown got \f1\f2y\f1
+The block is used multiple times (shared).
+.TP
+\f7block \f1\f2a/b\f1\f7 type unknown not expected\f1
+The block is unaccounted for (not in the freelist and not in use).
+.TP
+\f7link count mismatch for inode \f1\f2nnn\f1\f7 (name \f1\f2xxx\f1\f7), nlink \f1\f2m\f1\f7, counted \f1\f2n\f1
+The inode has a bad link count (number of references in directories).
+.TP
+\f7rtblock \f1\f2b\f1\f7 expected inum 0 got \f1\f2i\f1
+The block is used multiple times (shared), between multiple inodes.
+This message usually follows a message of the next type.
+.TP
+\f7rtblock \f1\f2b\f1\f7 expected type unknown got \f1\f2y\f1
+The real-time block is used multiple times (shared).
+.TP
+\f7rtblock \f1\f2b\f1\f7 type unknown not expected\f1
+The real-time block is unaccounted for (not in the freelist and not in use).
+.TP
+\f7sb_fdblocks \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of free data blocks recorded
+in the superblock doesn't match the number counted free in the filesystem.
+.TP
+\f7sb_frextents \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of free real-time extents recorded
+in the superblock doesn't match the number counted free in the filesystem.
+.TP
+\f7sb_icount \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of allocated inodes recorded
+in the superblock doesn't match the number allocated in the filesystem.
+.TP
+\f7sb_ifree \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of free inodes recorded
+in the superblock doesn't match the number free in the filesystem.
+.SH SEE ALSO
+mkfs.xfs(8),
+xfsdump(8),
+xfsrestore(8),
+xfs_ncheck(8),
+xfs_repair(8),
+xfs(5).
diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
new file mode 100644
index 000000000..82c2ad944
--- /dev/null
+++ b/man/man8/xfs_db.8
@@ -0,0 +1,1187 @@
+.TH xfs_db 8
+.SH NAME
+xfs_db \- debug an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_db\f1 [ \f3\-c\f1 cmd ] ... [ \f3\-p\f1 prog ] [ \f3\-r\f1 ] [ \f3\-x\f1 ] xfs_special
+.sp .8v
+\f3xfs_db\f1 \f3\-f\f1 [ \f3\-c\f1 cmd ] ... [ \f3\-p\f1 prog ] [ \f3\-r\f1 ] [ \f3\-x\f1 ] file
+.fi
+.SH DESCRIPTION
+\f2xfs_db\f1 is used to examine an XFS filesystem.
+Under rare circumstances it can also be used to modify an XFS filesystem,
+but that task is normally left to \f2xfs_repair\f1(8) or to
+scripts such as \f2xfs_chver\f1 that run \f2xfs_db\f1.
+.PP
+The options to \f2xfs_db\f1 are:
+.TP 10
+\f3\-c\f1 \f2cmd\f1
+\f2xfs_db\f1 commands may be run interactively (the default)
+or as arguments on the command line.
+Multiple \f3\-c\f1 arguments may be given.
+The commands are run in the sequence given, then the program exits.
+This is the mechanism used to implement \f2xfs_check\f1(8).
+.TP
+\f3\-f\f1
+Specifies that the filesystem image to be processed is stored in a 
+regular file
+(see the \f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been made into an ordinary file with \f2xfs_copy\f1(8).
+.TP
+\f3\-i\f1
+Allows execution on a mounted filesystem, provided it is mounted read-only.
+Useful for shell scripts such as \f2xfs_check\f1(8), which must only
+operate on filesystems in a guaranteed consistent state
+(either unmounted or mounted read-only).
+These semantics are slightly different from those of the \f3\-r\f1 option.
+.TP
+\f3\-p\f1 \f2prog\f1
+Set the program name for prompts and some error messages;
+the default value is \f2xfs_db\f1.
+.TP
+\f3\-r\f1
+Open \f2file\f1 or \f2xfs_special\f1 read-only.
+This option is required if \f2xfs_special\f1 is a mounted filesystem.
+It is only necessary to omit this flag if a command that changes data
+(\f3write\f1, \f3blocktrash\f1) is to be used.
+.TP
+\f3\-x\f1
+Specifies expert mode.
+This enables the \f3write\f1 command.
+.SH CONCEPTS
+\f2xfs_db\f1 commands can be broken up into two classes.
+Most commands are for the navigation and display of data structures in
+the filesystem.
+Other commands are for scanning the filesystem in some way.
+.PP
+Commands which are used to navigate the filesystem structure take arguments
+which reflect the names of filesystem structure fields.
+There can be multiple field names separated by dots when the underlying
+structures are nested, as in C.
+The field names can be indexed (as an array index)
+if the underlying field is an array.
+The array indices can be specified as a range, two numbers separated by a dash.
+.PP
+\f2xfs_db\f1 maintains a current address in the filesystem.
+The granularity of the address is a filesystem structure.
+This can be a filesystem block,
+an inode or quota (smaller than a filesystem block),
+or a directory block (could be larger than a filesystem block).
+There are a variety of commands to set the current address.
+Associated with the current address is the current data type,
+which is the structural type of this data.
+Commands which follow the structure of the filesystem always set the type
+as well as the address.
+Commands which examine pieces of an individual file (inode) need the current
+inode to be set; this is done with the \f3inode\f1 command.
+.PP
+The current address/type information is actually maintained in a
+stack that can be explicitly manipulated with the
+\f3push\f1, \f3pop\f1, and \f3stack\f1 commands.
+This allows for easy examination of a nested filesystem structure.
+Also, the last several locations visited are stored in a ring buffer
+which can be manipulated with the
+\f3forward\f1, \f3back\f1, and \f3ring\f1 commands.
+.PP
+XFS filesystems are divided into a small number of allocation groups.
+\f2xfs_db\f1 maintains a notion of the current allocation group which is
+manipulated by some commands.
+The initial allocation group is 0.
+.SH COMMANDS
+.PP
+Many commands have extensive online help.
+Use the \f3help\f1 command for more details on any command.
+.TP 10
+\f3a\f1
+See the \f3addr\f1 command.
+.TP
+\f3ablock\f1 \f2filoff\f1
+Set current address to the offset \f2filoff\f1 (a filesystem block number)
+in the attribute area of the current inode.
+.TP
+\f3addr\f1 [ \f2field-expression\f1 ]
+Set current address to the value of the \f2field-expression\f1.
+This is used to ``follow'' a reference in one structure to the object
+being referred to.
+If no argument is given the current address is printed.
+.TP
+\f3agf\f1 [ \f2agno\f1 ]
+Set current address to the AGF block for allocation group \f2agno\f1.
+If no argument is given use the current allocation group.
+.TP
+\f3agfl\f1 [ \f2agno\f1 ]
+Set current address to the AGFL block for allocation group \f2agno\f1.
+If no argument is given use the current allocation group.
+.TP
+\f3agi\f1 [ \f2agno\f1 ]
+Set current address to the AGI block for allocation group \f2agno\f1.
+If no argument is given use the current allocation group.
+.TP
+\f3b\f1
+See the \f3back\f1 command.
+.TP
+\f3back\f1
+Move to the previous location in the position ring.
+.TP
+\f3blockfree\f1
+Free block usage information collected by the last execution of the
+\f3blockget\f1 command.
+This must be done before another \f3blockget\f1 command can be given,
+presumably with different arguments than the previous one.
+.TP
+\f3blockget\f1 [ \f3\-npsv\f1 ] [ \f3\-b\f1 \f2bno\f1 ] ... [ \f3\-i\f1 \f2ino\f1 ] ...
+Get block usage and check filesystem consistency.
+The information is saved for use by a subsequent
+\f3blockuse\f1, \f3ncheck\f1, or \f3blocktrash\f1 command.
+See \f2xfs_check\f1(8) for more information.
+.br
+The \f3\-b\f1 option is used to specify filesystem block numbers
+about which verbose information should be printed.
+.br
+The \f3\-i\f1 option is used to specify inode numbers about which
+verbose information should be printed.
+.br
+The \f3\-n\f1 option is used to save pathnames for inodes visited,
+this is used to support the \f2xfs_ncheck\f1(8) command.
+It also means that pathnames will be printed for inodes that have problems.
+This option uses a lot of memory so is not enabled by default.
+.br
+The \f3\-p\f1 option causes error messages to be prefixed with the
+filesystem name being processed.
+This is useful if several copies of \f2xfs_db\f1 are run in parallel.
+.br
+The \f3\-s\f1 option restricts output to severe errors only.
+This is useful if the output is too long otherwise.
+.br
+The \f3\-v\f1 option enables verbose output.
+Messages will be printed for every block and inode processed.
+.TP
+\f3blocktrash\f1 [ \f3\-n\f1 \f2c\f1 ] [ \f3\-x\f1 \f2a\f1 ] [ \f3\-y\f1 \f2b\f1 ] [ \f3\-s\f1 \f2s\f1 ] [ \f3\-0123\f1 ] [ \f3\-t\f1 \f2t\f1 ] ...
+Trash randomly selected filesystem metadata blocks.
+Trashing occurs to randomly selected bits in the chosen blocks.
+This command is available only in debugging versions of \f2xfs_db\f1.
+It is useful for testing \f2xfs_repair\f1(8) and \f2xfs_check\f1(8).
+.br
+The \f3\-0\f1, \f3\-1\f1, \f3\-2\f1, and \f3\-3\f1 options (mutually exclusive)
+set the operating mode for \f3blocktrash\f1.
+In \f3\-0\f1 mode, changed bits are cleared.
+In \f3\-1\f1 mode, changed bits are set.
+In \f3\-2\f1 mode, changed bits are inverted.
+In \f3\-3\f1 mode, changed bits are randomized.
+.br
+The \f3\-n\f1 option supplies the count of block-trashings to perform
+(default 1).
+.br
+The \f3\-s\f1 option supplies a seed to the random processing.
+.br
+The \f3\-t\f1 option gives a type of blocks to be selected
+for trashing.
+Multiple \f3\-t\f1 options may be given.
+If no \f3\-t\f1 options are given then all metadata types can be trashed.
+.br
+The \f3\-x\f1 option sets the minimum size of bit range to be trashed.
+The default value is 1.
+.br
+The \f3\-y\f1 option sets the maximum size of bit range to be trashed.
+The default value is 1024.
+.TP
+\f3blockuse\f1 [ \f3\-n\f1 ] [ \f3\-c\f1 \f2blockcount\f1 ]
+Print usage for current filesystem block(s).
+For each block, the type and (if any) inode are printed.
+.br
+The \f3\-c\f1 option specifies a count of blocks to process.
+The default value is 1 (the current block only).
+.br
+The \f3\-n\f1 option specifies that file names should be printed.
+The prior \f3blockget\f1 command must have also specified the \f3\-n\f1 option.
+.TP
+\f3bmap\f1 [ \f3\-a\f1 ] [ \f3\-d\f1 ] [ \f2block\f1 [ \f2len\f1 ] ]
+Show the block map for the current inode.
+The map display can be restricted to an area of the file with the
+\f2block\f1 and \f2len\f1 arguments.
+If \f2block\f1 is given and \f2len\f1 is omitted then 1 is assumed for len.
+.br
+The \f3\-a\f1 and \f3\-d\f1 options are used to select the attribute or data
+area of the inode, if neither option is given then both areas are shown.
+.TP
+\f3check\f1
+See the \f3blockget\f1 command.
+.TP
+\f3convert\f1 \f2type\f1 \f2number\f1 [ \f2type\f1 \f2number\f1 ] ... \f2type\f1
+Convert from one address form to another.
+The known \f2type\f1s, with alternate names, are:
+\f3agblock\f1 or \f3agbno\f1 (filesystem block within an allocation group),
+\f3agino\f1 or \f3aginode\f1 (inode number within an allocation group),
+\f3agnumber\f1 or \f3agno\f1 (allocation group number),
+\f3bboff\f1 or \f3daddroff\f1 (byte offset in a \f3daddr\f1),
+\f3blkoff\f1 or \f3fsboff\f1 or \f3agboff\f1 (byte offset in a \f3agblock\f1
+or \f3fsblock\f1),
+\f3byte\f1 or \f3fsbyte\f1 (byte address in filesystem),
+\f3daddr\f1 or \f3bb\f1 (disk address, 512-byte blocks),
+\f3fsblock\f1 or \f3fsb\f1 or \f3fsbno\f1 (filesystem block, see the
+\f3fsblock\f1 command),
+\f3ino\f1 or \f3inode\f1 (inode number),
+\f3inoidx\f1 or \f3offset\f1 (index of inode in filesystem block),
+and \f3inooff\f1 or \f3inodeoff\f1 (byte offset in inode).
+Only conversions that ``make sense'' are allowed.
+The compound form (with more than three arguments) is useful for
+conversions such as
+\f3convert\f1 \f3agno\f1 \f2ag\f1 \f3agbno\f1 \f2agb\f1 \f3fsblock\f1.
+.TP
+\f3daddr\f1 [ \f2d\f1 ]
+Set current address to the daddr (512 byte block) given by \f2d\f1.
+If no value for \f2d\f1 is given the current address is printed,
+expressed as a daddr.
+The type is set to \f3data\f1 (uninterpreted).
+.TP
+\f3dblock\f1 \f2filoff\f1
+Set current address to the offset \f2filoff\f1 (a filesystem block number)
+in the data area of the current inode.
+.TP
+\f3debug\f1 [ \f2flagbits\f1 ]
+Set debug option bits.
+These are used for debugging \f2xfs_db\f1.
+If no value is given for \f2flagbits\f1, print the current debug option bits.
+These are for the use of the implementor.
+.TP
+\f3dquot\f1 [ \f2projectid_or_userid\f1 ]
+Set current address to a project or user quota block.
+.TP
+\f3echo\f1 [ \f2arg\f1 ] ...
+Echo the arguments to the output.
+.TP
+\f3f\f1
+See the \f3forward\f1 command.
+.TP
+\f3forward\f1
+Move forward to the next entry in the position ring.
+.TP
+\f3frag\f1 [ \f3\-adflqRrv\f1 ]
+Get file fragmentation data.
+This prints information about fragmentation of file data in the filesystem
+(as opposed to fragmentation of freespace,
+for which see the \f3freesp\f1 command).
+Every file in the filesystem is examined to see how far from ideal
+its extent mappings are.
+A summary is printed giving the totals.
+.br
+The \f3\-v\f1 option sets verbosity,
+every inode has information printed for it.
+The remaining options select which inodes and extents are examined.
+If no options are given then all are assumed set,
+otherwise just those given are enabled.
+.br
+The \f3\-a\f1 option enables processing of attribute data.
+.br
+The \f3\-d\f1 option enables processing of directory data.
+.br
+The \f3\-f\f1 option enables processing of regular file data.
+.br
+The \f3\-l\f1 option enables processing of symbolic link data.
+.br
+The \f3\-q\f1 option enables processing of quota file data.
+.br
+The \f3\-R\f1 option enables processing of realtime control file data.
+.br
+The \f3\-r\f1 option enables processing of realtime file data.
+.TP
+\f3freesp\f1 [ \f3\-bcds\f1 ] [ \f3\-a\f1 \f2a\f1 ] ... [ \f3\-e\f1 \f2i\f1 ] [ \f3\-h\f1 \f2h1\f1 ] ... [ \f3\-m\f1 \f2m\f1 ]
+Summarize free space for the filesystem.
+The free blocks are examined and totalled,
+and displayed in the form of a histogram,
+with a count of extents in each range of free extent sizes.
+.br
+The \f3\-a\f1 \f2a\f1 option adds \f2a\f1 to the list of
+allocation groups to be processed.
+If no \f3\-a\f1 options are given then all allocation groups are processed.
+.br
+The \f3\-b\f1 option specifies that the histogram buckets are binary-sized,
+with the starting sizes being the powers of 2.
+.br
+The \f3\-c\f1 option specifies that \f3freesp\f1 will search the
+by-size (cnt) space Btree instead of the default by-block (bno) space Btree.
+.br
+The \f3\-d\f1 option specifies that every free extent will be displayed.
+.br
+The \f3\-e\f1 \f2i\f1 option specifies that the histogram buckets are
+equal-sized, with the size specified as \f2i\f1.
+.br
+The \f3\-h\f1 \f2h1\f1 option specifies a starting block number
+for a histogram bucket as \f2h1\f1.
+Multiple \f3\-h\f1 options are given to specify the complete set of buckets.
+.br
+The \f3\-m\f1 \f2m\f1 option specifies that the histogram
+starting block numbers are powers of \f2m\f1.
+This is the general case of \f3\-b\f1.
+.br
+The \f3\-s\f1 option specifies that a final summary of total free extents,
+free blocks, and the average free extent size is printed.
+.TP
+\f3fsb\f1
+See the \f3fsblock\f1 command.
+.TP
+\f3fsblock\f1 [ \f2fsb\f1 ]
+Set current address to the fsblock value given by \f2fsb\f1.
+If no value for \f2fsb\f1 is given the current address is printed,
+expressed as an fsb.
+The type is set to \f3data\f1 (uninterpreted).
+XFS filesystem block numbers are computed as
+((\f2agno\f1 << \f2agshift\f1) | \f2agblock\f1)
+where \f2agshift\f1 depends on the size of an allocation group.
+Use the \f3convert\f1 command to convert to and from this form.
+Block numbers given for file blocks
+(for instance from the \f3bmap\f1 command)
+are in this form.
+.TP
+\f3hash\f1 \f2string\f1
+Prints the hash value of \f2string\f1 using the hash function of the XFS
+directory and attribute implementation.
+.TP
+\f3help\f1 [ \f2command\f1 ]
+Print help for one or all commands.
+.TP
+\f3inode\f1 [ \f2inode#\f1 ]
+Set the current inode number.
+If no \f2inode#\f1 is given, print the current inode number.
+.TP
+\f3log\f1 [ \f3stop\f1 | \f3start\f1 \f2filename\f1 ]
+Start logging output to \f2filename\f1, stop logging,
+or print the current logging status.
+.TP
+\f3ncheck\f1 [ \f3\-s\f1 ] [ \f3\-i\f1 \f2ino\f1 ] ...
+Print name-inode pairs.
+A \f3blockget \-n\f1 command must be run first to gather the information.
+.br
+The \f3\-i\f1 option specifies an inode number to be printed.
+If no \f3\-i\f1 options are given then all inodes are printed.
+.br
+The \f3\-s\f1 option specifies that only setuid and setgid files are printed.
+.TP
+\f3p\f1
+See the \f3print\f1 command.
+.TP
+\f3pop\f1
+Pop location from the stack.
+.TP
+\f3print\f1 [ \f2field-expression\f1 ] ...
+Print field values.
+If no argument is given, print all fields in the current structure.
+.TP
+\f3push\f1 [ \f2command\f1 ]
+Push location to the stack.
+If \f2command\f1 is supplied,
+set the current location to the results of \f2command\f1
+after pushing the old location.
+.TP
+\f3q\f1
+See the \f3quit\f1 command.
+.TP
+\f3quit\f1
+Exit \f2xfs_db\f1.
+.TP
+\f3ring\f1 [ \f2index\f1 ]
+Show position ring (if no \f2index\f1 argument is given),
+or move to a specific entry in the position ring given by \f2index\f1.
+.TP
+\f3sb\f1 [ \f2agno\f1 ]
+Set current address to SB header in allocation group \f2agno\f1.
+If no \f2agno\f1 is given use the current allocation group number.
+.TP
+\f3source\f1 \f2source-file\f1
+Process commands from \f2source-file\f1.
+\f3source\f1 commands can be nested.
+.TP
+\f3stack\f1
+View the location stack.
+.TP
+\f3type\f1 [ \f2type\f1 ]
+Set the current data type to \f2type\f1.
+If no argument is given, show the current data type.
+The possible data types are:
+\f3agf\f1, \f3agfl\f1, \f3agi\f1, \f3attr\f1, \f3bmapbta\f1, \f3bmapbtd\f1,
+\f3bnobt\f1, \f3cntbt\f1, \f3data\f1, \f3dir\f1, \f3dir2\f1, \f3dqblk\f1,
+\f3inobt\f1, \f3inode\f1, \f3log\f1, \f3rtbitmap\f1, \f3rtsummary\f1,
+\f3sb\f1, and \f3symlink\f1.
+See the TYPES section below for more information on these data types.
+.TP
+\f3write\f1 [ \f2field or value\f1 ] ...
+Write a value to disk.
+Specific fields can be set in structures (struct mode),
+or a block can be set to data values (data mode),
+or a block can be set to string values (string mode, for symlink blocks).
+The operation happens immediately: there is no buffering.
+.br
+Struct mode is in effect when the current type is structural,
+i.e. not data.
+For struct mode, the syntax is ``\f3write\f1 \f2field\f1 \f2value\f1''.
+.br
+Data mode is in effect when the current type is data.
+In this case the contents of the block can be shifted or rotated left or right,
+or filled with a sequence, a constant value, or a random value.
+In this mode \f3write\f1 with no arguments gives more information on
+the allowed commands.
+.SH TYPES
+This section gives the fields in each structure type and their meanings.
+Note that some types of block cover multiple actual structures,
+for instance directory blocks.
+.TP 10
+\f3agf\f1
+The AGF block is the header for block allocation information;
+it is in the second 512-byte block of each allocation group.
+The following fields are defined:
+.br
+\f3magicnum\f1: AGF block magic number, 0x58414746 ('XAGF')
+.br
+\f3versionnum\f1: version number, currently 1
+.br
+\f3seqno\f1: sequence number starting from 0
+.br
+\f3length\f1: size in filesystem blocks of the allocation group.
+All allocation groups except the last one of the filesystem have
+the superblock's \f3agblocks\f1 value here
+.br
+\f3bnoroot\f1: block number of the root of the Btree holding free space
+information sorted by block number
+.br
+\f3cntroot\f1: block number of the root of the Btree holding free space
+information sorted by block count
+.br
+\f3bnolevel\f1: number of levels in the by-block-number Btree
+.br
+\f3cntlevel\f1: number of levels in the by-block-count Btree
+.br
+\f3flfirst\f1: index into the AGFL block of the first active entry
+.br
+\f3fllast\f1: index into the AGFL block of the last active entry
+.br
+\f3flcount\f1: count of active entries in the AGFL block
+.br
+\f3freeblks\f1: count of blocks represented in the freespace Btrees
+.br
+\f3longest\f1: longest free space represented in the freespace Btrees
+.TP
+\f3agfl\f1
+The AGFL block contains block numbers for use of the block allocator;
+it is in the fourth 512-byte block of each allocation group.
+Each entry in the active list is a block number within the allocation group
+that can be used for any purpose if space runs low.
+The AGF block fields \f3flfirst\f1, \f3fllast\f1, and \f3flcount\f1
+designate which entries are currently active.
+Entry space is allocated in a circular manner within the AGFL block.
+Fields defined:
+.br
+\f3bno\f1: array of all block numbers.
+Even those which are not active are printed
+.TP
+\f3agi\f1
+The AGI block is the header for inode allocation information;
+it is in the third 512-byte block of each allocation group.
+Fields defined:
+.br
+\f3magicnum\f1: AGI block magic number, 0x58414749 ('XAGI')
+.br
+\f3versionnum\f1: version number, currently 1
+.br
+\f3seqno\f1: sequence number starting from 0
+.br
+\f3length\f1: size in filesystem blocks of the allocation group
+.br
+\f3count\f1: count of inodes allocated
+.br
+\f3root\f1: block number of the root of the Btree holding inode allocation
+information
+.br
+\f3level\f1: number of levels in the inode allocation Btree
+.br
+\f3freecount\f1: count of allocated inodes that are not in use
+.br
+\f3newino\f1: last inode number allocated
+.br
+\f3dirino\f1: unused
+.br
+\f3unlinked\f1: an array of inode numbers within the allocation group.
+The entries in the AGI block are the heads of lists which run through the
+inode \f3next_unlinked\f1 field.
+These inodes are to be unlinked the next time the filesystem is mounted
+.TP
+\f3attr\f1
+An attribute fork is organized as a Btree with the actual data
+embedded in the leaf blocks.
+The root of the Btree is found in block 0 of the fork.
+The index (sort order) of the Btree is the hash value of the attribute name.
+All the blocks contain a \f3blkinfo\f1 structure at the beginning,
+see type \f3dir\f1 for a description.
+Nonleaf blocks are identical in format to those for version 1 and
+version 2 directories, see type \f3dir\f1 for a description.
+Leaf blocks can refer to ``local'' or ``remote'' attribute values.
+Local values are stored directly in the leaf block.
+Remote values are stored in an independent block in the attribute fork
+(with no structure).
+Leaf blocks contain the following fields:
+.br
+\f3hdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfbee),
+a \f3count\f1 of active entries,
+\f3usedbytes\f1 total bytes of names and values,
+the \f3firstused\f1 byte in the name area,
+\f3holes\f1 set if the block needs compaction,
+and array \f3freemap\f1 as for \f3dir\f1 leaf blocks
+.br
+\f3entries\f1: array of structures containing
+a \f3hashval\f1,
+\f3nameidx\f1 (index into the block of the name),
+and flags \f3incomplete\f1,
+\f3root\f1,
+and \f3local\f1
+.br
+\f3nvlist\f1: array of structures describing the attribute names and values.
+Fields always present:
+\f3valuelen\f1 (length of value in bytes),
+\f3namelen\f1,
+and \f3name\f1.
+Fields present for local values:
+\f3value\f1 (value string).
+Fields present for remote values:
+\f3valueblk\f1 (fork block number of the block containing the value).
+.TP
+\f3bmapbt\f1
+Files with many extents in their data or attribute fork will have the
+extents described by the contents of a Btree for that fork,
+instead of being stored directly in the inode.
+Each bmap Btree starts with a root block contained within the inode.
+The other levels of the Btree are stored in filesystem blocks.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block contains the following fields:
+.br
+\f3magic\f1: bmap Btree block magic number, 0x424d4150 ('BMAP')
+.br
+\f3level\f1: level of this block above the leaf level
+.br
+\f3numrecs\f1: number of records or keys in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of extent records.
+Each record contains
+\f3startoff\f1,
+\f3startblock\f1,
+\f3blockcount\f1,
+and \f3extentflag\f1 (1 if the extent is unwritten)
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first key value of each block in the level below this one.
+Each record contains \f3startoff\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a filesystem block number to the next level in the Btree
+.TP
+\f3bnobt\f1
+There is one set of filesystem blocks forming the by-block-number allocation
+Btree for each allocation group.
+The root block of this Btree is designated by the \f3bnoroot\f1 field in the
+corresponding AGF block.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block has the following fields:
+.br
+\f3magic\f1: BNOBT block magic number, 0x41425442 ('ABTB')
+.br
+\f3level\f1: level number of this block, 0 is a leaf
+.br
+\f3numrecs\f1: number of data entries in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of freespace records.
+Each record contains
+\f3startblock\f1
+and \f3blockcount\f1
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first value of each block in the level below this one.
+Each record contains 
+\f3startblock\f1
+and \f3blockcount\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a block number within the allocation group to the next level
+in the Btree
+.TP
+\f3cntbt\f1
+There is one set of filesystem blocks forming the by-block-count allocation
+Btree for each allocation group.
+The root block of this Btree is designated by the \f3cntroot\f1 field in the
+corresponding AGF block.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block has the following fields:
+.br
+\f3magic\f1: CNTBT block magic number, 0x41425443 ('ABTC')
+.br
+\f3level\f1: level number of this block, 0 is a leaf
+.br
+\f3numrecs\f1: number of data entries in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of freespace records.
+Each record contains 
+\f3startblock\f1
+and \f3blockcount\f1
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first value of each block in the level below this one.
+Each record contains 
+\f3blockcount\f1
+and \f3startblock\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a block number within the allocation group to the next level
+in the Btree
+.TP
+\f3data\f1
+User file blocks, and other blocks whose type is unknown,
+have this type for display purposes in \f2xfs_db\f1.
+The block data is displayed in hexadecimal format.
+.TP
+\f3dir\f1
+A version 1 directory is organized as a Btree with the directory data
+embedded in the leaf blocks.
+The root of the Btree is found in block 0 of the file.
+The index (sort order) of the Btree is the hash value of the entry name.
+All the blocks contain a \f3blkinfo\f1 structure at the beginning
+with the following fields:
+.br
+\f3forw\f1: next sibling block
+.br
+\f3back\f1: previous sibling block
+.br
+\f3magic\f1: magic number for this block type
+.sp
+The nonleaf (node) blocks have the following fields:
+.br
+\f3hdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfebe),
+the \f3count\f1 of active entries,
+and the \f3level\f1 of this block above the leaves
+.br
+\f3btree\f1: array of entries containing
+\f3hashval\f1 and
+\f3before\f1 fields.
+The \f3before\f1 value is a block number within the directory file to the
+child block,
+the \f3hashval\f1 is the last hash value in that block
+.sp
+The leaf blocks have the following fields:
+.br
+\f3hdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfeeb),
+the \f3count\f1 of active entries,
+\f3namebytes\f1 (total name string bytes),
+\f3holes\f1 flag (block needs compaction),
+and \f3freemap\f1 (array of \f3base\f1, \f3size\f1 entries for free regions)
+.br
+\f3entries\f1: array of structures containing
+\f3hashval\f1,
+\f3nameidx\f1 (byte index into the block of the name string),
+and \f3namelen\f1
+.br
+\f3namelist\f1: array of structures containing
+\f3inumber\f1
+and \f3name\f1
+.TP
+\f3dir2\f1
+A version 2 directory has four kinds of blocks.
+Data blocks start at offset 0 in the file.
+There are two kinds of data blocks: single-block directories have
+the leaf information embedded at the end of the block, data blocks
+in multi-block directories do not.
+Node and leaf blocks start at offset 32GB (with either a single
+leaf block or the root node block).
+Freespace blocks start at offset 64GB.
+The node and leaf blocks form a Btree, with references to the data
+in the data blocks.
+The freespace blocks form an index of longest free spaces within the
+data blocks.
+.sp
+A single-block directory block contains the following fields:
+.br
+\f3bhdr\f1: header containing 
+\f3magic\f1 number 0x58443242 ('XD2B')
+and an array \f3bestfree\f1 of the longest 3 free spaces in the block
+(\f3offset\f1, \f3length\f1)
+.br
+\f3bu\f1: array of union structures.
+Each element is either an entry or a freespace.
+For entries, there are the following fields:
+\f3inumber\f1,
+\f3namelen\f1,
+\f3name\f1,
+and \f3tag\f1.
+For freespace, there are the following fields:
+\f3freetag\f1 (0xffff),
+\f3length\f1,
+and \f3tag\f1.
+The \f3tag\f1 value is the byte offset in the block of the start
+of the entry it is contained in
+.br
+\f3bleaf\f1: array of leaf entries containing
+\f3hashval\f1
+and \f3address\f1.
+The \f3address\f1 is a 64-bit word offset into the file
+.br
+\f3btail\f1: tail structure containing
+the total \f3count\f1 of leaf entries
+and \f3stale\f1 count of unused leaf entries
+.sp
+A data block contains the following fields:
+.br
+\f3dhdr\f1:
+header containing 
+\f3magic\f1 number 0x58443244 ('XD2D')
+and an array \f3bestfree\f1 of the longest 3 free spaces in the block
+(\f3offset\f1, \f3length\f1)
+.br
+\f3du\f1: array of union structures as for \f3bu\f1
+.sp
+Leaf blocks have two possible forms.
+If the Btree consists of a single leaf then the freespace information
+is in the leaf block,
+otherwise it is in separate blocks and the root of the Btree is
+a node block.
+A leaf block contains the following fields:
+.br
+\f3lhdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xd2f1 for the single
+leaf case, 0xd2ff for the true Btree case),
+the total \f3count\f1 of leaf entries,
+and \f3stale\f1 count of unused leaf entries
+.br
+\f3lents\f1: leaf entries, as for \f3bleaf\f1
+.br
+\f3lbests\f1: [single leaf only]
+array of values which represent the longest freespace
+in each data block in the directory
+.br
+\f3ltail\f1: [single leaf only] tail structure containing
+\f3bestcount\f1 count of \f3lbests\f1
+.sp
+A node block is identical to that for types \f3attr\f1 and \f3dir\f1.
+.sp
+A freespace block contains the following fields:
+.br
+\f3fhdr\f1: header containing
+\f3magic\f1 number 0x58443246 ('XD2F'), 
+\f3firstdb\f1 first data block number covered by this freespace block,
+\f3nvalid\f1 number of valid entries,
+and \f3nused\f1 number of entries representing real data blocks
+.br
+\f3fbests\f1: array of values as for \f3lbests\f1
+.TP
+\f3dqblk\f1
+The quota information is stored in files referred to by the superblock
+\f3uquotino\f1 and \f3pquotino\f1 fields.
+Each filesystem block in a quota file contains a constant number of
+quota entries.
+The quota entry size is currently 136 bytes,
+so with a 4KB filesystem block size there are 30 quota entries per block.
+The \f3dquot\f1 command is used to locate these entries in the filesystem.
+The file entries are indexed by the user or project identifier
+to determine the block and offset.
+Each quota entry has the following fields:
+.br
+\f3magic\f1: magic number, 0x4451 ('DQ')
+.br
+\f3version\f1: version number, currently 1
+.br
+\f3flags\f1: flags, values include
+0x01 for user quota,
+0x02 for project quota
+.br
+\f3id\f1: user or project identifier
+.br
+\f3blk_hardlimit\f1: absolute limit on blocks in use
+.br
+\f3blk_softlimit\f1: preferred limit on blocks in use
+.br
+\f3ino_hardlimit\f1: absolute limit on inodes in use
+.br
+\f3ino_softlimit\f1: preferred limit on inodes in use
+.br
+\f3bcount\f1: blocks actually in use
+.br
+\f3icount\f1: inodes actually in use
+.br
+\f3itimer\f1: time when service will be refused if soft limit is violated
+for inodes
+.br
+\f3btimer\f1: time when service will be refused if soft limit is violated
+for blocks
+.br
+\f3iwarns\f1: number of warnings issued about inode limit violations
+.br
+\f3bwarns\f1: number of warnings issued about block limit violations
+.br
+\f3rtb_hardlimit\f1: absolute limit on realtime blocks in use
+.br
+\f3rtb_softlimit\f1: preferred limit on realtime blocks in use
+.br
+\f3rtbcount\f1: realtime blocks actually in use
+.br
+\f3rtbtimer\f1: time when service will be refused if soft limit is violated
+for realtime blocks
+.br
+\f3rtbwarns\f1: number of warnings issued about realtime block limit violations
+.TP
+\f3inobt\f1
+There is one set of filesystem blocks forming the inode allocation
+Btree for each allocation group.
+The root block of this Btree is designated by the \f3root\f1 field in the
+corresponding AGI block.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block has the following fields:
+.br
+\f3magic\f1: INOBT block magic number, 0x49414254 ('IABT')
+.br
+\f3level\f1: level number of this block, 0 is a leaf
+.br
+\f3numrecs\f1: number of data entries in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of inode records.
+Each record contains 
+\f3startino\f1 allocation-group relative inode number,
+\f3freecount\f1 count of free inodes in this chunk,
+and \f3free\f1 bitmap, LSB corresponds to inode 0
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first value of each block in the level below this one.
+Each record contains 
+\f3startino\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a block number within the allocation group to the next level
+in the Btree
+.TP
+\f3inode\f1
+Inodes are allocated in ``chunks'' of 64 inodes each.
+Usually a chunk is multiple filesystem blocks, although there are cases
+with large filesystem blocks where a chunk is less than one block.
+The inode Btree (see \f3inobt\f1 above)
+refers to the inode numbers per allocation group.
+The inode numbers directly reflect the location of the inode block on disk.
+Use the \f3inode\f1 command to point \f2xfs_db\f1 to a specific inode.
+Each inode contains four regions:
+\f3core\f1,
+\f3next_unlinked\f1,
+\f3u\f1,
+and \f3a\f1.
+\f3core\f1 contains the fixed information.
+\f3next_unlinked\f1 is separated from the core due to
+journalling considerations, see type \f3agi\f1 field \f3unlinked\f1.
+\f3u\f1 is a union structure that is different in size and format depending
+on the type and representation of the file data (``data fork'').
+\f3a\f1 is an optional union structure to describe attribute data,
+that is different in size, format, and location depending on the presence
+and representation of attribute data, and the size of the \f3u\f1 data
+(``attribute fork'').
+\f2xfs_db\f1 automatically selects the proper union members based on
+information in the inode.
+.br
+The following are fields in the inode core:
+.br
+\f3magic\f1: inode magic number, 0x494e ('IN')
+.br
+\f3mode\f1: mode and type of file, as described in \f3chmod\f1(2),
+\f3mknod\f1(2), and \f3stat\f1(2)
+.br
+\f3version\f1: inode version, 1 or 2
+.br
+\f3format\f1: format of \f3u\f1 union data
+(0: dev_t,
+1: local file \- in-inode directory or symlink,
+2: extent list,
+3: Btree root,
+4: unique id [unused])
+.br
+\f3nlinkv1\f1: number of links to the file in a version 1 inode
+.br
+\f3nlinkv2\f1: number of links to the file in a version 2 inode
+.br
+\f3projid\f1: owner's project id (version 2 inode only)
+.br
+\f3uid\f1: owner's user id
+.br
+\f3gid\f1: owner's group id
+.br
+\f3atime\f1: time last accessed (seconds and nanoseconds)
+.br
+\f3mtime\f1: time last modified
+.br
+\f3ctime\f1: time created or inode last modified
+.br
+\f3size\f1: number of bytes in the file
+.br
+\f3nblocks\f1: total number of blocks in the file including
+indirect and attribute
+.br
+\f3extsize\f1: basic/minimum extent size for the file, used only for realtime
+.br
+\f3nextents\f1: number of extents in the data fork
+.br
+\f3naextents\f1: number of extents in the attribute fork
+.br
+\f3forkoff\f1: attribute fork offset in the inode,
+in 64-bit words from the start of \f3u\f1
+.br
+\f3aformat\f1: format of \f3a\f1 data
+(1: local attribute data,
+2: extent list,
+3: Btree root)
+.br
+\f3dmevmask\f1: DMAPI event mask
+.br
+\f3dmstate\f1: DMAPI state information
+.br
+\f3newrtbm\f1: file is the realtime bitmap and is ``new'' format
+.br
+\f3prealloc\f1: file has preallocated data space after EOF
+.br
+\f3realtime\f1: file data is in the realtime subvolume
+.br
+\f3gen\f1: inode generation number
+.sp
+The following fields are in the \f3u\f1 data fork union:
+.br
+\f3bmbt\f1: bmap Btree root.
+This looks like a \f3bmapbtd\f1 block with redundant information removed
+.br
+\f3bmx\f1: array of extent descriptors
+.br
+\f3dev\f1: dev_t for the block or character device
+.br
+\f3sfdir\f1: shortform (in-inode) version 1 directory.
+This consists of 
+a \f3hdr\f1 containing 
+the \f3parent\f1 inode number
+and a \f3count\f1 of active entries in the directory,
+followed by 
+an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries.
+Each such entry contains 
+\f3inumber\f1, 
+\f3namelen\f1,
+and \f3name\f1 string
+.br
+\f3sfdir2\f1: shortform (in-inode) version 2 directory.
+This consists of 
+a \f3hdr\f1 containing 
+a \f3count\f1 of active entries in the directory,
+an \f3i8count\f1 of entries with inumbers that don't fit in a 32-bit value,
+and the \f3parent\f1 inode number,
+followed by 
+an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries.
+Each such entry contains 
+\f3namelen\f1,
+a saved \f3offset\f1 used when the directory is converted to a larger form,
+a \f3name\f1 string,
+and the \f3inumber\f1
+.br
+\f3symlink\f1: symbolic link string value
+.sp
+The following fields are in the \f3a\f1 attribute fork union if it exists:
+.br
+\f3bmbt\f1: bmap Btree root, as above
+.br
+\f3bmx\f1: array of extent descriptors
+.br
+\f3sfattr\f1: shortform (in-inode) attribute values.
+This consists of
+a \f3hdr\f1 containing
+a \f3totsize\f1 (total size in bytes)
+and a \f3count\f1 of active entries,
+followed by
+an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries.
+Each such entry contains
+\f3namelen\f1,
+\f3valuelen\f1,
+\f3root\f1 flag,
+\f3name\f1,
+and \f3value\f1
+.TP
+\f3log\f1
+Log blocks contain the journal entries for XFS.
+It's not useful to examine these with \f2xfs_db\f1,
+use \f2xfs_logprint\f1(8) instead.
+.TP
+\f3rtbitmap\f1
+If the filesystem has a realtime subvolume, then the \f3rbmino\f1 field
+in the superblock refers to a file that contains the realtime bitmap.
+Each bit in the bitmap file controls the allocation of a single realtime extent
+(set == free).
+The bitmap is processed in 32-bit words,
+the LSB of a word is used for the first extent controlled by that bitmap word.
+The \f3atime\f1 field of the realtime bitmap inode contains a counter
+that is used to control where the next new realtime file will start.
+.TP
+\f3rtsummary\f1
+If the filesystem has a realtime subvolume,
+then the \f3rsumino\f1 field in the superblock refers to a file
+that contains the realtime summary data.
+The summary file contains a two-dimensional array of 16-bit values.
+Each value counts the number of free extent runs
+(consecutive free realtime extents)
+of a given range of sizes that starts in a given bitmap block.
+The size ranges are binary buckets (low size in the bucket is a power of 2).
+There are as many size ranges as are necessary given the size of the
+realtime subvolume.
+The first dimension is the size range,
+the second dimension is the starting bitmap block number
+(adjacent entries are for the same size, adjacent bitmap blocks).
+.TP
+\f3sb\f1
+There is one sb (superblock) structure per allocation group.
+It is the first disk block in the allocation group.
+Only the first one (block 0 of the filesystem) is actually used;
+the other blocks are redundant information for \f2xfs_repair\f1(8)
+to use if the first superblock is damaged.
+Fields defined:
+.br
+\f3magicnum\f1: superblock magic number, 0x58465342 ('XFSB')
+.br
+\f3blocksize\f1: filesystem block size in bytes
+.br
+\f3dblocks\f1: number of filesystem blocks present in the data subvolume
+.br
+\f3rblocks\f1: number of filesystem blocks present in the realtime subvolume
+.br
+\f3rextents\f1: number of realtime extents that \f3rblocks\f1 contain
+.br
+\f3uuid\f1: unique identifier of the filesystem
+.br
+\f3logstart\f1: starting filesystem block number of the log (journal).
+If this value is 0 the log is ``external''
+.br
+\f3rootino\f1: root inode number
+.br
+\f3rbmino\f1: realtime bitmap inode number
+.br
+\f3rsumino\f1: realtime summary data inode number
+.br
+\f3rextsize\f1: realtime extent size in filesystem blocks
+.br
+\f3agblocks\f1: size of an allocation group in filesystem blocks
+.br
+\f3agcount\f1: number of allocation groups
+.br
+\f3rbmblocks\f1: number of realtime bitmap blocks
+.br
+\f3logblocks\f1: number of log blocks (filesystem blocks)
+.br
+\f3versionnum\f1: filesystem version information.
+This value is currently 1, 2, 3, or 4 in the low 4 bits.
+If the low bits are 4 then the other bits have additional meanings.
+1 is the original value.
+2 means that attributes were used.
+3 means that version 2 inodes (large link counts) were used.
+4 is the bitmask version of the version number.
+In this case, the other bits are used as flags
+(0x0010: attributes were used,
+0x0020: version 2 inodes were used,
+0x0040: quotas were used,
+0x0080: inode cluster alignment is in force,
+0x0100: data stripe alignment is in force,
+0x0200: the \f3shared_vn\f1 field is used,
+0x1000: unwritten extent tracking is on,
+0x2000: version 2 directories are in use)
+.br
+\f3sectsize\f1: sector size in bytes, currently always 512.
+This is the size of the superblock and the other header blocks
+.br
+\f3inodesize\f1: inode size in bytes
+.br
+\f3inopblock\f1: number of inodes per filesystem block
+.br
+\f3fname\f1: obsolete, filesystem name
+.br
+\f3fpack\f1: obsolete, filesystem pack name
+.br
+\f3blocklog\f1: log2 of \f3blocksize\f1
+.br
+\f3sectlog\f1: log2 of \f3sectsize\f1
+.br
+\f3inodelog\f1: log2 of \f3inodesize\f1
+.br
+\f3inopblog\f1: log2 of \f3inopblock\f1
+.br
+\f3agblklog\f1: log2 of \f3agblocks\f1 (rounded up)
+.br
+\f3rextslog\f1: log2 of \f3rextents\f1
+.br
+\f3inprogress\f1: \f2mkfs.xfs\f1(8) aborted before completing this filesystem
+.br
+\f3imax_pct\f1: maximum percentage of filesystem space used for inode blocks
+.br
+\f3icount\f1: number of allocated inodes
+.br
+\f3ifree\f1: number of allocated inodes that are not in use
+.br
+\f3fdblocks\f1: number of free data blocks
+.br
+\f3frextents\f1: number of free realtime extents
+.br
+\f3uquotino\f1: user quota inode number
+.br
+\f3pquotino\f1: project quota inode number; this is currently unused
+.br
+\f3qflags\f1: quota status flags
+(0x01: user quota accounting is on,
+0x02: user quota limits are enforced,
+0x04: quotacheck has been run on user quotas,
+0x08: project quota accounting is on,
+0x10: project quota limits are enforced,
+0x20: quotacheck has been run on project quotas)
+.br
+\f3flags\f1: random flags.
+0x01: only read-only mounts are allowed
+.br
+\f3shared_vn\f1: shared version number (shared readonly filesystems)
+.br
+\f3inoalignmt\f1: inode chunk alignment in filesystem blocks
+.br
+\f3unit\f1: stripe or RAID unit
+.br
+\f3width\f1: stripe or RAID width
+.br
+\f3dirblklog\f1: log2 of directory block size (filesystem blocks)
+.TP
+\f3symlink\f1
+Symbolic link blocks are used only when the symbolic link value does
+not fit inside the inode.
+The block content is just the string value.
+Bytes past the logical end of the symbolic link value have arbitrary values.
+.SH DIAGNOSTICS
+Many messages can come from the \f3check\f1 (\f3blockget\f1) command;
+these are documented in \f2xfs_check\f1(8).
+.SH SEE ALSO
+mkfs.xfs(8),
+xfs_check(8),
+xfs_copy(8),
+xfs_logprint(8),
+xfs_ncheck(8),
+xfs_repair(8),
+chmod(2),
+mknod(2),
+stat(2),
+xfs(5).
diff --git a/man/man8/xfs_growfs.8 b/man/man8/xfs_growfs.8
new file mode 100644
index 000000000..5a2496c55
--- /dev/null
+++ b/man/man8/xfs_growfs.8
@@ -0,0 +1,135 @@
+.TH xfs_growfs 8
+.SH NAME
+xfs_growfs, xfs_info \- expand an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_growfs\f1 [ \f3\-dilnrxV\f1 ] [ \f3\-D\f1 size ] [ \f3\-e\f1 rtextsize ]
+		[ \f3\-L\f1 size ] [ \f3\-m\f1 maxpct ] [ \f3\-t\f1 mtab ]
+		[ \f3\-R\f1 size ] mount-point
+\f3xfs_info\f1 [ \f3\-t\f1 mtab ] mount-point
+.fi
+.SH DESCRIPTION
+.I xfs_growfs
+expands an existing XFS filesystem (see
+.IR xfs (5)).
+The
+.I mount-point
+argument is the pathname of the directory where the filesystem
+is mounted.
+The filesystem must be mounted to be grown (see
+.IR mount (8)).
+The existing contents of the filesystem are undisturbed, and the added space
+becomes available for additional file storage.
+.PP
+.I xfs_info
+is equivalent to invoking
+.I xfs_growfs
+with the
+.B \-n
+option (see discussion below).
+.PP
+The options to
+.I xfs_growfs
+are:
+.TP
+\f3\-d\f1, \f3\-D\f1 \f2size\f1
+Specifies that the data section of the filesystem should be grown.
+If the
+.B \-D
+.I size
+option is given, the data section is grown to that size, otherwise
+the data section is grown to the largest size possible.
+The size
+is expressed in
+filesystem blocks.
+.TP
+.B \-e
+Allows the real-time extent size to be specified.
+In
+.IR mkfs.xfs (8)
+this is specified with
+.B \-r
+.BI extsize= nnnn.
+.TP
+.B \-i
+The new log is an internal log
+(inside the data section).
+.TP
+\f3\-l\f1, \f3\-L\f1 \f2size\f1
+Specifies that the log section of the filesystem should be grown,
+shrunk, or moved.
+If the
+.B \-L
+.I size
+option is given, the log section is changed to be that size,
+if possible.
+The size is expressed in
+filesystem blocks.
+The size of an internal log must be smaller than the size
+of an allocation group (this value is printed at \f2mkfs\f1(8) time).
+If neither
+.B \-i
+nor
+.B \-x
+is given with
+.BR \-l ,
+the log continues to be internal or external as it was before.
+.TP
+.B \-m
+Specify a new value for the maximum percentage
+of space in the filesystem that can be allocated as inodes.
+In
+.I mkfs.xfs
+this is specified with
+.B \-i
+.BI maxpct= nn.
+.TP
+.B \-n
+Specifies that no change to the filesystem is to be made.
+The filesystem geometry is printed, and argument checking is performed,
+but no growth occurs.
+.TP
+\f3\-r\f1, \f3\-R\f1 \f2size\f1
+Specifies that the real-time section of the filesystem should be grown.
+If the
+.B \-R
+.I size
+option is given, the real-time section is grown to that size, otherwise
+the real-time section is grown to the largest size possible.
+The size
+is expressed in
+filesystem blocks.
+The filesystem does not need to have contained a real-time section before
+the \f2xfs_growfs\f1 operation.
+.TP
+.B \-t
+Specifies an alternate mount table file (default is
+.IR /etc/mtab ).
+This is used when working with filesystems mounted without writing to
+.I /etc/mtab
+file - refer to
+.BR mount (8)
+for further details.
+.TP
+.PP
+.I xfs_growfs
+is most often used in conjunction with
+logical volumes
+(see
+.IR lvm (8)
+).
+However, it can also be used on a regular disk partition, for example if a
+partition has been enlarged while retaining the same starting block.
+.SH PRACTICAL USE
+Filesystems normally occupy all of the space on the device where they
+reside.
+In order to grow a filesystem, it is necessary to provide added
+space for it to occupy.
+Therefore there must be at least one spare new
+disk partition available.
+Adding the space is done through the mechanism of
+logical volumes.
+.SH SEE ALSO
+mkfs.xfs(8),
+lvm(8),
+mount(8).
diff --git a/man/man8/xfs_logprint.8 b/man/man8/xfs_logprint.8
new file mode 100644
index 000000000..15ddc18d4
--- /dev/null
+++ b/man/man8/xfs_logprint.8
@@ -0,0 +1,86 @@
+.TH xfs_logprint 8
+.SH NAME
+xfs_logprint \- print the log of an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_logprint\f1 [ options ] device-name
+\f3xfs_logprint \-f\f1 [ options ] filename
+.fi
+.SH DESCRIPTION
+.I xfs_logprint
+prints the log of an XFS filesystem (see
+.IR xfs (5)).
+The
+.I device-name
+argument is the pathname of the partition or logical volume
+containing the filesystem.
+The contents of the filesystem remain undisturbed.
+There are two major modes of operation in
+.IR xfs_logprint .
+.PP
+One mode is better for filesystem operation debugging.
+It is called the transactional view and is enabled through the \f3\-t\f1
+option.
+The transactional view prints only the portion of the log that
+pertains to recovery.
+In other words, it prints out complete transactions between the tail
+and the head.
+This view tries to display each transaction without
+regard to how they are split across log records.
+.PP
+The second mode starts printing out information from the beginning of the log.
+Some error blocks might print out in the beginning because the last log
+record usually overlaps the oldest log record.
+A message is
+printed when the physical end of the log is reached and when the
+logical end of the log is reached.
+A log record view is displayed
+one record at a time.
+Transactions that span log records may not be
+decoded fully.
+.PP
+Common options are:
+.TP
+\f3\-b\f1
+Extract and print buffer information.
+Only used in transactional view.
+.TP
+\f3\-D\f1
+Don't decode anything;
+just print data.
+.TP
+\f3\-e\f1
+Exit when an error is found in the log.
+Normally,
+.I xfs_logprint
+tries to continue and unwind from bad logs.
+However, sometimes it just dies in bad ways.
+Using this option prevents core dumps.
+.TP
+\f3\-f\f1
+The log is a file.
+.TP
+\f3\-i\f1
+Extract and print inode information.
+Only used in transactional view.
+.TP
+\f3\-q\f1
+Extract and print quota information.
+Only used in transactional view.
+.TP
+\f3\-n\f1
+Don't try to interpret log data;
+just interpret log header information.
+.TP
+\f3\-o\f1
+Also print buffer data in hex.
+Normally, buffer data is just decoded, so better information can be printed.
+.TP
+\f3\-s\f1 \f2start-block\f1
+Override any notion of where to start printing.
+.TP
+\f3\-t\f1
+Print out the transactional view.
+.SH SEE ALSO
+mkfs.xfs(8),
+mount(8).
diff --git a/man/man8/xfs_mkfile.8 b/man/man8/xfs_mkfile.8
new file mode 100644
index 000000000..2cc151741
--- /dev/null
+++ b/man/man8/xfs_mkfile.8
@@ -0,0 +1,27 @@
+.TH xfs_mkfile 8
+.SH NAME
+xfs_mkfile \- create an XFS file
+.SH SYNOPSIS
+.nf
+\f3xfs_mkfile\f1 [\f3\-v\f1] [\f3\-n\f1] \c
+\f2size\f1[\f3k\f1|\f3b\f1|\f3m\f1|\f3g\f1] \f2filename\f1...
+.fi
+.SH DESCRIPTION
+.I xfs_mkfile
+creates one or more files.
+The file is padded with zeroes by
+default.
+The default size is in bytes, but it can be
+flagged as kilobytes, blocks, megabytes, or gigabytes with the \f3k\f1,
+\f3b\f1, \f3m\f1, or \f3g\f1 suffixes, respectively.
+.SH OPTIONS
+.TP
+\f3\-v\f1
+Verbose.
+Report the names and sizes of created files.
+.TP
+\f3\-n\f1
+No bytes.
+Create a holey file - that is,
+do not write out any data, just
+seek to end of file and write a block.
diff --git a/man/man8/xfs_ncheck.8 b/man/man8/xfs_ncheck.8
new file mode 100644
index 000000000..201b43e28
--- /dev/null
+++ b/man/man8/xfs_ncheck.8
@@ -0,0 +1,53 @@
+.TH xfs_ncheck 8
+.SH NAME
+xfs_ncheck \- generate pathnames from i-numbers for XFS
+.SH SYNOPSIS
+.nf
+\f3xfs_ncheck\f1 [ \f3\-i\f1 ino ] ... \c
+[ \f3\-s\f1 ] xfs_special
+.sp .8v
+\f3xfs_ncheck\f1 \f3\-f\f1 [ \f3\-i\f1 ino ] ... \c
+[ \f3\-s\f1 ] file
+.fi
+.SH DESCRIPTION
+.I xfs_ncheck
+with no
+.B \-i
+arguments generates an inode number and pathname list of all
+files on the given filesystem.
+Names of directory files are followed by 
+.BR /. .
+The output is not sorted in any particular order.
+The filesystem to be examined is specified by the
+.I xfs_special
+argument, which should be the disk or volume device for the filesystem.
+Filesystems stored in files can also be checked, using the \f3\-f\f1 flag.
+.PP
+The options to \f2xfs_ncheck\f1 are:
+.TP 9
+.B \-f
+Specifies that the special device is actually a file (see the
+\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been made into an ordinary file.
+.TP
+.B \-s
+Limits the report to special files and files with setuserid mode.
+This option may be used to detect violations of security policy.
+.TP
+.BI \-i " ino"
+Limits the report to only those files whose inode numbers follow.
+May be given multiple times to select multiple inode numbers.
+.PP
+If the filesystem is seriously corrupted, or very busy and looks
+like it is corrupt, a message of the form that would be generated by
+.IR xfs_check (8)
+may appear.
+.PP
+.I xfs_ncheck
+is only useful with XFS filesystems.
+.SH SEE ALSO
+mkfs.xfs(8),
+xfs_ncheck(8),
+xfs_check(8),
+xfs(5).
diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8
new file mode 100644
index 000000000..014620cce
--- /dev/null
+++ b/man/man8/xfs_repair.8
@@ -0,0 +1,353 @@
+.TH xfs_repair 8
+.SH NAME
+xfs_repair \- repair an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_repair\f1 [ \f3\-n\f1 ] [ \f3\-o\f1 subopt[=value] ] xfs_special
+.sp .8v
+\f3xfs_repair\f1 \f3\-f\f1 [ \f3\-n\f1 ] [ \f3\-o\f1 subopt[=value] ] ... file
+.fi
+.SH DESCRIPTION
+.I xfs_repair
+repairs corrupt or damaged XFS filesystems
+(see
+.IR xfs (5)).
+The filesystem is specified using the
+.I xfs_special
+argument which should be the device name of the
+disk partition or volume containing
+the filesystem.
+If given the name of a block device,
+.I xfs_repair
+will attempt to find the raw device associated
+with the specified block device and will use the raw device
+instead.
+.PP
+Regardless, the filesystem to be repaired
+must be unmounted;
+otherwise, the resulting filesystem may be inconsistent or corrupt.
+.PP
+The options to \f2xfs_repair\f1 are:
+.TP
+.B \-f
+Specifies that the special device is actually a file (see the
+\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been copied or written into an ordinary file.
+.TP
+.B \-n
+No modify mode.
+Specifies that
+.I xfs_repair
+should not modify the filesystem but should only scan the
+filesystem and indicate what repairs would have been made.
+.TP
+.B \-o
+Override what the program might conclude about the filesystem
+if left to its own devices.
+.IP
+The
+.B assume_xfs
+suboption
+specifies that the filesystem is an XFS filesystem.
+Normally, if
+.I xfs_repair
+cannot find an XFS superblock, it checks to see if the
+filesystem is an EFS filesystem before it tries to
+regenerate the XFS superblock.
+If the
+.B assume_xfs
+option is in effect,
+.I xfs_repair
+will assume that the filesystem is an XFS filesystem and
+will ignore an EFS superblock if one is found.
+.SS Checks Performed
+Inconsistencies corrected include the following:
+.TP
+1.
+Inode and inode blockmap (addressing) checks:
+bad magic number in inode,
+bad magic numbers in inode blockmap blocks,
+extents out of order,
+incorrect number of records in inode blockmap blocks,
+blocks claimed that are not in a legal data area of the filesystem,
+blocks that are claimed by more than one inode.
+.TP
+2.
+Inode allocation map checks:
+bad magic number in inode map blocks,
+inode state as indicated by map (free or in-use) inconsistent
+with state indicated by the inode,
+inodes referenced by the filesystem that do not appear in
+the inode allocation map,
+inode allocation map referencing blocks that do not appear
+to contain inodes.
+.TP
+3.
+Size checks:
+number of blocks claimed by inode inconsistent with inode size,
+directory size not block aligned,
+inode size not consistent with inode format.
+.TP
+4.
+Directory checks:
+bad magic numbers in directory blocks,
+incorrect number of entries in a directory block,
+bad freespace information in a directory leaf block,
+entry pointing to an unallocated (free) or out
+of range inode,
+overlapping entries,
+missing or incorrect dot and dotdot entries,
+entries out of hashvalue order,
+incorrect internal directory pointers,
+directory type not consistent with inode format and size.
+.TP
+5.
+Pathname checks:
+files or directories not referenced by a pathname starting from
+the filesystem root,
+illegal pathname components.
+.TP
+6.
+Link count checks:
+link counts that do not agree with the number of
+directory references to the inode.
+.TP
+7.
+Freemap checks:
+blocks claimed free by the freemap but also claimed by an inode,
+blocks unclaimed by any inode but not appearing in the freemap.
+.TP
+8.
+Super Block checks:
+total free block and/or free i-node count incorrect,
+filesystem geometry inconsistent,
+secondary and primary superblocks contradictory.
+.PP
+Orphaned files and directories (allocated, in-use but unreferenced) are
+reconnected by placing them in the
+.I lost+found
+directory.
+The name assigned is the inode number.
+.SS Disk Errors
+.I xfs_repair
+aborts on most disk I/O errors.
+Therefore, if you are trying
+to repair a filesystem that was damaged due to a disk drive failure,
+steps should be taken to ensure that
+all blocks in the filesystem are readable and writeable
+before attempting to use
+.I xfs_repair
+to repair the filesystem.
+A possible method is using
+.IR dd (1)
+to copy the data onto a good disk.
+.SS lost+found
+The directory
+.I lost+found
+does not have to already exist in the filesystem being repaired.
+If the directory does not exist, it is automatically created.
+If the \f2lost+found\f1 directory already exists,
+the \f2lost+found\f1
+directory is deleted and recreated every time \f2xfs_repair\f1
+runs.
+This ensures that there are no name conflicts in \f2lost+found\f1.
+However, if you rename a file in \f2lost+found\f1 and leave it there,
+if \f2xfs_repair\f1 is run again, that file is renamed back to
+its inode number.
+.SS Corrupted Superblocks
+XFS has both primary and secondary superblocks.
+\f2xfs_repair\f1 uses information in the primary superblock
+to automatically find and validate the primary superblock
+against the secondary superblocks before proceeding.
+Should the primary be too corrupted to be useful in locating
+the secondary superblocks, the program scans the filesystem
+until it finds and validates some secondary superblocks.
+At that point, it generates a primary superblock.
+.SS Quotas
+If quotas are in use, it is possible that \f2xfs_repair\f1 will clear
+some or all of the filesystem quota information.
+If so, the program issues a warning just before it terminates.
+If all quota information is lost, quotas are disabled and the
+program issues a warning to that effect.
+.PP
+Note that \f2xfs_repair\f1 does not check the validity of quota limits.
+It is recommended that you check the quota limit information manually
+after \f2xfs_repair\f1.
+Also, space usage information is automatically regenerated the
+next time the filesystem is mounted with quotas turned on, so the
+next quota mount of the filesystem may take some time.
+.SH DIAGNOSTICS
+.I xfs_repair
+issues informative messages as it proceeds
+indicating what it has found that is abnormal or any corrective
+action that it has taken.
+Most of the messages are completely understandable only to those
+who are knowledgeable about the structure of the filesystem.
+Some of the more common messages are explained here.
+Note that the language of the messages is slightly different
+if \f2xfs_repair\f1 is run in no-modify mode because the program is not
+changing anything on disk.
+No-modify mode indicates what it would do to repair the filesystem
+if run without the no-modify flag.
+.PP
+disconnected inode \f3xxxx\f1, moving to \f2lost+found\f1
+.IP
+An inode numbered
+.B xxxx
+was not connected to the filesystem
+directory tree and was reconnected to the \f2lost+found\f1 directory.
+The inode is assigned the name of its inode number (i-number).
+If a \f2lost+found\f1 directory does not exist, it is automatically
+created.
+.PP
+disconnected dir inode \f3xxxx\f1, moving to \f2lost+found\f1
+.IP
+As above, except that the inode is a directory inode.
+If a directory inode is attached to \f2lost+found\f1, all of its
+children (if any) stay attached to the directory and therefore
+get automatically reconnected when the directory is reconnected.
+.PP
+imap claims in-use inode \f3xxxx\f1 is free, correcting imap
+.IP
+The inode allocation map thinks that inode \f3xxxx\f1 is
+free whereas examination of the inode indicates that the
+inode may be in use (although it may be disconnected).
+The program updates the inode allocation map.
+.PP
+imap claims free inode \f3xxxx\f1 is in use, correcting imap
+.IP
+The inode allocation map thinks that inode \f3xxxx\f1 is
+in use whereas examination of the inode indicates that the
+inode is not in use and therefore is free.
+The program updates the inode allocation map.
+.PP
+resetting inode \f3xxxx\f1 nlinks from \f3x\f1 to \f3y\f1
+.IP
+The program detected a mismatch between the
+number of valid directory entries referencing inode \f3xxxx\f1
+and the number of references recorded in the inode and corrected the
+number in the inode.
+.PP
+\f3fork-type\f1 fork in ino \f3xxxx\f1 claims used block \f3yyyy\f1
+.IP
+Inode \f3xxxx\f1 claims a block \f3yyyy\f1 that is used (claimed)
+by either another inode or the filesystem itself for metadata storage.
+The \f3fork-type\f1 is either \f3data\f1 or \f3attr\f1
+indicating whether the problem lies in the portion of the
+inode that tracks regular data or the portion of the inode
+that stores XFS attributes.
+If the inode is a real-time (rt) inode, the message says so.
+Any inode that claims blocks used by the filesystem is deleted.
+If two or more inodes claim the same block, they are both deleted.
+.PP
+\f3fork-type\f1 fork in ino \f3xxxx\f1 claims dup extent ...
+.IP
+Inode \f3xxxx\f1 claims a block in an extent known to be
+claimed more than once.
+The offset in the inode, start and length of the extent are given.
+The message is slightly different
+if the inode is a real-time (rt) inode and the extent is therefore
+a real-time (rt) extent.
+.PP
+inode \f3xxxx\f1 - bad extent ...
+.IP
+An extent record in the blockmap of inode \f3xxxx\f1 claims
+blocks that are out of the legal range of the filesystem.
+The message supplies the start, end, and file offset of
+the extent.
+The message is slightly different
+if the extent is a real-time (rt) extent.
+.PP
+bad \f3fork-type\f1 fork in inode \f3xxxx\f1
+.IP
+There was something structurally wrong or inconsistent with the
+data structures that map offsets to filesystem blocks.
+.PP
+cleared inode \f3xxxx\f1
+.IP
+There was something wrong with the inode that
+was uncorrectable so the program freed the inode.
+This usually happens because the inode claims
+blocks that are used by something else or the inode itself
+is badly corrupted.
+Typically, this message
+is preceded by one or more messages indicating why the
+inode needed to be cleared.
+.PP
+bad attribute fork in inode \f3xxxx\f1, clearing attr fork
+.IP
+There was something wrong with the portion of the inode that
+stores XFS attributes (the attribute fork) so the program reset
+the attribute fork.
+As a result of this, all attributes on that inode are lost.
+.PP
+correcting nextents for inode \f3xxxx\f1, was \f3x\f1 - counted \f3y\f1
+.IP
+The program found that the number of extents used to store
+the data in the inode is wrong and corrected the number.
+The message refers to nextents if the count is wrong
+on the number of extents used to store attribute information.
+.PP
+entry \f3"name"\f1 in dir \f3xxxx\f1 not consistent
+with ..
+value (\f3yyyy\f1) in dir ino \f3xxxx\f1,
+junking entry \f3"name"\f1 in directory inode \f3xxxx\f1
+.IP
+The entry \f3"name"\f1 in directory inode \f3xxxx\f1 references a
+directory inode \f3yyyy\f1.
+However, the ..\& entry in directory \f3yyyy\f1 does not point
+back to directory \f3xxxx\f1,
+so the program deletes the entry \f3"name"\f1 in directory inode
+\f3xxxx\f1.
+If the directory inode \f3yyyy\f1 winds up becoming a disconnected
+inode as a result of this, it is moved to \f2lost+found\f1 later.
+.PP
+entry \f3"name"\f1 in dir \f3xxxx\f1 references already
+connected dir ino \f3yyyy\f1,
+junking entry \f3"name"\f1 in directory inode \f3xxxx\f1
+.IP
+The entry \f3"name"\f1 in directory inode \f3xxxx\f1 points to a
+directory inode \f3yyyy\f1 that is known to be a child of another
+directory.
+Therefore, the entry is invalid and is deleted.
+This message refers to an entry in a small directory.
+If this were a large directory, the last phrase would read
+"will clear entry".
+.PP
+entry references free inode \f3xxxx\f1 in directory \f3yyyy\f1,
+will clear entry
+.IP
+An entry in directory inode \f3yyyy\f1 references an inode \f3xxxx\f1
+that is known to be free.
+The entry is therefore invalid and is deleted.
+This message refers to a large directory.
+If the directory were small, the message would read "junking entry ...".
+.SH EXIT STATUS
+.I xfs_repair -n
+(no modify mode)
+will return a status of 1 if filesystem corruption was detected and
+0 if no filesystem corruption was detected.
+.I xfs_repair
+run without the -n option will always return a status code of 0.
+.SH BUGS
+.I xfs_repair
+does not do a thorough job on XFS extended attributes.
+The structure of the attribute fork will be consistent,
+but only the contents of attribute forks that will fit into
+an inode are checked.
+This limitation will be fixed in the future.
+.PP
+The no-modify mode (\f3\-n\f1 option) is not completely
+accurate.
+It does not catch inconsistencies in the freespace and inode
+maps, particularly lost blocks or subtly corrupted maps (trees).
+.PP
+The no-modify mode can generate repeated warnings about
+the same problems because it cannot fix the problems as they
+are encountered.
+.SH SEE ALSO
+dd(1),
+mkfs.xfs(8),
+xfs_check(8),
+xfs(5).
diff --git a/mkfile/Makefile b/mkfile/Makefile
new file mode 100644
index 000000000..fc274e821
--- /dev/null
+++ b/mkfile/Makefile
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = xfs_mkfile
+CFILES = xfs_mkfile.c
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
diff --git a/mkfile/xfs_mkfile.c b/mkfile/xfs_mkfile.c
new file mode 100644
index 000000000..f880d932c
--- /dev/null
+++ b/mkfile/xfs_mkfile.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* 
+ * Make file utility for xfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <malloc.h>
+#include <errno.h>
+#include <libxfs.h>
+
+#undef O_DIRECT
+#define O_DIRECT 0	/* nathans TODO - remove this when direct IO done */
+
+#define	MAXBUFFERSIZE	(256 * 1024)
+
+static void usage(void);
+
+int
+main(int argc, char **argv)
+{
+	int fd;
+	loff_t result;
+	loff_t size = 0;
+	loff_t mult = 0;
+	int bytes = 0;
+	loff_t wrote = 0;
+	int len = 0;
+	int c;
+	int errflg = 0;
+	int errs = 0;
+	int nobytes = 0;
+	int prealloc = 0;
+	int verbose = 0;
+	struct dioattr da;
+	char *progname;
+	void *buf = NULL;
+	int buflen = 0, nbuflen;
+	int bufalign = 0, nbufalign, bufmin;
+	int oflags;
+	xfs_flock64_t flck;
+	/* Create every named file at <size> bytes; exit status 1 if any failed. */
+	progname = basename(argv[0]);
+	while ((c = getopt(argc, argv, "npvV")) != EOF) {
+		switch(c) {
+			case 'n':
+				nobytes++;
+				break;
+			case 'p':
+				prealloc++;
+				break;
+			case 'v':
+				verbose++;
+				break;
+			case 'V':
+				printf("%s version %s\n", progname, VERSION);
+				break;
+			default:
+				errflg++;
+				break;
+		}
+	}
+
+	if (argc < optind + 2 || errflg)
+		usage();
+	/* <size> may end in b, k, m or g (either case) to scale the number. */
+	mult = 1;
+
+	len = strlen(argv[optind]);
+
+	if (len > 0 && isalpha((unsigned char)argv[optind][len-1])) {
+		switch (argv[optind][len-1]) {
+		case 'k':
+		case 'K':
+			mult = 1024;
+			break;
+		case 'b':
+		case 'B':
+			mult = 512;
+			break;
+		case 'm':
+		case 'M':
+			mult  = 1024;
+			mult *= 1024;
+			break;
+		case 'g':
+		case 'G':
+			mult  = 1024;
+			mult *= 1024;
+			mult *= 1024;
+			break;
+		default:
+			fprintf(stderr, "unknown size %s\n", argv[optind]);
+			usage();
+		}
+
+		argv[optind][len-1] = '\0';
+	}
+
+	size = atoll(argv[optind]) * mult;
+
+	optind++;
+
+	while (optind < argc) {
+		if (verbose)
+			fprintf(stdout, "%s %lld bytes %s\n",
+				argv[optind], size,
+				prealloc ? "(pre-allocated)" : "");
+		/* O_DIRECT is forced to 0 at the top of this file for now. */
+		oflags = O_CREAT|O_TRUNC|O_WRONLY|(nobytes ? 0 : O_DIRECT);
+
+		fd = open(argv[optind], oflags, 0600);
+
+		if (   (oflags & O_DIRECT)
+		    && (   (fd < 0 && errno == EINVAL)
+			|| ioctl(fd, XFS_IOC_DIOINFO, &da) < 0)) {
+
+			close(fd);
+
+			oflags &= ~O_DIRECT;
+
+			fd = open(argv[optind], oflags, 0600);
+		}
+
+		if (fd < 0) {
+			perror(argv[optind]);
+			optind++;
+			errs++;
+			continue;
+		}
+
+		if (size == 0) {
+			close(fd);
+			optind++;
+			continue;
+		}
+
+		if ((result = lseek64(fd, size - 1, SEEK_SET)) < 0LL) {
+			/*
+			 * This check doesn't actually work for 6.2
+			 * efs and nfs2, although it should.
+			 */
+			fprintf(stderr,
+				"lseek64 error, result = %lld\n", result);
+			if (errno)
+				perror(argv[optind]);
+			errs++;
+		} else if (nobytes) {
+			if (write(fd, "", 1) < 0) {
+				perror(argv[optind]);
+				errs++;
+			}
+		} else {
+			flck.l_whence = SEEK_SET;
+			flck.l_start  = 0LL;
+			flck.l_len    = size;
+#if 0
+			(void)ioctl(fd, XFS_IOC_RESVSP64, &flck);
+
+			if (prealloc) {
+				if ( close(fd) < 0 ) {
+					perror(argv[optind]);
+					unlink(argv[optind]);
+					errs++;
+				}
+
+				optind++;
+
+				continue;
+			}
+#endif
+			if (oflags & O_DIRECT) {
+				nbufalign = da.d_mem;
+
+				if (   da.d_miniosz <= MAXBUFFERSIZE
+				    && MAXBUFFERSIZE <= da.d_maxiosz)
+					nbuflen = MAXBUFFERSIZE;
+				else if (da.d_maxiosz < MAXBUFFERSIZE)
+					nbuflen = da.d_maxiosz;
+				else
+					nbuflen = da.d_miniosz;
+
+				bufmin = da.d_miniosz;
+			} else {
+				nbuflen = MAXBUFFERSIZE;
+				nbufalign = sizeof(long);
+				bufmin = 0;
+			}
+
+			if (nbuflen > buflen || nbufalign > bufalign) {
+				free(buf);
+				buf = memalign(nbufalign, nbuflen);
+				if (buf == NULL) {
+					perror("memalign");
+					exit(1);
+				}
+				buflen = nbuflen;
+				bufalign = nbufalign;
+				bzero(buf, nbuflen);
+			}
+			/* Zero-fill from offset 0, at most nbuflen bytes per write. */
+			wrote = 0;
+			lseek64(fd, 0LL, SEEK_SET);
+
+			while (wrote < size) {
+				if (size - wrote >= nbuflen)
+					bytes = nbuflen;
+				else if (bufmin)
+					bytes = roundup(size - wrote, bufmin);
+				else
+					bytes = size - wrote;
+
+				len = write(fd, buf, bytes);
+
+				if (len < 0) {
+					perror(argv[optind]);
+					unlink(argv[optind]);
+					errs++;
+					break;
+				}
+
+				wrote += len;
+			}
+
+			if (wrote > size && ftruncate64(fd, size) < 0) {
+				perror(argv[optind]);
+				unlink(argv[optind]);
+				errs++;
+			}
+		}
+
+		if ( close(fd) < 0 ) {
+			perror(argv[optind]);
+			unlink(argv[optind]);
+			errs++;
+		}
+
+		optind++;
+	}
+
+	return errs != 0;
+}
+
+static void
+usage(void)
+{
+	fputs("mkfile: [-npv] <size> <name1> [<name2>] ...\n", stderr);
+	exit(2);	/* a usage error always ends the program with status 2 */
+}
diff --git a/mkfs/Makefile b/mkfs/Makefile
new file mode 100644
index 000000000..d6f813d13
--- /dev/null
+++ b/mkfs/Makefile
@@ -0,0 +1,59 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = mkfs.xfs
+CMDDEPS	= $(LIBXFS)
+MAXTRRES = maxtrres
+
+CFILES = xfs_mkfs.c mountinfo.c proto.c
+HFILES = xfs_mkfs.h mountinfo.h proto.h volume.h
+LLDLIBS = $(LIBXFS) $(LIBUUID) $(LIBLVM)
+MAXTRLIBS = $(LIBXFS) $(LIBUUID)
+LSRCFILES = $(MAXTRRES).c
+LDIRT = $(MAXTRRES) $(MAXTRRES).h
+
+default: $(MAXTRRES).h $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR)
+
+$(MAXTRRES):
+	$(CCF) $@.c -o $@ $(LDFLAGS) $(MAXTRLIBS)
+
+$(MAXTRRES).h: $(MAXTRRES)
+	./$(MAXTRRES) > $@ || ( rm -f $@ && exit 1 )
diff --git a/mkfs/maxtrres.c b/mkfs/maxtrres.c
new file mode 100644
index 000000000..638d945d4
--- /dev/null
+++ b/mkfs/maxtrres.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * maxtrres
+ * 
+ * Compute the maximum transaction reservation for every legal
+ * combination of block size, inode size, directory version, 
+ * and directory block size.
+ * Generates a table compiled into mkfs, to control the default
+ * and minimum log sizes.
+ */
+
+#include <libxfs.h>
+#include "xfs_mkfs.h"
+
+xfs_trans_reservations_t tr_count = {
+	XFS_WRITE_LOG_COUNT,		/* extent alloc trans */
+	XFS_ITRUNCATE_LOG_COUNT,	/* truncate trans */
+	XFS_RENAME_LOG_COUNT,		/* rename trans */
+	XFS_LINK_LOG_COUNT,		/* link trans */
+	XFS_REMOVE_LOG_COUNT,		/* unlink trans */
+	XFS_SYMLINK_LOG_COUNT,		/* symlink trans */
+	XFS_CREATE_LOG_COUNT,		/* create trans */
+	XFS_MKDIR_LOG_COUNT,		/* mkdir trans */
+	XFS_DEFAULT_LOG_COUNT,		/* inode free trans */
+	XFS_DEFAULT_LOG_COUNT,		/* inode update trans */
+	XFS_DEFAULT_LOG_COUNT,		/* fs data section grow trans */
+	XFS_DEFAULT_LOG_COUNT,		/* sync write inode trans */
+	XFS_ADDAFORK_LOG_COUNT,		/* cvt inode to attributed trans */
+	XFS_DEFAULT_LOG_COUNT,		/* write setuid/setgid file */
+	XFS_ATTRINVAL_LOG_COUNT,	/* attr fork buffer invalidation */
+	XFS_ATTRSET_LOG_COUNT,		/* set/create an attribute */
+	XFS_ATTRRM_LOG_COUNT,		/* remove an attribute */
+	XFS_DEFAULT_LOG_COUNT,		/* clear bad agi unlinked ino bucket */
+	XFS_DEFAULT_PERM_LOG_COUNT,	/* grow realtime allocations */
+	XFS_DEFAULT_LOG_COUNT,		/* grow realtime zeroing */
+	XFS_DEFAULT_LOG_COUNT,		/* grow realtime freeing */
+};
+
+static int
+max_trans_res(
+	xfs_mount_t			*mp,
+	int				*mul)
+{
+	/*
+	 * Return the largest reservation recorded for mp and store the
+	 * tr_count entry found at the same position in *mul.
+	 */
+	uint				*p, *q;
+	int				rval;
+	xfs_trans_reservations_t	*tr;
+	xfs_da_args_t			args;
+	int				local, size, nblks, res;
+
+	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	bzero(&args, sizeof(args));
+	args.name = NULL;
+	args.namelen = MAXNAMELEN;
+	args.value = NULL;
+	args.valuelen = 65536;
+	args.flags = 0;
+	args.hashval = 0;
+	args.dp = NULL;
+	args.firstblock = NULL;
+	args.flist = NULL;
+	args.whichfork = XFS_ATTR_FORK;
+	args.oknoent = 1;
+
+	/*
+	 * Determine space new attribute will use, and if it will be
+	 * inline or out of line.
+	 */
+	size = libxfs_attr_leaf_newentsize(
+			&args, mp->m_sb.sb_blocksize, &local);
+
+	if (local) {
+		/* stdout is redirected into the generated header: use stderr */
+		fprintf(stderr, "Uh-oh.. attribute is local\n");
+	} else {
+		/* Out of line attribute, cannot double split, but make
+		 * room for the attribute value itself.
+		 */
+		nblks += XFS_B_TO_FSB(mp, size);
+		nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
+	}
+	res = XFS_ATTRSET_LOG_RES(mp, nblks);
+	mp->m_reservations.tr_attrset = res;
+
+	/* Scan both structures as parallel uint arrays; this assumes every
+	 * member of xfs_trans_reservations_t is a uint (TODO confirm). */
+	*mul = 0;	/* defined result even if no reservation is positive */
+	tr = &mp->m_reservations;
+	for (rval = 0, p = (uint *)tr, q = (uint *)&tr_count;
+	     p < (uint *)(tr + 1); p++, q++) {
+		if ((int)*p > rval) {
+			rval = (int)*p;
+			*mul = (int)*q;
+		}
+	}
+	return rval;
+}
+
+int
+main(int argc, char **argv)
+{
+	int		bl;
+	int		dl;
+	int		dv;
+	int		i;
+	int		il;
+	xfs_mount_t	m;
+	xfs_sb_t	*sbp;
+	int		mul;
+	/* Print one MAXTRRES_* #define per block/inode/dirblock/dirversion combo. */
+	progname = basename(argv[0]);
+	if (argc > 1) {
+		fprintf(stderr, "Usage: %s\n", progname);
+		return 1;
+	}
+	memset(&m, 0, sizeof(m));
+	sbp = &m.m_sb;
+	sbp->sb_magicnum = XFS_SB_MAGIC;
+	sbp->sb_sectlog = 9;
+	sbp->sb_sectsize = 1 << sbp->sb_sectlog;
+	for (bl = XFS_MIN_BLOCKSIZE_LOG; bl <= XFS_MAX_BLOCKSIZE_LOG; bl++) {
+		sbp->sb_blocklog = bl;
+		sbp->sb_blocksize = 1 << bl;
+		sbp->sb_agblocks = XFS_AG_MIN_BYTES / (1 << bl);
+		for (il = XFS_DINODE_MIN_LOG; il <= XFS_DINODE_MAX_LOG; il++) {
+			if ((1 << il) > (1 << bl) / XFS_MIN_INODE_PERBLOCK)
+				continue;	/* too few inodes would fit in a block */
+			sbp->sb_inodelog = il;
+			sbp->sb_inopblog = bl - il;
+			sbp->sb_inodesize = 1 << il;
+			sbp->sb_inopblock = 1 << (bl - il);
+			for (dl = bl; dl <= XFS_MAX_BLOCKSIZE_LOG; dl++) {
+				sbp->sb_dirblklog = dl - bl;
+				for (dv = 1; dv <= 2; dv++) {
+					if (dv == 1 && dl != bl)
+						continue;	/* v1 only with dir block == fs block */
+					sbp->sb_versionnum =
+						XFS_SB_VERSION_4 |
+						(dv == 2 ?
+						    XFS_SB_VERSION_DIRV2BIT :
+						    0);
+					libxfs_mount(&m, sbp, 0, 0, 0, 0);
+					i = max_trans_res(&m, &mul);
+					printf(
+				"#define\tMAXTRRES_B%d_I%d_D%d_V%d\t%lld\t"
+				"/* LOG_FACTOR %d */\n",
+						bl, il, dl, dv,
+						XFS_B_TO_FSB(&m, i), mul);
+					libxfs_umount(&m);
+				}
+			}
+		}
+	}
+	return 0;
+}
diff --git a/mkfs/proto.c b/mkfs/proto.c
new file mode 100644
index 000000000..8570d140d
--- /dev/null
+++ b/mkfs/proto.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "proto.h"
+
+/*
+ * Prototypes for internal functions.
+ */
+extern long long cvtnum(int blocksize, char *s);
+extern void parseproto(xfs_mount_t *mp, xfs_inode_t *pip, char **pp,
+	char *name); 
+static long getnum(char **pp);
+static char *getstr(char **pp);
+static void fail(char *msg, int i);
+static void getres(xfs_trans_t *tp, uint blocks);
+static void rsvfile(xfs_mount_t *mp, xfs_inode_t *ip, long long len);
+static int newfile(xfs_trans_t *tp, xfs_inode_t *ip, xfs_bmap_free_t *flist,
+	xfs_fsblock_t *first, int dolocal, int logit, char *buf, int len);
+static char *newregfile(char **pp, int *len); 
+static void rtinit(xfs_mount_t *mp);
+static long filesize(int fd);
+
+/*
+ * Use this for block reservations needed for mkfs's conditions
+ * (basically no fragmentation).
+ */
+#define	MKFS_BLOCKRES_INODE	\
+	((uint)(XFS_IALLOC_BLOCKS(mp) + (XFS_IN_MAXLEVELS(mp) - 1)))
+#define	MKFS_BLOCKRES(rb)	\
+	((uint)(MKFS_BLOCKRES_INODE + XFS_DA_NODE_MAXDEPTH + \
+	(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1) + (rb)))
+
+
+char *
+setup_proto(
+	char	*fname)
+{
+	char		*buf;
+	static char	dflt[] = "d--755 0 0 $";
+	int		fd;
+	long		size;
+
+	/* Return the prototype text: fname's contents, or a built-in default. */
+	if (!fname)
+		return dflt;
+	if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
+		fprintf(stderr, "%s: failed to open %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	buf = malloc(size + 1);
+	if (buf == NULL || read(fd, buf, size) < size) {
+		fprintf(stderr, "%s: read failed on %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	close(fd);
+	if (size == 0 || buf[size - 1] != '\n') {	/* empty or cut short */
+		fprintf(stderr, "%s: proto file %s premature EOF\n",
+			progname, fname);
+		exit(1);
+	}
+	buf[size] = '\0';
+	/* Skip past the stuff there for compatibility, a string and 2 numbers. */
+	(void)getstr(&buf);	/* boot image name */
+	(void)getnum(&buf);	/* block count */
+	(void)getnum(&buf);	/* inode count */
+	return buf;	/* now points inside the allocation, not at its start */
+}
+
+static long
+getnum(
+	char	**pp)
+{
+	/*
+	 * Numeric value of the next token; atol() reports no errors.
+	 */
+	return atol(getstr(pp));
+}
+
+static void
+fail(
+	char	*msg,	/* text printed after the program name */
+	int	i)	/* number printed after msg */
+{
+	fprintf(stderr, "%s: %s %d\n", progname, msg, i);
+	ASSERT(0);	/* NOTE(review): presumably traps in debug builds - confirm */
+	exit(1);	/* never returns to the caller */
+}
+
+static void
+getres(
+	xfs_trans_t	*tp,
+	uint		blocks)
+{
+	int		i;
+	xfs_mount_t	*mp = tp->t_mountp;	/* named by MKFS_BLOCKRES() */
+	uint		r;
+	/* Try the full estimate, then back off one block at a time. */
+	for (r = MKFS_BLOCKRES(blocks); ; r--) {
+		i = libxfs_trans_reserve(tp, r, 0, 0, 0, 0);
+		if (i == 0)
+			return;
+		if (r <= blocks)	/* r is unsigned: stop before it wraps */
+			break;
+	}
+	res_failed(i);	/* NOTREACHED */
+}
+
+static char *
+getstr(
+	char	**pp)
+{
+	int	c;
+	char	*p;
+	char	*rval;
+	/* Return the next blank-separated token of *pp, NUL-terminated in place,
+	 * and advance *pp past it; a ':' starts a comment ending at newline. */
+	p = *pp;
+	while ((c = *p) != '\0') {
+		switch (c) {
+		case ' ':
+		case '\t':
+		case '\n':
+			p++;
+			continue;
+		case ':':
+			p++;
+			while (*p != '\0' && *p++ != '\n')
+				;
+			continue;
+		default:
+			rval = p;
+			while (c != ' ' && c != '\t' && c != '\n' && c != '\0')
+				c = *++p;
+			if (c != '\0')	/* at end of text, stay on the terminator */
+				*p++ = '\0';
+			*pp = p;
+			return rval;
+		}
+	}
+	fprintf(stderr, "%s: premature EOF in prototype file\n",
+		progname);
+	exit(1);
+	return NULL;	/* NOTREACHED */
+}
+
+static void
+rsvfile(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	long long	llen)
+{
+	xfs_trans_t	*tp;
+	int		error;
+
+	/*
+	 * Reserve llen bytes for the file.  fail() prints the error
+	 * and exits, so nothing more is needed on that path.
+	 */
+	error = libxfs_alloc_file_space(ip, 0, llen, 1, 0);
+	if (error)
+		fail("error reserving space for a file", error);
+
+	/*
+	 * Record the timestamp, mode and prealloc flag changes on the inode.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	libxfs_trans_ijoin(tp, ip, 0);
+	libxfs_trans_ihold(tp, ip);
+
+	ip->i_d.di_mode &= ~ISUID;
+
+	/*
+	 * ISGID is cleared only when the group execute bit is set.
+	 * Mandatory file locking is therefore not a concern here:
+	 * with group execute on, mandatory locking could not have
+	 * been enabled in the first place, so dropping ISGID does
+	 * not disable it.
+	 */
+	if (ip->i_d.di_mode & (IEXEC >> 3))
+		ip->i_d.di_mode &= ~ISGID;
+
+	libxfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+	ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	libxfs_trans_commit(tp, 0, NULL);
+}
+
+static int
+newfile(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_bmap_free_t	*flist,
+	xfs_fsblock_t	*first,
+	int		dolocal,	/* allow inline storage when the data fits */
+	int		logit,		/* log the data buffer instead of writing it */
+	char		*buf,
+	int		len)
+{
+	xfs_buf_t	*bp;
+	xfs_daddr_t	d;
+	int		error;
+	int		flags;
+	xfs_bmbt_irec_t	map;
+	xfs_mount_t	*mp;
+	xfs_extlen_t	nb;
+	int		nmap;
+	/* Store len bytes of buf as ip's data; returns extra XFS_ILOG_* flags or 0. */
+	flags = 0;
+	mp = ip->i_mount;
+	if (dolocal && len <= XFS_IFORK_DSIZE(ip)) {
+		libxfs_idata_realloc(ip, len, XFS_DATA_FORK);
+		if (buf)
+			bcopy(buf, ip->i_df.if_u1.if_data, len);
+		ip->i_d.di_size = len;
+		ip->i_df.if_flags &= ~XFS_IFEXTENTS;
+		ip->i_df.if_flags |= XFS_IFINLINE;
+		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		flags = XFS_ILOG_DDATA;
+	} else if (len > 0) {
+		nb = XFS_B_TO_FSB(mp, len);
+		nmap = 1;
+		error = libxfs_bmapi(tp, ip, 0, nb, XFS_BMAPI_WRITE, first, nb,
+				&map, &nmap, flist);
+		if (error) {
+			fail("error allocating space for a file", error);
+		}
+		if (nmap != 1) {
+			fprintf(stderr, "%s: cannot allocate space for file\n",
+				progname);
+			exit(1);
+		}
+		d = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+		bp = libxfs_trans_get_buf(logit ? tp : 0, mp->m_dev, d,
+			nb << mp->m_blkbb_log, 0);
+		bcopy(buf, XFS_BUF_PTR(bp), len);
+		if (len < XFS_BUF_COUNT(bp))	/* zero the tail of the last block */
+			bzero(XFS_BUF_PTR(bp) + len, XFS_BUF_COUNT(bp) - len);
+		if (logit)
+			libxfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);
+		else
+			libxfs_writebuf(bp, 1);
+	}
+	ip->i_d.di_size = len;
+	return flags;
+}
+
+static char *
+newregfile(
+	char		**pp,
+	int		*len)
+{
+	char		*buf;
+	int		fd;
+	char		*fname;
+	long		size;
+	/* Read the file named by the next token; *len gets its size, NULL if empty. */
+	fname = getstr(pp);
+	if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
+		fprintf(stderr, "%s: cannot open %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	if ((*len = (int)size) != 0) {
+		buf = malloc(size);
+		if (buf == NULL || read(fd, buf, size) < size) {
+			fprintf(stderr, "%s: read failed on %s: %s\n",
+				progname, fname, strerror(errno));
+			exit(1);
+		}
+	} else
+		buf = NULL;
+	close(fd);
+	return buf;	/* caller owns and frees the buffer */
+}
+
+static void
+newdirent(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*pip,
+	char		*name,
+	int		namelen,
+	xfs_ino_t	inum,
+	xfs_fsblock_t	*first,
+	xfs_bmap_free_t	*flist,
+	xfs_extlen_t	total)
+{
+	int	rc;
+	/* Add name -> inum to directory pip using the superblock's dir version. */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		rc = libxfs_dir_createname(tp, pip, name, namelen,
+						inum, first, flist, total);
+	else
+		rc = libxfs_dir2_createname(tp, pip, name, namelen,
+						inum, first, flist, total);
+	if (rc != 0)
+		fail("directory createname error", rc);
+}
+
+static void
+newdirectory(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
+	xfs_inode_t	*pdp)
+{
+	int	rc;
+	/* Initialise dp (parent pdp) with the superblock's directory version. */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		rc = libxfs_dir_init(tp, dp, pdp);
+	else
+		rc = libxfs_dir2_init(tp, dp, pdp);
+	if (rc != 0)
+		fail("directory create error", rc);
+}
+
+void
+parseproto(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*pip,
+	char		**pp,
+	char		*name)
+{
+#define	IF_REGULAR	0
+#define	IF_RESERVED	1
+#define	IF_BLOCK	2
+#define	IF_CHAR		3
+#define	IF_DIRECTORY	4
+#define	IF_SYMLINK	5
+#define	IF_FIFO		6
+	/* Parse one entry (mode string, uid, gid, type data) and create it in pip. */
+	char		*buf;
+	int		committed;
+	int		error;
+	xfs_fsblock_t	first;
+	int		flags;
+	xfs_bmap_free_t	flist;
+	int		fmt;
+	int		i;
+	xfs_inode_t	*ip;
+	int		len;
+	long long	llen;
+	int		majdev;
+	int		mindev;
+	int		mode;
+	char		*mstr;
+	xfs_trans_t	*tp;
+	int		val;
+	int		isroot = 0;
+	cred_t		creds;
+	char		*value;
+
+	bzero(&creds, sizeof(creds));
+	mstr = getstr(pp);
+	switch (mstr[0]) {
+	case '-':
+		fmt = IF_REGULAR;
+		break;
+	case 'r':
+		fmt = IF_RESERVED;
+		break;
+	case 'b':
+		fmt = IF_BLOCK;
+		break;
+	case 'c':
+		fmt = IF_CHAR;
+		break;
+	case 'd':
+		fmt = IF_DIRECTORY;
+		break;
+	case 'l':
+		fmt = IF_SYMLINK;
+		break;
+	case 'p':
+		fmt = IF_FIFO;
+		break;
+	default:
+		fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+		exit(1);
+	}
+	mode = 0;
+	switch (mstr[1]) {
+	case '-':
+		break;
+	case 'u':
+		mode |= ISUID;
+		break;
+	default:
+		fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+		exit(1);
+	}
+	switch (mstr[2]) {
+	case '-':
+		break;
+	case 'g':
+		mode |= ISGID;
+		break;
+	default:
+		fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+		exit(1);
+	}
+	val = 0;
+	for (i = 3; i < 6; i++) {	/* three octal mode digits */
+		if (mstr[i] < '0' || mstr[i] > '7') {
+			fprintf(stderr, "%s: bad format string %s\n",
+				progname, mstr);
+			exit(1);
+		}
+		val = val * 8 + mstr[i] - '0';
+	}
+	mode |= val;
+	creds.cr_uid = (int)getnum(pp);
+	creds.cr_gid = (int)getnum(pp);
+	tp = libxfs_trans_alloc(mp, 0);
+	flags = XFS_ILOG_CORE;
+	XFS_BMAP_INIT(&flist, &first);
+	switch (fmt) {
+	case IF_REGULAR:
+		buf = newregfile(pp, &len);
+		getres(tp, XFS_B_TO_FSB(mp, len));
+		error = libxfs_inode_alloc(&tp, pip, mode|IFREG, 1,
+					mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		flags |= newfile(tp, ip, &flist, &first, 0, 0, buf, len);
+		if (buf)
+			free(buf);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		break;
+
+	case IF_RESERVED:			/* pre-allocated space only */
+		value = getstr(pp);
+		llen = cvtnum(mp->m_sb.sb_blocksize, value);
+		getres(tp, XFS_B_TO_FSB(mp, llen));
+
+		error = libxfs_inode_alloc(&tp, pip, mode|IFREG, 1,
+						mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode pre-allocation failed", error);
+
+		libxfs_trans_ijoin(tp, pip, 0);
+
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		libxfs_trans_log_inode(tp, ip, flags);
+
+		error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+		if (error)
+			fail("Pre-allocated file creation failed", error);
+		libxfs_trans_commit(tp, 0, NULL);
+		rsvfile(mp, ip, llen);
+		return;
+
+	case IF_BLOCK:
+		getres(tp, 0);
+		majdev = (int)getnum(pp);
+		mindev = (int)getnum(pp);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFBLK, 1,
+				makedev(majdev, mindev), &creds, &ip);
+		if (error) {
+			fail("Inode allocation failed", error);
+		}
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		flags |= XFS_ILOG_DEV;
+		break;
+
+	case IF_CHAR:
+		getres(tp, 0);
+		majdev = (int)getnum(pp);
+		mindev = (int)getnum(pp);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFCHR, 1,
+				makedev(majdev, mindev), &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		flags |= XFS_ILOG_DEV;
+		break;
+
+	case IF_FIFO:
+		getres(tp, 0);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFIFO, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		break;
+	case IF_SYMLINK:
+		buf = getstr(pp);
+		len = (int)strlen(buf);
+		getres(tp, XFS_B_TO_FSB(mp, len));
+		error = libxfs_inode_alloc(&tp, pip, mode|IFLNK, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		flags |= newfile(tp, ip, &flist, &first, 1, 1, buf, len);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		break;
+	case IF_DIRECTORY:
+		getres(tp, 0);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFDIR, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		ip->i_d.di_nlink++;		/* account for . */
+		if (!pip) {	/* no parent: this directory becomes the root */
+			pip = ip;
+			mp->m_sb.sb_rootino = ip->i_ino;
+			libxfs_mod_sb(tp, XFS_SB_ROOTINO);
+			mp->m_rootip = ip;
+			isroot = 1;
+		} else {
+			libxfs_trans_ijoin(tp, pip, 0);
+			i = strlen(name);
+			newdirent(mp, tp, pip, name, i, ip->i_ino,
+				  &first, &flist, 1);
+			pip->i_d.di_nlink++;
+			libxfs_trans_ihold(tp, pip);
+			libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
+		}
+		newdirectory(mp, tp, ip, pip);
+		libxfs_trans_log_inode(tp, ip, flags);
+		error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+		if (error)
+			fail("Directory creation failed", error);
+		libxfs_trans_ihold(tp, ip);
+		libxfs_trans_commit(tp, 0, NULL);
+		/*
+		 * RT initialization.  Do this here to ensure that
+		 * the RT inodes get placed after the root inode.
+		 */
+		if (isroot)
+			rtinit(mp);
+		tp = NULL;
+		for (;;) {	/* children follow until a "$" token */
+			name = getstr(pp);
+			if (strcmp(name, "$") == 0)
+				break;
+			parseproto(mp, ip, pp, name);
+		}
+		libxfs_iput(ip, 0);
+		return;
+	}
+	libxfs_trans_log_inode(tp, ip, flags);
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		fail("Error encountered creating file from prototype", error);
+	}
+	libxfs_trans_commit(tp, 0, NULL);
+}
+
+/*
+ * Allocate the realtime bitmap and summary inodes, and fill in data if any.
+ */
+static void
+rtinit(
+	xfs_mount_t	*mp)
+{
+	xfs_dfiloff_t	bno;
+	int		committed;
+	xfs_dfiloff_t	ebno;
+	xfs_bmbt_irec_t	*ep;
+	int		error;
+	xfs_fsblock_t	first;
+	xfs_bmap_free_t	flist;
+	int		i;
+	xfs_bmbt_irec_t	map[XFS_BMAP_MAX_NMAP];
+	xfs_extlen_t	nsumblocks;
+	int		nmap;
+	xfs_inode_t	*rbmip;
+	xfs_inode_t	*rsumip;
+	xfs_trans_t	*tp;
+	cred_t		creds;
+
+	/*
+	 * First, allocate the inodes.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	if (i = libxfs_trans_reserve(tp, MKFS_BLOCKRES_INODE, 0, 0, 0, 0))
+		res_failed(i);
+	bzero(&creds, sizeof(creds));
+	error = libxfs_inode_alloc(&tp, mp->m_rootip, IFREG, 1,
+				mp->m_dev, &creds, &rbmip);
+	if (error) {
+		fail("Realtime bitmap inode allocation failed", error);
+	}
+	/*
+	 * Do our thing with rbmip before allocating rsumip,
+	 * because the next call to ialloc() may
+	 * commit the transaction in which rbmip was allocated.
+	 */
+	mp->m_sb.sb_rbmino = rbmip->i_ino;
+	rbmip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+	rbmip->i_d.di_flags = XFS_DIFLAG_NEWRTBM;
+	*(__uint64_t *)&rbmip->i_d.di_atime = 0;	/* zeroes 8 bytes at di_atime */
+	libxfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
+	libxfs_mod_sb(tp, XFS_SB_RBMINO);
+	libxfs_trans_ihold(tp, rbmip);
+	mp->m_rbmip = rbmip;
+	error = libxfs_inode_alloc(&tp, mp->m_rootip, IFREG, 1,
+				mp->m_dev, &creds, &rsumip);
+	if (error) {
+		fail("Realtime summary inode allocation failed", error);
+	}
+	mp->m_sb.sb_rsumino = rsumip->i_ino;
+	rsumip->i_d.di_size = mp->m_rsumsize;
+	libxfs_trans_log_inode(tp, rsumip, XFS_ILOG_CORE);
+	libxfs_mod_sb(tp, XFS_SB_RSUMINO);
+	libxfs_trans_ihold(tp, rsumip);
+	libxfs_trans_commit(tp, 0, NULL);
+	mp->m_rsumip = rsumip;
+	/*
+	 * Next, give the bitmap file some zero-filled blocks.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	if (i = libxfs_trans_reserve(tp, mp->m_sb.sb_rbmblocks +
+			(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), 0, 0, 0, 0))
+		res_failed(i);
+	libxfs_trans_ijoin(tp, rbmip, 0);
+	bno = 0;
+	XFS_BMAP_INIT(&flist, &first);
+	while (bno < mp->m_sb.sb_rbmblocks) {
+		nmap = XFS_BMAP_MAX_NMAP;
+		error = libxfs_bmapi(tp, rbmip, bno,
+				(xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
+				XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+				map, &nmap, &flist);
+		if (error) {
+			fail("Allocation of the realtime bitmap failed", error);
+		}
+		for (i = 0, ep = map; i < nmap; i++, ep++) {
+			libxfs_device_zero(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+				XFS_FSB_TO_BB(mp, ep->br_blockcount));
+			bno += ep->br_blockcount;
+		}
+	}
+
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		fail("Allocation of the realtime bitmap failed", error);
+	}
+	libxfs_trans_commit(tp, 0, NULL);
+	/*
+	 * Give the summary file some zero-filled blocks.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+	if (i = libxfs_trans_reserve(tp,
+			nsumblocks + (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1),
+			0, 0, 0, 0))
+		res_failed(i);
+	libxfs_trans_ijoin(tp, rsumip, 0);
+	bno = 0;
+	XFS_BMAP_INIT(&flist, &first);
+	while (bno < nsumblocks) {
+		nmap = XFS_BMAP_MAX_NMAP;
+		error = libxfs_bmapi(tp, rsumip, bno,
+				(xfs_extlen_t)(nsumblocks - bno),
+				XFS_BMAPI_WRITE, &first, nsumblocks,
+				map, &nmap, &flist);
+		if (error) {
+			fail("Allocation of the realtime summary failed", error);
+		}
+		for (i = 0, ep = map; i < nmap; i++, ep++) {
+			libxfs_device_zero(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+				XFS_FSB_TO_BB(mp, ep->br_blockcount));
+			bno += ep->br_blockcount;
+		}
+	}
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		fail("Allocation of the realtime summary failed", error);
+	}
+	libxfs_trans_commit(tp, 0, NULL);
+	/*
+	 * Free the whole area using transactions.
+	 * Do one transaction per bitmap block.
+	 */
+	for (bno = 0; bno < mp->m_sb.sb_rextents; bno = ebno) {
+		tp = libxfs_trans_alloc(mp, 0);
+		if (i = libxfs_trans_reserve(tp, 0, 0, 0, 0, 0))
+			res_failed(i);
+		XFS_BMAP_INIT(&flist, &first);
+		ebno = XFS_RTMIN(mp->m_sb.sb_rextents,
+			bno + NBBY * mp->m_sb.sb_blocksize);
+		error = libxfs_rtfree_extent(tp, bno, (xfs_extlen_t)(ebno-bno));
+		if (error) {
+			fail("Error initializing the realtime bitmap", error);
+		}
+		error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+		if (error) {
+			fail("Error initializing the realtime bitmap", error);
+		}
+		libxfs_trans_commit(tp, 0, NULL);
+	}
+}
+
+void
+res_failed(
+	int	err)	/* NOTE(review): not used in the message below */
+{
+	fprintf(stderr, "%s: ran out of disk space!\n", progname);
+	ASSERT(0);
+	exit(1);	/* never returns to the caller */
+}
+
+static long
+filesize(
+	int		fd)
+{
+	struct stat64	st;
+
+	/* Size of the open file fd, narrowed to long;
+	 * -1 when fstat64() fails. */
+	return (fstat64(fd, &st) < 0) ? -1 : (long)st.st_size;
+}
diff --git a/mkfs/proto.h b/mkfs/proto.h
new file mode 100644
index 000000000..e588e4805
--- /dev/null
+++ b/mkfs/proto.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+char *setup_proto(char *fname);
+void parseproto(xfs_mount_t *mp, xfs_inode_t *pip, char **pp, char *name);
+void res_failed(int err);	/* prints an out-of-space message, exits with 1 */
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
new file mode 100644
index 000000000..13132b979
--- /dev/null
+++ b/mkfs/xfs_mkfs.c
@@ -0,0 +1,1944 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "xfs_mkfs.h"
+#include "proto.h"
+#include "volume.h"
+#include "maxtrres.h"
+#include "mountinfo.h"
+
+#if HAVE_LIBLVM
+  #include "lvm_user.h"
+
+  char *cmd;		/* Not used. liblvm is broken */
+  int opt_d;		/* Same thing */
+#endif
+
+/*
+ * Prototypes for file-local (static) helpers; none is defined before main().
+ */
+static void conflict(char opt, char *tab[], int oldidx, int newidx);
+static void illegal(char *value, char *opt);
+static void reqval(char opt, char *tab[], int idx);
+static void respec(char opt, char *tab[], int idx);
+static void unknown(char opt, char *s);
+static int  ispow2(unsigned int i);
+static int  max_trans_res(xfs_mount_t *mp);
+
+/*
+ * option tables for getsubopt calls
+ */
+char	*bopts[] = {		/* -b suboptions; B_* = index getsubopt() returns */
+#define	B_LOG		0
+	"log",
+#define	B_SIZE		1
+	"size",
+	NULL
+};
+
+char	*dopts[] = {		/* -d suboptions; D_* = index getsubopt() returns */
+#define	D_AGCOUNT	0
+	"agcount",
+#define	D_FILE		1
+	"file",
+#define	D_NAME		2
+	"name",
+#define	D_SIZE		3
+	"size",
+#define D_SUNIT		4
+	"sunit",
+#define D_SWIDTH	5
+	"swidth",
+#define D_UNWRITTEN	6
+	"unwritten",
+	NULL
+};
+
+char	*iopts[] = {		/* -i suboptions; I_* = index getsubopt() returns */
+#define	I_ALIGN		0
+	"align",
+#define	I_LOG		1
+	"log",
+#define	I_MAXPCT	2
+	"maxpct",
+#define	I_PERBLOCK	3
+	"perblock",
+#define	I_SIZE		4
+	"size",
+	NULL
+};
+
+char	*lopts[] = {		/* -l suboptions; L_* = index getsubopt() returns */
+#define	L_AGNUM		0
+	"agnum",
+#define	L_INTERNAL	1
+	"internal",
+#define	L_SIZE		2
+	"size",
+#define L_DEV		3
+	"logdev",
+#if defined(MKFS_SIMULATION) || defined(HAVE_VOLUME_MANAGER)
+#define	L_FILE		4
+	"file",
+#define	L_NAME		5
+	"name",
+#endif	/* main() uses L_FILE/L_NAME as cases under HAVE_VOLUME_MANAGER */
+	NULL
+};
+
+char	*nopts[] = {		/* -n suboptions; N_* = index getsubopt() returns */
+#define	N_LOG		0
+	"log",
+#define	N_SIZE		1
+	"size",
+#define	N_VERSION	2
+	"version",
+	NULL,
+};
+
+char	*ropts[] = {		/* -r suboptions; R_* = index getsubopt() returns */
+#define	R_EXTSIZE	0
+	"extsize",
+#define	R_SIZE		1
+	"size",
+#define	R_DEV		2
+	"rtdev",
+#if defined(MKFS_SIMULATION) || defined(HAVE_VOLUME_MANAGER)
+#define	R_FILE		3
+	"file",
+#define	R_NAME		4
+	"name",
+#endif	/* main() uses R_FILE/R_NAME as cases under HAVE_VOLUME_MANAGER */
+	NULL
+};
+
+/*
+ * max transaction reservation values, indexed by log2 sizes:
+ * version 1 (max_trres_v1):
+ * first dimension log(blocksize) (base XFS_MIN_BLOCKSIZE_LOG) [DFL_B]
+ * second dimension log(inodesize) (base XFS_DINODE_MIN_LOG) [DFL_I]
+ * version 2 (max_trres_v2):
+ * first and second dimensions as for version 1
+ * third dimension log(dirblocksize) (base XFS_MIN_BLOCKSIZE_LOG) [DFL_D]
+ * Each DFL_x is MAX_LOG + 1 - MIN_LOG: the count of logs in that range.
+ */
+#define	DFL_B	(XFS_MAX_BLOCKSIZE_LOG + 1 - XFS_MIN_BLOCKSIZE_LOG)
+#define	DFL_I	(XFS_DINODE_MAX_LOG + 1 - XFS_DINODE_MIN_LOG)
+#define	DFL_D	(XFS_MAX_BLOCKSIZE_LOG + 1 - XFS_MIN_BLOCKSIZE_LOG)
+
+static const int max_trres_v1[DFL_B][DFL_I] = {	/* [blocklog][inodelog] */
+	{ MAXTRRES_B9_I8_D9_V1, 0, 0, 0 },
+	{ MAXTRRES_B10_I8_D10_V1, MAXTRRES_B10_I9_D10_V1, 0, 0 },
+	{ MAXTRRES_B11_I8_D11_V1, MAXTRRES_B11_I9_D11_V1,
+	  MAXTRRES_B11_I10_D11_V1, 0 },
+	{ MAXTRRES_B12_I8_D12_V1, MAXTRRES_B12_I9_D12_V1,
+	  MAXTRRES_B12_I10_D12_V1, MAXTRRES_B12_I11_D12_V1 },
+	{ MAXTRRES_B13_I8_D13_V1, MAXTRRES_B13_I9_D13_V1,
+	  MAXTRRES_B13_I10_D13_V1, MAXTRRES_B13_I11_D13_V1 },
+	{ MAXTRRES_B14_I8_D14_V1, MAXTRRES_B14_I9_D14_V1,
+	  MAXTRRES_B14_I10_D14_V1, MAXTRRES_B14_I11_D14_V1 },
+	{ MAXTRRES_B15_I8_D15_V1, MAXTRRES_B15_I9_D15_V1,
+	  MAXTRRES_B15_I10_D15_V1, MAXTRRES_B15_I11_D15_V1 },
+	{ MAXTRRES_B16_I8_D16_V1, MAXTRRES_B16_I9_D16_V1,
+	  MAXTRRES_B16_I10_D16_V1, MAXTRRES_B16_I11_D16_V1 },
+};
+
+static const int max_trres_v2[DFL_B][DFL_I][DFL_D] = { /* [blk][ino][dirblk] */
+	{ { MAXTRRES_B9_I8_D9_V2, MAXTRRES_B9_I8_D10_V2, MAXTRRES_B9_I8_D11_V2,
+	    MAXTRRES_B9_I8_D12_V2, MAXTRRES_B9_I8_D13_V2, MAXTRRES_B9_I8_D14_V2,
+	    MAXTRRES_B9_I8_D15_V2, MAXTRRES_B9_I8_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, 0, 0 },
+	  { 0, 0, 0, 0, 0, 0, 0, 0 },
+	  { 0, 0, 0, 0, 0, 0, 0, 0 } },
+	{ { 0, MAXTRRES_B10_I8_D10_V2, MAXTRRES_B10_I8_D11_V2,
+	    MAXTRRES_B10_I8_D12_V2, MAXTRRES_B10_I8_D13_V2,
+	    MAXTRRES_B10_I8_D14_V2, MAXTRRES_B10_I8_D15_V2,
+	    MAXTRRES_B10_I8_D16_V2 },
+	  { 0, MAXTRRES_B10_I9_D10_V2, MAXTRRES_B10_I9_D11_V2,
+	    MAXTRRES_B10_I9_D12_V2, MAXTRRES_B10_I9_D13_V2,
+	    MAXTRRES_B10_I9_D14_V2, MAXTRRES_B10_I9_D15_V2,
+	    MAXTRRES_B10_I9_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, 0, 0 },
+	  { 0, 0, 0, 0, 0, 0, 0, 0 } },
+	{ { 0, 0, MAXTRRES_B11_I8_D11_V2, MAXTRRES_B11_I8_D12_V2,
+	    MAXTRRES_B11_I8_D13_V2, MAXTRRES_B11_I8_D14_V2,
+	    MAXTRRES_B11_I8_D15_V2, MAXTRRES_B11_I8_D16_V2 },
+	  { 0, 0, MAXTRRES_B11_I9_D11_V2, MAXTRRES_B11_I9_D12_V2,
+	    MAXTRRES_B11_I9_D13_V2, MAXTRRES_B11_I9_D14_V2,
+	    MAXTRRES_B11_I9_D15_V2, MAXTRRES_B11_I9_D16_V2 },
+	  { 0, 0, MAXTRRES_B11_I10_D11_V2, MAXTRRES_B11_I10_D12_V2,
+	    MAXTRRES_B11_I10_D13_V2, MAXTRRES_B11_I10_D14_V2,
+	    MAXTRRES_B11_I10_D15_V2, MAXTRRES_B11_I10_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, 0, 0 } },
+	{ { 0, 0, 0, MAXTRRES_B12_I8_D12_V2, MAXTRRES_B12_I8_D13_V2,
+	    MAXTRRES_B12_I8_D14_V2, MAXTRRES_B12_I8_D15_V2,
+	    MAXTRRES_B12_I8_D16_V2 },
+	  { 0, 0, 0, MAXTRRES_B12_I9_D12_V2, MAXTRRES_B12_I9_D13_V2,
+	    MAXTRRES_B12_I9_D14_V2, MAXTRRES_B12_I9_D15_V2,
+	    MAXTRRES_B12_I9_D16_V2 },
+	  { 0, 0, 0, MAXTRRES_B12_I10_D12_V2, MAXTRRES_B12_I10_D13_V2,
+	    MAXTRRES_B12_I10_D14_V2, MAXTRRES_B12_I10_D15_V2,
+	    MAXTRRES_B12_I10_D16_V2 },
+	  { 0, 0, 0, MAXTRRES_B12_I11_D12_V2, MAXTRRES_B12_I11_D13_V2,
+	    MAXTRRES_B12_I11_D14_V2, MAXTRRES_B12_I11_D15_V2,
+	    MAXTRRES_B12_I11_D16_V2 } },
+	{ { 0, 0, 0, 0, MAXTRRES_B13_I8_D13_V2, MAXTRRES_B13_I8_D14_V2,
+	    MAXTRRES_B13_I8_D15_V2, MAXTRRES_B13_I8_D16_V2 },
+	  { 0, 0, 0, 0, MAXTRRES_B13_I9_D13_V2, MAXTRRES_B13_I9_D14_V2,
+	    MAXTRRES_B13_I9_D15_V2, MAXTRRES_B13_I9_D16_V2 },
+	  { 0, 0, 0, 0, MAXTRRES_B13_I10_D13_V2, MAXTRRES_B13_I10_D14_V2,
+	    MAXTRRES_B13_I10_D15_V2, MAXTRRES_B13_I10_D16_V2 },
+	  { 0, 0, 0, 0, MAXTRRES_B13_I11_D13_V2, MAXTRRES_B13_I11_D14_V2,
+	    MAXTRRES_B13_I11_D15_V2, MAXTRRES_B13_I11_D16_V2 } },
+	{ { 0, 0, 0, 0, 0, MAXTRRES_B14_I8_D14_V2, MAXTRRES_B14_I8_D15_V2,
+	    MAXTRRES_B14_I8_D16_V2 },
+	  { 0, 0, 0, 0, 0, MAXTRRES_B14_I9_D14_V2, MAXTRRES_B14_I9_D15_V2,
+	    MAXTRRES_B14_I9_D16_V2 },
+	  { 0, 0, 0, 0, 0, MAXTRRES_B14_I10_D14_V2, MAXTRRES_B14_I10_D15_V2,
+	    MAXTRRES_B14_I10_D16_V2 },
+	  { 0, 0, 0, 0, 0, MAXTRRES_B14_I11_D14_V2, MAXTRRES_B14_I11_D15_V2,
+	    MAXTRRES_B14_I11_D16_V2 } },
+	{ { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I8_D15_V2, MAXTRRES_B15_I8_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I9_D15_V2, MAXTRRES_B15_I9_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I10_D15_V2,
+	    MAXTRRES_B15_I10_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I11_D15_V2,
+	    MAXTRRES_B15_I11_D16_V2 } },
+	{ { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I8_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I9_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I10_D16_V2 },
+	  { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I11_D16_V2, } },
+};
+
+/*
+ * Use before we have a superblock (else XFS_DTOBT); needs blocklog in scope.
+ */
+#define	DTOBT(d)	((xfs_drfsbno_t)((d) >> (blocklog - BBSHIFT)))
+
+/*
+ * Use this for block reservations needed for mkfs's conditions
+ * (basically no fragmentation).  Both macros need mp in scope; rb is added in.
+ */
+#define	MKFS_BLOCKRES_INODE	\
+	((uint)(XFS_IALLOC_BLOCKS(mp) + (XFS_IN_MAXLEVELS(mp) - 1)))
+#define	MKFS_BLOCKRES(rb)	\
+	((uint)(MKFS_BLOCKRES_INODE + XFS_DA_NODE_MAXDEPTH + \
+	(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1) + (rb)))
+
+/*
+ * Report stripe geometry for dfile.  Returns at once when dfile is NULL;
+ * a stat64() failure prints a message and calls usage().  Only when built
+ * with HAVE_LIBLVM, and only for an LVM device, *sunit and *swidth are set
+ * from the logical volume; otherwise they are left untouched.  The type
+ * argument is not consulted.
+ */
+static void
+get_subvol_stripe_wrapper(char *dfile, int type, int *sunit, int *swidth)
+{
+	struct stat64	statbuf;
+#if HAVE_LIBLVM
+	lv_t		*lvinfo;
+	char		*vg;
+#endif
+
+	if (dfile == NULL)
+		return;
+	if (stat64(dfile, &statbuf) != 0) {
+		fprintf(stderr, "Could not stat %s\n", dfile);
+		usage();
+	}
+
+#if HAVE_LIBLVM
+	/* Not an LVM volume (st_rdev >> 8 differs from LVM_BLK_MAJOR). */
+	if ((statbuf.st_rdev >> 8) != LVM_BLK_MAJOR)
+		return;
+
+	vg = vg_name_of_lv(dfile);	/* volume group */
+	if (vg == NULL) {
+		fprintf(stderr, "Can't find volume group for %s\n", dfile);
+		usage();
+	}
+	if (lvm_tab_lv_check_exist(dfile) == 0) {	/* logical volume */
+		fprintf(stderr, "Logical volume %s doesn't exist!\n", dfile);
+		usage();
+	}
+	if (lv_status_byname(vg, dfile, &lvinfo) < 0 || lvinfo == NULL) {
+		fprintf(stderr, "Could not get status info from %s\n", dfile);
+		usage();
+	}
+	if (lv_check_consistency(lvinfo) < 0) {
+		fprintf(stderr, "Logical volume %s is inconsistent\n", dfile);
+		usage();
+	}
+
+	/* Stripe unit is lv_stripesize; width is that times lv_stripes. */
+	*sunit = lvinfo->lv_stripesize;
+	*swidth = lvinfo->lv_stripes * lvinfo->lv_stripesize;
+
+#endif /* HAVE_LIBLVM */
+}
+
+
+/* Page size if it is a power of two in [1<<12, 1<<16], else the XFS default. */
+static int
+get_default_blocksize(void)
+{
+	size_t	pgsz = getpagesize();
+	int	shift = 12;
+
+	while (shift <= 16 && ((size_t)1 << shift) != pgsz)
+		shift++;
+	/* shift == 17 means no power of two in range matched the page size */
+	return (shift <= 16) ? (int)pgsz : (1 << XFS_DFL_BLOCKSIZE_LOG);
+}
+
+
+int
+main(int argc, char **argv)
+{
+	__uint64_t		agcount;
+	xfs_agf_t		*agf;
+	xfs_agi_t		*agi;
+	xfs_agnumber_t		agno;
+	__uint64_t		agsize;
+	xfs_alloc_rec_t		*arec;
+	xfs_btree_sblock_t	*block;
+	int			blflag;
+	int			blocklog;
+	int			blocksize;
+	int			bsflag;
+	int			bsize;
+	xfs_buf_t		*buf;
+	int			c;
+	int			daflag;
+	xfs_drfsbno_t		dblocks;
+	char			*dfile;
+	int			dirblocklog;
+	int			dirblocksize;
+	int			dirversion;
+	int                     do_overlap_checks;
+	char			*dsize;
+	int			dsunit;
+	int			dswidth;
+	int			extent_flagging;
+	int			force_fs_overwrite;
+	int			i;
+	int			iaflag;
+	int			ilflag;
+	int			imaxpct;
+	int			imflag;
+	int			inodelog;
+	int			inopblock;
+	int			ipflag;
+	int			isflag;
+	int			isize;
+	int			laflag;
+	int			lalign;
+	int			ldflag;
+	int			liflag;
+	xfs_agnumber_t		logagno;
+	xfs_drfsbno_t		logblocks;
+	char			*logfile;
+	int			loginternal;
+	char			*logsize;
+	xfs_dfsbno_t		logstart;
+	int			lsflag;
+	int			min_logblocks;
+	mnt_check_state_t       *mnt_check_state;
+	int                     mnt_partition_count;
+	xfs_mount_t		*mp;
+	xfs_mount_t		mbuf;
+	xfs_extlen_t		nbmblocks;
+	int			nlflag;
+	int			nodsflag;
+	xfs_alloc_rec_t		*nrec;
+	int			nsflag;
+	int			nvflag;
+	char			*p;
+	char			*protofile;
+	char			*protostring;
+	int			qflag;
+	xfs_drfsbno_t		rtblocks;
+	xfs_extlen_t		rtextblocks;
+	xfs_drtbno_t		rtextents;
+	char			*rtextsize;
+	char			*rtfile;
+	char			*rtsize;
+	xfs_sb_t		*sbp;
+	int			sectlog;
+	__uint64_t		tmp_agsize;
+	uuid_t			uuid;
+	int			worst_freelist;
+	libxfs_init_t		xi;
+	int 			xlv_dsunit;
+	int			xlv_dswidth;
+
+	progname = basename(argv[0]);
+	agcount = 8;
+	blflag = bsflag = 0;
+	blocksize = get_default_blocksize();
+	blocklog = libxfs_highbit32(blocksize);
+	agsize = daflag = dblocks = 0;
+	ilflag = imflag = ipflag = isflag = 0;
+	liflag = laflag = lsflag = ldflag = 0;
+	loginternal = 1;
+	logagno = logblocks = rtblocks = 0;
+	nlflag = nsflag = nvflag = 0;
+	dirblocklog = dirblocksize = dirversion = 0;
+	qflag = 0;
+	imaxpct = inodelog = inopblock = isize = 0;
+	iaflag = XFS_IFLAG_ALIGN;
+	bzero(&xi, sizeof(xi));
+	xi.notvolok = 1;
+	dfile = logfile = rtfile = NULL;
+	dsize = logsize = rtsize = rtextsize = protofile = NULL;
+	opterr = 0;
+	dsunit = dswidth = nodsflag = lalign = 0;
+	do_overlap_checks = 1;
+	extent_flagging = 0;
+	force_fs_overwrite = 0;
+	worst_freelist = 0;
+
+	while ((c = getopt(argc, argv, "b:d:i:l:n:p:qr:CfV")) != EOF) {
+		switch (c) {
+		case 'C':
+			do_overlap_checks = 0;
+			break;
+		case 'f':
+			force_fs_overwrite = 1;
+			break;
+		case 'b':
+			p = optarg;
+			while (*p != '\0') {
+				char	*value;
+
+				switch (getsubopt(&p, (constpp)bopts, &value)) {
+				case B_LOG:
+					if (!value)
+						reqval('b', bopts, B_LOG);
+					if (blflag)
+						respec('b', bopts, B_LOG);
+					if (bsflag)
+						conflict('b', bopts, B_SIZE,
+							 B_LOG);
+					blocklog = atoi(value);
+					if (blocklog <= 0)
+						illegal(value, "b log");
+					blocksize = 1 << blocklog;
+					blflag = 1;
+					break;
+				case B_SIZE:
+					if (!value)
+						reqval('b', bopts, B_SIZE);
+					if (bsflag)
+						respec('b', bopts, B_SIZE);
+					if (blflag)
+						conflict('b', bopts, B_LOG,
+							 B_SIZE);
+					blocksize = cvtnum(0, value);
+					if (blocksize <= 0 ||
+					    !ispow2(blocksize))
+						illegal(value, "b size");
+					blocklog = libxfs_highbit32(blocksize);
+					bsflag = 1;
+					break;
+				default:
+					unknown('b', value);
+				}
+			}
+			break;
+		case 'd':
+			p = optarg;
+			while (*p != '\0') {
+				char	*value;
+
+				switch (getsubopt(&p, (constpp)dopts, &value)) {
+				case D_AGCOUNT:
+					if (!value)
+						reqval('d', dopts, D_AGCOUNT);
+					if (daflag)
+						respec('d', dopts, D_AGCOUNT);
+					agcount = (__uint64_t)atoll(value);
+					if ((__int64_t)agcount <= 0)
+						illegal(value, "d agcount");
+					daflag = 1;
+					break;
+				case D_FILE:
+					if (!value)
+						value = "1";
+					xi.disfile = atoi(value);
+					if (xi.disfile < 0 || xi.disfile > 1)
+						illegal(value, "d file");
+					if (xi.disfile)
+						xi.dcreat = 1;
+					break;
+				case D_NAME:
+					if (!value)
+						reqval('d', dopts, D_NAME);
+					if (xi.dname)
+						respec('d', dopts, D_NAME);
+					xi.dname = value;
+					break;
+				case D_SIZE:
+					if (!value)
+						reqval('d', dopts, D_SIZE);
+					if (dsize)
+						respec('d', dopts, D_SIZE);
+					dsize = value;
+					break;
+				case D_SUNIT:
+					if (!value)
+						reqval('d', dopts, D_SUNIT);
+					if (dsunit)
+						respec('d', dopts, D_SUNIT);
+					dsunit = cvtnum(0, value);
+					break;
+				case D_SWIDTH:
+					if (!value)
+						reqval('d', dopts, D_SWIDTH);
+					if (dswidth)
+						respec('d', dopts, D_SWIDTH);
+					dswidth = cvtnum(0, value);
+					break;
+				case D_UNWRITTEN:
+					if (!value)
+					    reqval('d', dopts, D_UNWRITTEN);
+					i = atoi(value);
+					if (i < 0 || i > 1)
+					    illegal(value, "d unwritten");
+					extent_flagging = i;
+					break;
+				default:
+					unknown('d', value);
+				}
+			}
+			break;
+		case 'i':
+			p = optarg;
+			while (*p != '\0') {
+				char	*value;
+
+				switch (getsubopt(&p, (constpp)iopts, &value)) {
+				case I_ALIGN:
+					if (!value)
+						value = "1";
+					iaflag = atoi(value);
+					if (iaflag < 0 || iaflag > 1)
+						illegal(value, "i align");
+					break;
+				case I_LOG:
+					if (!value)
+						reqval('i', iopts, I_LOG);
+					if (ilflag)
+						respec('i', iopts, I_LOG);
+					if (ipflag)
+						conflict('i', iopts, I_PERBLOCK,
+							 I_LOG);
+					if (isflag)
+						conflict('i', iopts, I_SIZE,
+							 I_LOG);
+					inodelog = atoi(value);
+					if (inodelog <= 0)
+						illegal(value, "i log");
+					isize = 1 << inodelog;
+					ilflag = 1;
+					break;
+				case I_MAXPCT:
+					if (!value)
+						reqval('i', iopts, I_MAXPCT);
+					if (imflag)
+						respec('i', iopts, I_MAXPCT);
+					imaxpct = atoi(value);
+					if (imaxpct < 0 || imaxpct > 100)
+						illegal(value, "i maxpct");
+					imflag = 1;
+					break;
+				case I_PERBLOCK:
+					if (!value)
+						reqval('i', iopts, I_PERBLOCK);
+					if (ilflag)
+						conflict('i', iopts, I_LOG,
+							 I_PERBLOCK);
+					if (ipflag)
+						respec('i', iopts, I_PERBLOCK);
+					if (isflag)
+						conflict('i', iopts, I_SIZE,
+							 I_PERBLOCK);
+					inopblock = atoi(value);
+					if (inopblock <
+						XFS_MIN_INODE_PERBLOCK ||
+					    !ispow2(inopblock))
+						illegal(value, "i perblock");
+					ipflag = 1;
+					break;
+				case I_SIZE:
+					if (!value)
+						reqval('i', iopts, I_SIZE);
+					if (ilflag)
+						conflict('i', iopts, I_LOG,
+							 I_SIZE);
+					if (ipflag)
+						conflict('i', iopts, I_PERBLOCK,
+							 I_SIZE);
+					if (isflag)
+						respec('i', iopts, I_SIZE);
+					isize = cvtnum(0, value);
+					if (isize <= 0 || !ispow2(isize))
+						illegal(value, "i size");
+					inodelog = libxfs_highbit32(isize);
+					isflag = 1;
+					break;
+				default:
+					unknown('i', value);
+				}
+			}
+			break;
+		case 'l':
+			p = optarg;
+			while (*p != '\0') {
+				char	*value;
+
+				switch (getsubopt(&p, (constpp)lopts, &value)) {
+				case L_AGNUM:
+					if (laflag)
+						respec('l', lopts, L_AGNUM);
+
+					if (ldflag) 
+						conflict('l', lopts, L_AGNUM, L_DEV);
+
+					logagno = atoi(value);
+					laflag = 1;
+					break;
+				case L_DEV:
+					if (!value) {
+						fprintf (stderr, "Must specify log device\n");
+						usage();
+					}
+
+					if (laflag)
+						conflict('l', lopts, L_AGNUM, L_DEV);
+
+					if (liflag)
+						conflict('l', lopts, L_INTERNAL, L_DEV);
+					
+					ldflag = 1;
+					loginternal = 0;
+					logfile = value;
+					xi.logname = value;
+					break;
+#ifdef HAVE_VOLUME_MANAGER
+				case L_FILE:
+					if (!value)
+						value = "1";
+					if (loginternal)
+						conflict('l', lopts, L_INTERNAL,
+							 L_FILE);
+					xi.lisfile = atoi(value);
+					if (xi.lisfile < 0 || xi.lisfile > 1)
+						illegal(value, "l file");
+					if (xi.lisfile)
+						xi.lcreat = 1;
+					break;
+#endif
+				case L_INTERNAL:
+					if (!value)
+						value = "1";
+
+					if (ldflag) 
+						conflict('l', lopts, L_INTERNAL, L_DEV);
+#ifdef HAVE_VOLUME_MANAGER
+					if (xi.logname)
+						conflict('l', lopts, L_NAME,
+							 L_INTERNAL);
+					if (xi.lisfile)
+						conflict('l', lopts, L_FILE,
+							 L_INTERNAL);
+#endif
+					if (liflag)
+						respec('l', lopts, L_INTERNAL);
+					loginternal = atoi(value);
+					if (loginternal < 0 || loginternal > 1)
+						illegal(value, "l internal");
+					liflag = 1;
+					break;
+#ifdef HAVE_VOLUME_MANAGER
+				case L_NAME:
+					if (!value)
+						reqval('l', lopts, L_NAME);
+					if (loginternal)
+						conflict('l', lopts, L_INTERNAL,
+							 L_NAME);
+					if (xi.logname)
+						respec('l', lopts, L_NAME);
+					xi.logname = value;
+					break;
+#endif
+				case L_SIZE:
+					if (!value)
+						reqval('l', lopts, L_SIZE);
+					if (logsize)
+						respec('l', lopts, L_SIZE);
+					logsize = value;
+					lsflag = 1;
+					break;
+				default:
+					unknown('l', value);
+				}
+			}
+			break;
+		case 'n':
+			p = optarg;
+			while (*p != '\0') {
+				char	*value;
+
+				switch (getsubopt(&p, (constpp)nopts, &value)) {
+				case N_LOG:
+					if (!value)
+						reqval('n', nopts, N_LOG);
+					if (nlflag)
+						respec('n', nopts, N_LOG);
+					if (nsflag)
+						conflict('n', nopts, N_SIZE,
+							 N_LOG);
+					dirblocklog = atoi(value);
+					if (dirblocklog <= 0)
+						illegal(value, "n log");
+					dirblocksize = 1 << dirblocklog;
+					nlflag = 1;
+					break;
+				case N_SIZE:
+					if (!value)
+						reqval('n', nopts, N_SIZE);
+					if (nsflag)
+						respec('n', nopts, N_SIZE);
+					if (nlflag)
+						conflict('n', nopts, N_LOG,
+							 N_SIZE);
+					dirblocksize = cvtnum(0, value);
+					if (dirblocksize <= 0 ||
+					    !ispow2(dirblocksize))
+						illegal(value, "n size");
+					dirblocklog =
+						libxfs_highbit32(dirblocksize);
+					nsflag = 1;
+					break;
+				case N_VERSION:
+					if (!value)
+						reqval('n', nopts, N_VERSION);
+					if (nvflag)
+						respec('n', nopts, N_VERSION);
+					dirversion = atoi(value);
+					if (dirversion < 1 || dirversion > 2)
+						illegal(value, "n version");
+					nvflag = 1;
+					break;
+				default:
+					unknown('n', value);
+				}
+			}
+			break;
+		case 'p':
+			if (protofile)
+				respec('p', 0, 0);
+			protofile = optarg;
+			break;
+		case 'q':
+			qflag = 1;
+			break;
+		case 'r':
+			p = optarg;
+			while (*p != '\0') {
+				char	*value;
+
+				switch (getsubopt(&p, (constpp)ropts, &value)) {
+				case R_EXTSIZE:
+					if (!value)
+						reqval('r', ropts, R_EXTSIZE);
+					if (rtextsize)
+						respec('r', ropts, R_EXTSIZE);
+					rtextsize = value;
+					break;
+				case R_DEV:
+					if (!value)
+						reqval('r', ropts, R_DEV);
+					xi.rtname = value;
+					break;
+#ifdef HAVE_VOLUME_MANAGER
+				case R_FILE:
+					if (!value)
+						value = "1";
+					xi.risfile = atoi(value);
+					if (xi.risfile < 0 || xi.risfile > 1)
+						illegal(value, "r file");
+					if (xi.risfile)
+						xi.rcreat = 1;
+					break;
+				case R_NAME:
+					if (!value)
+						reqval('r', ropts, R_NAME);
+					if (xi.rtname)
+						respec('r', ropts, R_NAME);
+					xi.rtname = value;
+					break;
+#endif
+				case R_SIZE:
+					if (!value)
+						reqval('r', ropts, R_SIZE);
+					if (rtsize)
+						respec('r', ropts, R_SIZE);
+					rtsize = value;
+					break;
+
+				default:
+					unknown('r', value);
+				}
+			}
+			break;
+		case 'V':
+			printf("%s version %s\n", progname, VERSION);
+			break;
+		case '?':
+			unknown(optopt, "");
+		}
+	}
+	if (argc - optind > 1) {
+		fprintf(stderr, "extra arguments\n");
+		usage();
+	} else if (argc - optind == 1) {
+		dfile = xi.volname = argv[optind];
+		if (xi.dname) {
+			fprintf(stderr,
+				"cannot specify both %s and -d name=%s\n",
+				xi.volname, xi.dname);
+			usage();
+		}
+	} else
+		dfile = xi.dname;
+	/* option post-processing */
+	if (blocksize < XFS_MIN_BLOCKSIZE || blocksize > XFS_MAX_BLOCKSIZE) {
+		fprintf(stderr, "illegal block size %d\n", blocksize);
+		usage();
+	}
+	if (!nvflag)
+		dirversion = (nsflag || nlflag) ? 2 : XFS_DFL_DIR_VERSION;
+	switch (dirversion) {
+	case 1:
+		if ((nsflag || nlflag) && dirblocklog != blocklog) {
+			fprintf(stderr, "illegal directory block size %d\n",
+				dirblocksize);
+			usage();
+		}
+		break;
+	case 2:
+		if (nsflag || nlflag) {
+			if (dirblocksize < blocksize ||
+			    dirblocksize > XFS_MAX_BLOCKSIZE) {
+				fprintf(stderr,
+					"illegal directory block size %d\n",
+					dirblocksize);
+				usage();
+			}
+		} else {
+			if (blocksize < (1 << XFS_MIN_REC_DIRSIZE))
+				dirblocklog = XFS_MIN_REC_DIRSIZE;
+			else
+				dirblocklog = blocklog;
+			dirblocksize = 1 << dirblocklog;
+		}
+		break;
+	}
+	if (!daflag)
+		agcount = 8;
+
+	if (xi.disfile && (!dsize || !xi.dname)) {
+		fprintf(stderr,
+			"if -d file then -d name and -d size are required\n");
+		usage();
+	}
+	if (dsize) {
+		__uint64_t dbytes;
+
+		dbytes = cvtnum(blocksize, dsize);
+		if (dbytes % XFS_MIN_BLOCKSIZE) {
+			fprintf(stderr,
+			"illegal data length %lld, not a multiple of %d\n",
+				dbytes, XFS_MIN_BLOCKSIZE);
+			usage();
+		}
+		dblocks = (xfs_drfsbno_t)(dbytes >> blocklog);
+		if (dbytes % blocksize)
+			fprintf(stderr,
+	"warning: data length %lld not a multiple of %d, truncated to %lld\n",
+				dbytes, blocksize, dblocks << blocklog);
+	}
+	if (ipflag) {
+		inodelog = blocklog - libxfs_highbit32(inopblock);
+		isize = 1 << inodelog;
+	} else if (!ilflag && !isflag) {
+		inodelog = XFS_DINODE_DFL_LOG;
+		isize = 1 << inodelog;
+	}
+#ifdef HAVE_VOLUME_MANAGER
+	if (xi.lisfile && (!logsize || !xi.logname)) {
+		fprintf(stderr,
+			"if -l file then -l name and -l size are required\n");
+		usage();
+	}
+#endif
+	if (logsize) {
+		__uint64_t logbytes;
+
+		logbytes = cvtnum(blocksize, logsize);
+		if (logbytes % XFS_MIN_BLOCKSIZE) {
+			fprintf(stderr,
+			"illegal log length %lld, not a multiple of %d\n",
+				logbytes, XFS_MIN_BLOCKSIZE);
+			usage();
+		}
+		logblocks = (xfs_drfsbno_t)(logbytes >> blocklog);
+		if (logbytes % blocksize)
+			fprintf(stderr,
+	"warning: log length %lld not a multiple of %d, truncated to %lld\n",
+				logbytes, blocksize, logblocks << blocklog);
+	}
+#ifdef HAVE_VOLUME_MANAGER
+	if (xi.risfile && (!rtsize || !xi.rtname)) {
+		fprintf(stderr,
+			"if -r file then -r name and -r size are required\n");
+		usage();
+	}
+#endif
+	if (rtsize) {
+		__uint64_t rtbytes;
+
+		rtbytes = cvtnum(blocksize, rtsize);
+		if (rtbytes % XFS_MIN_BLOCKSIZE) {
+			fprintf(stderr,
+			"illegal rt length %lld, not a multiple of %d\n",
+				rtbytes, XFS_MIN_BLOCKSIZE);
+			usage();
+		}
+		rtblocks = (xfs_drfsbno_t)(rtbytes >> blocklog);
+		if (rtbytes % blocksize)
+			fprintf(stderr,
+	"warning: rt length %lld not a multiple of %d, truncated to %lld\n",
+				rtbytes, blocksize, rtblocks << blocklog);
+	}
+	/*
+	 * If specified, check rt extent size against its constraints.
+	 */
+	if (rtextsize) {
+		__uint64_t rtextbytes;
+
+		rtextbytes = cvtnum(blocksize, rtextsize);
+		if (rtextbytes % blocksize) {
+			fprintf(stderr,
+			"illegal rt extent size %lld, not a multiple of %d\n",
+				rtextbytes, blocksize);
+			usage();
+		}
+		if (rtextbytes > XFS_MAX_RTEXTSIZE) {
+			fprintf(stderr,
+				"rt extent size %s too large, maximum %d\n",
+				rtextsize, XFS_MAX_RTEXTSIZE);
+			usage();
+		}
+		if (rtextbytes < XFS_MIN_RTEXTSIZE) {
+			fprintf(stderr,
+				"rt extent size %s too small, minimum %d\n",
+				rtextsize, XFS_MIN_RTEXTSIZE);
+			usage();
+		}
+		rtextblocks = (xfs_extlen_t)(rtextbytes >> blocklog);
+	} else {
+		/*
+		 * If realtime extsize has not been specified by the user,
+		 * and the underlying volume is striped, then set rtextblocks
+		 * to the stripe width.
+		 */
+		int dummy1, rswidth;
+		__uint64_t rtextbytes;
+		dummy1 = rswidth = 0;
+                
+                if (!xi.disfile)
+		        get_subvol_stripe_wrapper(dfile, SVTYPE_RT, &dummy1, 
+						    &rswidth);
+
+		/* check that rswidth is a multiple of fs blocksize */
+		if (rswidth && !(BBTOB(rswidth) % blocksize)) {
+			rswidth = DTOBT(rswidth);
+			rtextbytes = rswidth << blocklog;
+			if (XFS_MIN_RTEXTSIZE <= rtextbytes &&
+                                (rtextbytes <= XFS_MAX_RTEXTSIZE))  {
+       		                 rtextblocks = rswidth;
+			} else {
+				rtextblocks = XFS_DFL_RTEXTSIZE >> blocklog;
+			}
+		} else
+			rtextblocks = XFS_DFL_RTEXTSIZE >> blocklog;
+	}
+
+	/*
+	 * Check some argument sizes against mins, maxes.
+	 */
+	if (isize > blocksize / XFS_MIN_INODE_PERBLOCK ||
+	    isize < XFS_DINODE_MIN_SIZE ||
+	    isize > XFS_DINODE_MAX_SIZE) {
+		int	maxsz;
+
+		fprintf(stderr, "illegal inode size %d\n", isize);
+		maxsz = MIN(blocksize / XFS_MIN_INODE_PERBLOCK,
+			    XFS_DINODE_MAX_SIZE);
+		if (XFS_DINODE_MIN_SIZE == maxsz)
+			fprintf(stderr,
+			"allowable inode size with %d byte blocks is %d\n",
+				blocksize, XFS_DINODE_MIN_SIZE);
+		else
+			fprintf(stderr,
+	"allowable inode size with %d byte blocks is between %d and %d\n",
+				blocksize, XFS_DINODE_MIN_SIZE, maxsz);
+		usage();
+	}
+
+	if (dsunit && !dswidth || !dsunit && dswidth) {
+		fprintf(stderr,
+"both sunit and swidth options have to be specified\n");
+		usage();
+	}
+
+	if (dsunit && dswidth % dsunit != 0) {
+		fprintf(stderr,
+"mount: stripe width (%d) has to be a multiple of the stripe unit (%d)\n",
+			dswidth, dsunit);
+		return 1;
+	}
+
+	/* other global variables */
+	sectlog = 9;		/* i.e. 512 bytes */
+
+	/*
+	 * Initialize.  This will open the log and rt devices as well.
+	 */
+	if (!libxfs_init(&xi))
+		usage();
+	if (!xi.ddev) {
+		fprintf(stderr, "no device name given in argument list\n");
+		usage();
+	}
+
+	/*
+	 * Check whether this partition contains a known filesystem.
+	 */
+
+	if (force_fs_overwrite == 0) {
+		char *fstyp;
+		int fsfound = 0;
+
+		fstyp = (char *) mnt_known_fs_type (dfile);
+		
+		if (fstyp != NULL) {
+			fprintf(stderr, "%s: "
+			"%s appears to contain an existing filesystem (%s).\n",
+				progname, dfile, fstyp);
+			fsfound = 1;
+		}
+
+		if (logfile && *logfile) {
+			fstyp = (char *) mnt_known_fs_type (logfile);
+			
+			if (fstyp != NULL) {
+				fprintf(stderr, "%s: "
+			"%s appears to contain an existing filesystem (%s).\n",
+					progname, logfile, fstyp);
+				fsfound = 1;
+			}
+		}
+
+		if (xi.rtname && *xi.rtname) {
+			fstyp = (char *) mnt_known_fs_type (xi.rtname);
+			
+			if (fstyp != NULL) {
+				fprintf(stderr, "%s: "
+			"%s appears to contain an existing filesystem (%s).\n",
+					progname, xi.rtname, fstyp);
+				fsfound = 1;
+			}
+		}
+
+		if (fsfound) {
+			fprintf(stderr, "%s: "
+				"Use the -f option to force overwrite\n",
+				progname);
+			exit(1);
+		}
+	}
+
+	if (!xi.disfile && do_overlap_checks) {
+	        /*
+		 * do partition overlap check
+		 * If this is a straight file we assume that it's been created
+		 * before the call to mnt_check_init()
+		 */
+
+                if (mnt_check_init(&mnt_check_state) == -1) {
+                        fprintf(stderr,
+				"unable to initialize mount checking "
+				"routines, bypassing protection checks.\n");
+		} else {
+		        mnt_partition_count = mnt_find_mount_conflicts(
+				mnt_check_state, dfile);
+
+			/* 
+			 * ignore -1 return codes, since 3rd party devices
+			 * may not be part of hinv.
+			 */
+			if (mnt_partition_count > 0) {
+			        if (mnt_causes_test(mnt_check_state, MNT_CAUSE_MOUNTED)) {
+				        fprintf(stderr, "%s: "
+						"%s is already in use.\n",
+						progname, dfile);
+				} else if (mnt_causes_test(mnt_check_state, MNT_CAUSE_OVERLAP)) {
+				        fprintf(stderr, "%s: "
+						"%s overlaps partition(s) "
+						"already in use.\n",
+						progname, dfile);
+				} else {
+				        mnt_causes_show(mnt_check_state, stderr, progname);
+				}
+				fprintf(stderr, "\n");
+				fflush(stderr);
+				mnt_plist_show(mnt_check_state, stderr, progname);
+				fprintf(stderr, "\n");
+			}
+			mnt_check_end(mnt_check_state);
+			if (mnt_partition_count > 0) {
+			        usage();
+			}
+		}
+	}
+
+	if (!liflag && !ldflag)
+		loginternal = xi.logdev == 0;
+	if (xi.logname)
+		logfile = xi.logname;
+	else if (loginternal)
+		logfile = "internal log";
+	else if (xi.volname && xi.logdev)
+		logfile = "volume log";
+	else if (!ldflag) {
+		fprintf(stderr, "no log subvolume or internal log\n");
+		usage();
+	}
+	if (xi.rtname)
+		rtfile = xi.rtname;
+	else
+	if (xi.volname && xi.rtdev)
+		rtfile = "volume rt";
+	else if (!xi.rtdev)
+		rtfile = "none";
+	if (dsize && xi.dsize > 0 && dblocks > DTOBT(xi.dsize)) {
+		fprintf(stderr,
+"size %s specified for data subvolume is too large, maximum is %lld blocks\n",
+			dsize, DTOBT(xi.dsize));
+		usage();
+	} else if (!dsize && xi.dsize > 0)
+		dblocks = DTOBT(xi.dsize);
+	else if (!dsize) {
+		fprintf(stderr, "can't get size of data subvolume\n");
+		usage();
+	} 
+	if (dblocks < XFS_MIN_DATA_BLOCKS) {
+		fprintf(stderr,
+		"size %lld of data subvolume is too small, minimum %d blocks\n",
+			dblocks, XFS_MIN_DATA_BLOCKS);
+		usage();
+	}
+	if (xi.logdev && loginternal) {
+		fprintf(stderr, "can't have both external and internal logs\n");
+		usage();
+	}
+	if (dirversion == 1)
+		i = max_trres_v1[blocklog - XFS_MIN_BLOCKSIZE_LOG]
+				[inodelog - XFS_DINODE_MIN_LOG];
+	else
+		i = max_trres_v2[blocklog - XFS_MIN_BLOCKSIZE_LOG]
+				[inodelog - XFS_DINODE_MIN_LOG]
+				[dirblocklog - XFS_MIN_BLOCKSIZE_LOG];
+	min_logblocks = MAX(XFS_MIN_LOG_BLOCKS, i * XFS_MIN_LOG_FACTOR);
+	if (logsize && xi.logBBsize > 0 && logblocks > DTOBT(xi.logBBsize)) {
+		fprintf(stderr,
+"size %s specified for log subvolume is too large, maximum is %lld blocks\n",
+			logsize, DTOBT(xi.logBBsize));
+		usage();
+	} else if (!logsize && xi.logBBsize > 0)
+		logblocks = DTOBT(xi.logBBsize);
+	else if (logsize && !xi.logdev && !loginternal) {
+		fprintf(stderr,
+			"size specified for non-existent log subvolume\n");
+		usage();
+	} else if (loginternal && logsize && logblocks >= dblocks) {
+		fprintf(stderr, "size %lld too large for internal log\n",
+			logblocks);
+		usage();
+	} else if (!loginternal && !xi.logdev)
+		logblocks = 0;
+	else if (loginternal && !logsize)
+		logblocks = MAX(XFS_DFL_LOG_SIZE, i * XFS_DFL_LOG_FACTOR);
+	if (logblocks < min_logblocks) {
+		fprintf(stderr,
+		"log size %lld blocks too small, minimum size is %d blocks\n",
+			logblocks, min_logblocks);
+		usage();
+	}
+	if (logblocks > XFS_MAX_LOG_BLOCKS) {
+		fprintf(stderr,
+		"log size %lld blocks too large, maximum size is %d blocks\n",
+			logblocks, XFS_MAX_LOG_BLOCKS);
+		usage();
+	}
+	if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) {
+		fprintf(stderr,
+		"log size %lld bytes too large, maximum size is %d bytes\n",
+			logblocks << blocklog, XFS_MAX_LOG_BYTES);
+		usage();
+	}
+	if (rtsize && xi.rtsize > 0 && rtblocks > DTOBT(xi.rtsize)) {
+		fprintf(stderr,
+"size %s specified for rt subvolume is too large, maximum is %lld blocks\n",
+			rtsize, DTOBT(xi.rtsize));
+		usage();
+	} else if (!rtsize && xi.rtsize > 0)
+		rtblocks = DTOBT(xi.rtsize);
+	else if (rtsize && !xi.rtdev) {
+		fprintf(stderr,
+			"size specified for non-existent rt subvolume\n");
+		usage();
+	}
+	if (xi.rtdev) {
+		rtextents = rtblocks / rtextblocks;
+		nbmblocks = (xfs_extlen_t)howmany(rtextents, NBBY * blocksize);
+	} else {
+		rtextents = rtblocks = 0;
+		nbmblocks = 0;
+	}
+	agsize = dblocks / agcount + (dblocks % agcount != 0);
+
+	/*
+	 * If the ag size is too small, complain if agcount was specified,
+	 * and fix it otherwise.
+	 */
+	if (agsize < XFS_AG_MIN_BLOCKS(blocklog)) {
+		if (daflag) {
+			fprintf(stderr,
+				"too many allocation groups for size\n");
+			fprintf(stderr, "need at most %lld allocation groups\n",
+				dblocks / XFS_AG_MIN_BLOCKS(blocklog) +
+				(dblocks % XFS_AG_MIN_BLOCKS(blocklog) != 0));
+			usage();
+		}
+		agsize = XFS_AG_MIN_BLOCKS(blocklog);
+		if (dblocks < agsize)
+			agcount = 1;
+		else {
+			agcount = dblocks / agsize;
+			agsize = dblocks / agcount + (dblocks % agcount != 0);
+		}
+	}
+	/*
+	 * If the ag size is too large, complain if agcount was specified,
+	 * and fix it otherwise.
+	 */
+	else if (agsize > XFS_AG_MAX_BLOCKS(blocklog)) {
+		if (daflag) {
+			fprintf(stderr, "too few allocation groups for size\n");
+			fprintf(stderr,
+				"need at least %lld allocation groups\n",
+				dblocks / XFS_AG_MAX_BLOCKS(blocklog) + 
+				(dblocks % XFS_AG_MAX_BLOCKS(blocklog) != 0));
+			usage();
+		}
+		agsize = XFS_AG_MAX_BLOCKS(blocklog);
+		agcount = dblocks / agsize + (dblocks % agsize != 0);
+		agsize = dblocks / agcount + (dblocks % agcount != 0);
+	}
+	/*
+	 * If agcount was not specified, and agsize is larger than
+	 * we'd like, make it the size we want.
+	 */
+	if (!daflag && agsize > XFS_AG_BEST_BLOCKS(blocklog)) {
+		agsize = XFS_AG_BEST_BLOCKS(blocklog);
+		agcount = dblocks / agsize + (dblocks % agsize != 0);
+		agsize = dblocks / agcount + (dblocks % agcount != 0);
+	}
+	/*
+	 * If agcount is too large, make it smaller.
+	 */
+	if (agcount > XFS_MAX_AGNUMBER + 1) {
+		agcount = XFS_MAX_AGNUMBER + 1;
+		agsize = dblocks / agcount + (dblocks % agcount != 0);
+		if (agsize > XFS_AG_MAX_BLOCKS(blocklog)) {
+			/*
+			 * We're confused.
+			 */
+			fprintf(stderr, "%s: can't compute agsize/agcount\n",
+				progname);
+			exit(1);
+		}
+	}
+
+	xlv_dsunit = xlv_dswidth = 0;
+        if (!xi.disfile)
+	        get_subvol_stripe_wrapper(dfile, SVTYPE_DATA, &xlv_dsunit, 
+				&xlv_dswidth);
+	if (dsunit) {
+
+		if (xlv_dsunit && xlv_dsunit != dsunit) {
+			fprintf(stderr, "%s: "
+  "Specified data stripe unit %d is not the same as the xlv stripe unit %d\n", 
+				progname, dsunit, xlv_dsunit);
+			exit(1);
+		}
+		if (xlv_dswidth && xlv_dswidth != dswidth) {
+			fprintf(stderr, "%s: "
+"Specified data stripe width (%d) is not the same as the xlv stripe width (%d)\n",
+				progname, dswidth, xlv_dswidth);
+			exit(1);
+		}
+	} else {
+		dsunit = xlv_dsunit;
+		dswidth = xlv_dswidth;
+		nodsflag = 1;
+	}
+
+	/*
+	 * If dsunit is a multiple of fs blocksize, then check that is a
+	 * multiple of the agsize too
+	 */
+	if (dsunit && !(BBTOB(dsunit) % blocksize) && 
+	    dswidth && !(BBTOB(dswidth) % blocksize)) {
+
+		/* convert from 512 byte blocks to fs blocksize */
+		dsunit = DTOBT(dsunit);
+		dswidth = DTOBT(dswidth);
+
+		/* 
+		 * agsize is not a multiple of dsunit
+		 */
+		if ((agsize % dsunit) != 0) {
+                	/*
+                 	 * round up to stripe unit boundary. Also make sure 
+			 * that agsize is still larger than 
+			 * XFS_AG_MIN_BLOCKS(blocklog)
+		 	 */
+                	tmp_agsize = ((agsize + (dsunit - 1))/ dsunit) * dsunit;
+                	if ((tmp_agsize >= XFS_AG_MIN_BLOCKS(blocklog)) &&
+			    (tmp_agsize <= XFS_AG_MAX_BLOCKS(blocklog)) &&
+			    !daflag) {
+				agsize = tmp_agsize;
+				agcount = dblocks/agsize + 
+						(dblocks % agsize != 0);
+                	} else {
+				if (nodsflag)
+					dsunit = dswidth = 0;
+				else { 
+					fprintf(stderr,
+"Allocation group size %lld is not a multiple of the stripe unit %d\n",
+						agsize, dsunit);
+					exit(1);
+				}
+        		}
+		}
+	} else {
+		if (nodsflag)
+			dsunit = dswidth = 0;
+		else { 
+			fprintf(stderr, "%s: "
+"Stripe unit(%d) or stripe width(%d) is not a multiple of the block size(%d)\n",
+				progname, dsunit, dswidth, blocksize); 	
+			exit(1);
+		}
+	}
+
+	protostring = setup_proto(protofile);
+	bsize = 1 << (blocklog - BBSHIFT);
+	buf = libxfs_getbuf(xi.ddev, XFS_SB_DADDR, 1);
+	mp = &mbuf;
+	sbp = &mp->m_sb;
+	bzero(mp, sizeof(xfs_mount_t));
+	sbp->sb_blocklog = (__uint8_t)blocklog;
+	sbp->sb_agblklog = (__uint8_t)libxfs_log2_roundup((unsigned int)agsize);
+	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+	if (loginternal) {
+		if (logblocks > agsize - XFS_PREALLOC_BLOCKS(mp)) {
+			fprintf(stderr,
+	"internal log size %lld too large, must fit in allocation group\n",
+				logblocks);
+			usage();
+		}
+		if (laflag) {
+			if (logagno >= agcount) {
+				fprintf(stderr,
+			"log ag number %d too large, must be less than %lld\n",
+					logagno, agcount);
+				usage();
+			}
+		} else
+			logagno = (xfs_agnumber_t)(agcount / 2);
+
+		logstart = XFS_AGB_TO_FSB(mp, logagno, XFS_PREALLOC_BLOCKS(mp));
+		/*
+		 * Align the logstart at stripe unit boundary.
+		 */
+		if (dsunit && ((logstart % dsunit) != 0)) {
+			logstart = ((logstart + (dsunit - 1))/dsunit) * dsunit;
+
+			/* 
+			 * Make sure that the log size is a multiple of the
+			 * stripe unit
+			 */
+			if ((logblocks % dsunit) != 0) 
+			   if (!lsflag) 
+				logblocks = ((logblocks + (dsunit - 1))
+							/dsunit) * dsunit;
+			   else {
+				fprintf(stderr,
+	"internal log size %lld is not a multiple of the stripe unit %d\n", 
+					logblocks, dsunit);
+				usage();
+			   }
+
+			if (logblocks > agsize-XFS_FSB_TO_AGBNO(mp,logstart)) {
+				fprintf(stderr,
+	"Due to stripe alignment, the internal log size %lld is too large.\n"
+	"Must fit in allocation group\n",
+					logblocks);
+				usage();
+			}
+			lalign = 1;
+		}
+	} else
+		logstart = 0;
+	sbp->sb_magicnum = XFS_SB_MAGIC;
+	sbp->sb_blocksize = blocksize;
+	sbp->sb_dblocks = dblocks;
+	sbp->sb_rblocks = rtblocks;
+	sbp->sb_rextents = rtextents;
+	uuid_generate(uuid);
+	uuid_copy(sbp->sb_uuid, uuid);
+	sbp->sb_logstart = logstart;
+	sbp->sb_rootino = sbp->sb_rbmino = sbp->sb_rsumino = NULLFSINO;
+	sbp->sb_rextsize = rtextblocks;
+	sbp->sb_agblocks = (xfs_agblock_t)agsize;
+	sbp->sb_agcount = (xfs_agnumber_t)agcount;
+	sbp->sb_rbmblocks = nbmblocks;
+	sbp->sb_logblocks = (xfs_extlen_t)logblocks;
+	sbp->sb_sectsize = 1 << sectlog;
+	sbp->sb_inodesize = (__uint16_t)isize;
+	sbp->sb_inopblock = (__uint16_t)(blocksize / isize);
+	sbp->sb_sectlog = (__uint8_t)sectlog;
+	sbp->sb_inodelog = (__uint8_t)inodelog;
+	sbp->sb_inopblog = (__uint8_t)(blocklog - inodelog);
+	sbp->sb_rextslog =
+		(__uint8_t)(rtextents ?
+			libxfs_highbit32((unsigned int)rtextents) : 0);
+	sbp->sb_inprogress = 1;	/* mkfs is in progress */
+	sbp->sb_imax_pct = imflag ? imaxpct : XFS_DFL_IMAXIMUM_PCT;
+	sbp->sb_icount = 0;
+	sbp->sb_ifree = 0;
+	sbp->sb_fdblocks = dblocks - agcount * XFS_PREALLOC_BLOCKS(mp) -
+		(loginternal ? logblocks : 0);
+	sbp->sb_frextents = 0;	/* will do a free later */
+	sbp->sb_uquotino = sbp->sb_pquotino = 0;
+	sbp->sb_qflags = 0;
+	sbp->sb_unit = dsunit;
+	sbp->sb_width = dswidth;
+	if (dirversion == 2)
+		sbp->sb_dirblklog = dirblocklog - blocklog;
+	if (iaflag) {
+		sbp->sb_inoalignmt = XFS_INODE_BIG_CLUSTER_SIZE >> blocklog;
+		iaflag = sbp->sb_inoalignmt != 0;
+	} else
+		sbp->sb_inoalignmt = 0;
+	sbp->sb_versionnum =
+		XFS_SB_VERSION_MKFS(iaflag, dsunit != 0, extent_flagging,
+			dirversion == 2);
+
+	bzero(XFS_BUF_PTR(buf), BBSIZE);
+	libxfs_xlate_sb(XFS_BUF_PTR(buf), sbp, -1, ARCH_CONVERT,
+			XFS_SB_ALL_BITS);
+	libxfs_writebuf(buf, 1);
+
+	if (!qflag)
+		printf(
+		   "meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n"
+		   "data     =%-22s bsize=%-6d blocks=%lld, imaxpct=%d\n"
+		   "         =%-22s sunit=%-6d swidth=%d blks, unwritten=%d\n"
+		   "naming   =version %-14d bsize=%-6d\n"
+		   "log      =%-22s bsize=%-6d blocks=%lld\n"
+		   "realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n",
+			dfile, isize, agcount, agsize,
+			"", blocksize, dblocks, sbp->sb_imax_pct,
+			"", dsunit, dswidth, extent_flagging,
+			dirversion, dirversion == 1 ? blocksize : dirblocksize,
+			logfile, 1 << blocklog, logblocks,
+			rtfile, rtextblocks << blocklog, rtblocks, rtextents);
+	/*
+	 * If the data area is a file, then grow it out to its final size
+	 * so that the reads for the end of the device in the mount code
+	 * will succeed.
+	 */
+	if (xi.disfile && ftruncate64(xi.dfd, dblocks * blocksize) < 0) {
+		fprintf(stderr, "%s: Growing the data section file failed\n",
+			progname);
+		exit(1);
+	}
+	/*
+	 * Zero the log if there is one.
+	 */
+	if (loginternal)
+		xi.logdev = xi.ddev;
+	if (xi.logdev)
+		libxfs_log_clear(
+                    xi.logdev, 
+                    XFS_FSB_TO_DADDR(mp, logstart),
+		    (xfs_extlen_t)XFS_FSB_TO_BB(mp, logblocks),
+                    &sbp->sb_uuid,
+                    XLOG_FMT);
+
+	mp = libxfs_mount(mp, sbp, xi.ddev, xi.logdev, xi.rtdev, 1);
+	if (!mp) {
+		fprintf(stderr, "%s: mount initialization failed\n", progname);
+		exit(1);
+	}
+	if (xi.logdev &&
+	    XFS_FSB_TO_B(mp, logblocks) <
+	    XFS_MIN_LOG_FACTOR * max_trans_res(mp)) {
+		fprintf(stderr, "%s: log size (%lld) is too small for "
+				"transaction reservations\n",
+			progname, logblocks);
+		exit(1);
+	}
+
+	for (agno = 0; agno < agcount; agno++) {
+		/*
+		 * Superblock.
+		 */
+		buf = libxfs_getbuf(xi.ddev,
+				XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1);
+		bzero(XFS_BUF_PTR(buf), BBSIZE);
+                libxfs_xlate_sb(XFS_BUF_PTR(buf), sbp, -1, ARCH_CONVERT,
+				XFS_SB_ALL_BITS);
+		libxfs_writebuf(buf, 1);
+
+		/*
+		 * AG header block: freespace
+		 */
+		buf = libxfs_getbuf(mp->m_dev,
+				XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1);
+		agf = XFS_BUF_TO_AGF(buf);
+		bzero(agf, BBSIZE);
+		if (agno == agcount - 1)
+			agsize = dblocks - (xfs_drfsbno_t)(agno * agsize);
+		INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
+		INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
+		INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
+		INT_SET(agf->agf_length, ARCH_CONVERT, (xfs_agblock_t)agsize);
+		INT_SET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT,
+				XFS_BNO_BLOCK(mp));
+		INT_SET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT,
+				XFS_CNT_BLOCK(mp));
+		INT_SET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT, 1);
+		INT_SET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT, 1);
+		INT_SET(agf->agf_flfirst, ARCH_CONVERT, 0);
+		INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE - 1);
+		INT_SET(agf->agf_flcount, ARCH_CONVERT, 0);
+		nbmblocks = (xfs_extlen_t)(agsize - XFS_PREALLOC_BLOCKS(mp));
+		INT_SET(agf->agf_freeblks, ARCH_CONVERT, nbmblocks);
+		INT_SET(agf->agf_longest, ARCH_CONVERT, nbmblocks);
+		if (loginternal && agno == logagno) {
+			INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -logblocks);
+			INT_SET(agf->agf_longest, ARCH_CONVERT, agsize - 
+				XFS_FSB_TO_AGBNO(mp, logstart) - logblocks);
+		}
+		if (XFS_MIN_FREELIST(agf, mp) > worst_freelist)
+			worst_freelist = XFS_MIN_FREELIST(agf, mp);
+		libxfs_writebuf(buf, 1);
+
+		/*
+		 * AG header block: inodes
+		 */
+		buf = libxfs_getbuf(mp->m_dev,
+				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1);
+		agi = XFS_BUF_TO_AGI(buf);
+		bzero(agi, BBSIZE);
+		INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
+		INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
+		INT_SET(agi->agi_seqno, ARCH_CONVERT, agno);
+		INT_SET(agi->agi_length, ARCH_CONVERT, (xfs_agblock_t)agsize);
+		INT_SET(agi->agi_count, ARCH_CONVERT, 0);
+		INT_SET(agi->agi_root, ARCH_CONVERT, XFS_IBT_BLOCK(mp));
+		INT_SET(agi->agi_level, ARCH_CONVERT, 1);
+		INT_SET(agi->agi_freecount, ARCH_CONVERT, 0);
+		INT_SET(agi->agi_newino, ARCH_CONVERT, NULLAGINO);
+		INT_SET(agi->agi_dirino, ARCH_CONVERT, NULLAGINO);
+		for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+			INT_SET(agi->agi_unlinked[i], ARCH_CONVERT, NULLAGINO);
+		libxfs_writebuf(buf, 1);
+
+		/*
+		 * BNO btree root block
+		 */
+		buf = libxfs_getbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
+				bsize);
+		block = XFS_BUF_TO_SBLOCK(buf);
+		bzero(block, blocksize);
+		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTB_MAGIC);
+		INT_SET(block->bb_level, ARCH_CONVERT, 0);
+		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+		INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		arec = XFS_BTREE_REC_ADDR(blocksize, xfs_alloc, block, 1,
+			XFS_BTREE_BLOCK_MAXRECS(blocksize, xfs_alloc, 1));
+		INT_SET(arec->ar_startblock, ARCH_CONVERT,
+			XFS_PREALLOC_BLOCKS(mp));
+		if (loginternal && agno == logagno) {
+			if (lalign) {
+				/*
+				 * Have to insert two records
+				 */
+				INT_SET(arec->ar_blockcount, ARCH_CONVERT, 
+					(xfs_extlen_t)(XFS_FSB_TO_AGBNO(
+						mp, logstart)
+				  	- (INT_GET(arec->ar_startblock,
+						ARCH_CONVERT))));
+				nrec = arec + 1;
+				INT_SET(nrec->ar_startblock, ARCH_CONVERT,
+					INT_GET(arec->ar_startblock,
+						ARCH_CONVERT) +
+					INT_GET(arec->ar_blockcount,
+						ARCH_CONVERT));
+				arec = nrec;
+				INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1);
+			} 
+			INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks);
+		} 
+		INT_SET(arec->ar_blockcount, ARCH_CONVERT,
+			(xfs_extlen_t)(agsize -
+				INT_GET(arec->ar_startblock, ARCH_CONVERT)));
+		libxfs_writebuf(buf, 1);
+
+		/*
+		 * CNT btree root block
+		 */
+		buf = libxfs_getbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
+				bsize);
+		block = XFS_BUF_TO_SBLOCK(buf);
+		bzero(block, blocksize);
+		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTC_MAGIC);
+		INT_SET(block->bb_level, ARCH_CONVERT, 0);
+		INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+		INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		arec = XFS_BTREE_REC_ADDR(blocksize, xfs_alloc, block, 1,
+			XFS_BTREE_BLOCK_MAXRECS(blocksize, xfs_alloc, 1));
+		INT_SET(arec->ar_startblock, ARCH_CONVERT,
+			XFS_PREALLOC_BLOCKS(mp));
+		if (loginternal && agno == logagno) {
+			if (lalign) {
+				INT_SET(arec->ar_blockcount, ARCH_CONVERT,
+				    (xfs_extlen_t)( XFS_FSB_TO_AGBNO(
+					mp, logstart) - (INT_GET(
+					arec->ar_startblock, ARCH_CONVERT)) )
+				);
+				nrec = arec + 1;
+				INT_SET(nrec->ar_startblock, ARCH_CONVERT,
+				    INT_GET(arec->ar_startblock, ARCH_CONVERT) +
+				    INT_GET(arec->ar_blockcount, ARCH_CONVERT));
+				arec = nrec;
+				INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1);
+			}
+			INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks);
+		}	
+		INT_SET(arec->ar_blockcount, ARCH_CONVERT, (xfs_extlen_t)
+			(agsize - INT_GET(arec->ar_startblock, ARCH_CONVERT)));
+		libxfs_writebuf(buf, 1);
+		/*
+		 * INO btree root block
+		 */
+		buf = libxfs_getbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
+				bsize);
+		block = XFS_BUF_TO_SBLOCK(buf);
+		bzero(block, blocksize);
+		INT_SET(block->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+		INT_SET(block->bb_level, ARCH_CONVERT, 0);
+		INT_SET(block->bb_numrecs, ARCH_CONVERT, 0);
+		INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		libxfs_writebuf(buf, 1);
+	}
+
+	/*
+	 * Touch last block, make fs the right size if it's a file.
+	 */
+	buf = libxfs_getbuf(mp->m_dev,
+		(xfs_daddr_t)XFS_FSB_TO_BB(mp, dblocks - 1LL), bsize);
+	bzero(XFS_BUF_PTR(buf), blocksize);
+	libxfs_writebuf(buf, 1);
+
+	/*
+	 * Make sure we can write the last block in the realtime area.
+	 */
+	if (mp->m_rtdev && rtblocks > 0) {
+		buf = libxfs_getbuf(mp->m_rtdev,
+				XFS_FSB_TO_BB(mp, rtblocks - 1LL), bsize);
+		bzero(XFS_BUF_PTR(buf), blocksize);
+		libxfs_writebuf(buf, 1);
+	}
+	/*
+	 * BNO, CNT free block list
+	 */
+	for (agno = 0; agno < agcount; agno++) {
+		xfs_alloc_arg_t	args;
+		xfs_trans_t	*tp;
+
+		bzero(&args, sizeof(args));
+		args.tp = tp = libxfs_trans_alloc(mp, 0);
+		args.mp = mp;
+		args.agno = agno;
+		args.alignment = 1;
+		args.minalignslop = UINT_MAX;
+		args.pag = &mp->m_perag[agno];
+		if (i = libxfs_trans_reserve(tp, worst_freelist, 0, 0, 0, 0))
+			res_failed(i);
+		libxfs_alloc_fix_freelist(&args, 0);
+		libxfs_trans_commit(tp, 0, NULL);
+	}
+	/*
+	 * Allocate the root inode and anything else in the proto file.
+	 */
+	mp->m_rootip = NULL;
+	parseproto(mp, NULL, &protostring, NULL);
+
+	/*
+	 * protect ourselves against possible stupidity
+	 */
+	if (XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino) != 0) {
+		fprintf(stderr, "%s: root inode not created in AG 0, "
+				"created in AG %u",
+			progname, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino));
+		exit(1);
+	}
+
+	/*
+	 * write out multiple copies of superblocks with the rootinode field set
+	 */
+	if (mp->m_sb.sb_agcount > 1) {
+		/*
+		 * the last superblock
+		 */
+		buf = libxfs_readbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, mp->m_sb.sb_agcount-1,
+					XFS_SB_DADDR),
+				BTOBB(mp->m_sb.sb_sectsize), 1);
+		INT_SET((XFS_BUF_TO_SBP(buf))->sb_rootino,
+				ARCH_CONVERT, mp->m_sb.sb_rootino);
+		libxfs_writebuf(buf, 1);
+		/*
+		 * and one in the middle for luck
+		 */
+		if (mp->m_sb.sb_agcount > 2) {
+			buf = libxfs_readbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, (mp->m_sb.sb_agcount-1)/2,
+					XFS_SB_DADDR),
+				BTOBB(mp->m_sb.sb_sectsize), 1);
+			INT_SET((XFS_BUF_TO_SBP(buf))->sb_rootino,
+				ARCH_CONVERT, mp->m_sb.sb_rootino);
+			libxfs_writebuf(buf, 1);
+		}
+	}
+
+	/*
+	 * Mark the filesystem ok.
+	 */
+	buf = libxfs_getsb(mp, 1);
+	(XFS_BUF_TO_SBP(buf))->sb_inprogress = 0;
+	libxfs_writebuf(buf, 1);
+
+	libxfs_umount(mp);
+	if (xi.rtdev)
+		libxfs_device_close(xi.rtdev);
+	if (xi.logdev && xi.logdev != xi.ddev)
+		libxfs_device_close(xi.logdev);
+	libxfs_device_close(xi.ddev);
+
+	return 0;
+}
+
+/* Complain that two mutually exclusive -opt suboptions were both given. */
+static void
+conflict(char opt, char *tab[], int oldidx, int newidx)
+{
+	char	*earlier = tab[oldidx];
+	char	*later = tab[newidx];
+
+	fprintf(stderr, "Cannot specify both -%c %s and -%c %s\n",
+		opt, earlier, opt, later);
+	usage();
+}
+
+
+/* Report a value that the -opt option cannot accept, then print the
+ * usage synopsis and exit. */
+static void
+illegal(char *value, char *opt)
+{
+	fprintf(stderr, "Illegal value %s for -%s option\n", value, opt);
+	usage();
+}
+
+/* Nonzero when i has at most one bit set (note: 0 passes as well). */
+static int
+ispow2(unsigned int i)
+{
+	return !(i & (i - 1));
+}
+
+/* Complain that suboption tab[idx] of -opt was given without a value. */
+static void
+reqval(char opt, char *tab[], int idx)
+{
+	char	*subopt = tab[idx];
+
+	fprintf(stderr, "-%c %s option requires a value\n", opt, subopt);
+	usage();
+}
+
+/* Complain that -opt (or its suboption tab[idx]) was respecified. */
+static void
+respec(char opt, char *tab[], int idx)
+{
+	FILE	*err = stderr;
+
+	fprintf(err, "-%c ", opt);
+	if (tab != NULL)	/* suboption name only if a table was passed */
+		fprintf(err, "%s ", tab[idx]);
+	fprintf(err, "option respecified\n");
+	usage();
+}
+
+/* Report an unknown option (printed as -opt followed by s), then print
+ * the usage synopsis and exit. */
+static void
+unknown(char opt, char *s)
+{
+	fprintf(stderr, "unknown option -%c %s\n", opt, s);
+	usage();
+}
+
+/* Largest entry of mp->m_reservations, scanned as a flat uint array
+ * (assumes the structure consists solely of uint fields). */
+static int
+max_trans_res(xfs_mount_t *mp)
+{
+	xfs_trans_reservations_t	*resv = &mp->m_reservations;
+	uint				*cur = (uint *)resv;
+	uint				*end = (uint *)(resv + 1);
+	int				largest = 0;
+
+	for (; cur < end; cur++) {
+		if ((int)*cur > largest)
+			largest = (int)*cur;
+	}
+	return largest;
+}
+
+/*
+ * Parse s as a number with an optional one-letter unit suffix: b (times
+ * blocksize), k, m or g (powers of 1024).  Returns -1 if s is malformed.
+ */
+long long
+cvtnum(int blocksize, char *s)
+{
+	long long	num;
+	char		*suffix;
+	extern void	usage(void);
+
+	num = strtoll(s, &suffix, 0);
+	if (num == 0 && suffix == s)
+		return -1LL;
+	if (*suffix == '\0')
+		return num;
+	if (suffix[1] != '\0')		/* only single-letter suffixes */
+		return -1LL;
+	switch (*suffix) {
+	case 'b':
+		if (blocksize)
+			return num * blocksize;
+		fprintf(stderr, "blocksize not available yet.\n");
+		usage();
+		break;
+	case 'k': return 1024LL * num;
+	case 'm': return 1024LL * 1024LL * num;
+	case 'g': return 1024LL * 1024LL * 1024LL * num;
+	}
+	return -1LL;
+}
+
+/* Print the synopsis (labels in the text are literal) and exit(1). */
+void
+usage(void)
+{
+	fprintf(stderr, "Usage: %s\n\
+/* blocksize */		[-b log=n|size=num]\n\
+/* data subvol */	[-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
+			    sunit=value,swidth=value,unwritten=0|1]\n\
+/* inode size */	[-i log=n|perblock=n|size=num,maxpct=n]\n\
+/* log subvol */	[-l agnum=n,internal,size=num,logdev=xxx]\n\
+/* naming */		[-n log=n|size=num|version=n]\n\
+/* prototype file */	[-p fname]\n\
+/* quiet */		[-q]\n\
+/* version */		[-V]\n\
+/* realtime subvol */	[-r extsize=num,size=num,rtdev=xxx]\n\
+			devicename\n\
+devicename is required unless -d name=xxx is given\n\
+internal 1000 block log is default unless overridden or using a volume \
+manager with log\n\
+num is xxx (bytes), or xxxb (blocks), or xxxk (xxx KB), or xxxm (xxx MB)\n\
+value is xxx (512 byte blocks)\n", progname);
+	exit(1);
+}
diff --git a/mkfs/xfs_mkfs.h b/mkfs/xfs_mkfs.h
new file mode 100644
index 000000000..6dcd004d0
--- /dev/null
+++ b/mkfs/xfs_mkfs.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_MKFS_H__
+#define	__XFS_MKFS_H__
+
+#define	XFS_DFL_BLOCKSIZE_LOG	12		/* 4096 byte blocks */
+#define	XFS_DINODE_DFL_LOG	8		/* 256 byte inodes */
+#define	XFS_MIN_DATA_BLOCKS	100		/* min data subvol size, blocks */
+#define	XFS_MIN_INODE_PERBLOCK	2		/* min inodes per block */
+#define	XFS_DFL_IMAXIMUM_PCT	25		/* max % of space for inodes */
+#define	XFS_IFLAG_ALIGN		1		/* -i align defaults on */
+#define	XFS_MIN_REC_DIRSIZE	12		/* 4096 byte dirblocks (V2) */
+#define	XFS_DFL_DIR_VERSION	2		/* default directory version */
+#define	XFS_DFL_LOG_SIZE	1000		/* default log size, blocks */
+#define	XFS_MIN_LOG_FACTOR	3		/* min log size factor, */
+#define	XFS_DFL_LOG_FACTOR	16		/* default log size, factor */
+						/* with max trans reservation */
+extern void  usage (void);	/* print synopsis to stderr, exit(1) */
+extern long long  cvtnum (int blocksize, char *s);	/* -1 on bad input */
+
+#endif	/* __XFS_MKFS_H__ */
diff --git a/repair/Makefile b/repair/Makefile
new file mode 100644
index 000000000..96f81d7c4
--- /dev/null
+++ b/repair/Makefile
@@ -0,0 +1,72 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = xfs_repair
+CMDDEPS = $(LIBXFS)
+
+HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \
+	dir2.h dir_stack.h err_protos.h globals.h incore.h protos.h rt.h \
+	scan.h versions.h
+
+CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \
+	dinode.c dir.c dir2.c dir_stack.c globals.c incore.c \
+	incore_bmc.c init.c incore_ext.c incore_ino.c io.c phase1.c \
+	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c rt.c sb.c \
+	scan.c versions.c xfs_repair.c
+
+LLDLIBS = $(LIBXFS) $(LIBUUID)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+#
+# Tracing flags (optional -D defines; none are set in the CFLAGS line below):
+# -DXR_BMAP_DBG		incore block bitmap debugging
+# -DXR_INODE_TRACE	inode processing
+# -DXR_BMAP_TRACE	bmap btree processing
+# -DXR_DIR_TRACE	directory processing
+# -DXR_DUP_TRACE	duplicate extent processing
+# -DXR_BCNT_TRACE	incore bcnt freespace btree building
+# -DXR_BLD_FREE_TRACE	building on-disk freespace (bcnt/bno) btrees
+# -DXR_BLD_INO_TRACE	building on-disk inode allocation btrees
+# -DXR_BLD_ADD_EXTENT	track phase 5 block extent creation
+# -DXR_BCKPTR_DBG	parent list debugging info
+#
+CFLAGS += -DAVL_USER_MODE -DAVL_FUTURE_ENHANCEMENTS
+
+install: default
+	$(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR)
+	$(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR)
diff --git a/repair/README b/repair/README
new file mode 100644
index 000000000..c007af9f9
--- /dev/null
+++ b/repair/README
@@ -0,0 +1,718 @@
+A living document.  The basic algorithm.
+
+TODO: (D == DONE)
+
+0)	Need to bring some sanity into the case of flags that can
+	be set in the secondaries at mkfs time but reset or cleared
+	in the primary later in the filesystem's life.
+
+0)	Clear the persistent read-only bit if set.  Clear the
+	shared bit if set and the version number is zero.  This
+	brings the filesystem back to a known state.
+
+0)	make sure that superblock geometry code checks the logstart
+	value against whether or not we have an internal log.
+	If we have an internal log and a logdev, that's ok.
+	(Maybe we just aren't using it).  If we have an external
+	log (logstart == 0) but no logdev, that's right out.
+
+0)	write secondary superblock search code.  Rewrite initial
+	superblock parsing code to be less complicated.  Just
+	use variables to indicate primary, secondary, etc.,
+	and use a function to get the SB given a specific location
+	or something.
+
+2)	For inode alignment, if the SB bit is set and the
+	inode alignment size field in the SB is set, then
+	believe that the fs inodes MUST be aligned and
+	disallow any non-aligned inodes.  Likewise, if
+	the SB bit isn't set (or earlier version) and
+	the inode alignment size field is zero, then
+	never set the bit even if the inodes are aligned.
+	Note that the bits and alignment values are
+	replicated in the secondary superblocks.
+
+0)  add feature specification options to parse_arguments
+
+0)	add logic to add_inode_ref(), add_inode_reached()
+	to detect nlink overflows in cases where the fs
+	(or user had indicated fs) doesn't support new nlinks.
+
+6) check to make sure that the inodes containing btree blocks
+	with # recs < minrecs aren't legit -- e.g. the only
+	descendant of a root block.
+
+7)  inode di_size value sanity checking -- should always be less than
+	the biggest filebno offset mentioned in the bmaps.  Doesn't
+	have to be equal though since we're allowed to overallocate
+	(it just wastes a little space).  This is for both regular
+	files and directories (have to modify the existing directory
+	check).
+
+	Add tracking of largest offset in bmap scanning code.  Compare
+	value against di_size.  Should be >= di_size.
+
+	Alternatively, you could pass the inode down through
+	the extent record processing layer and make the checks
+	there.
+
+	Add knowledge of quota inodes.  size of quota inode is
+	always zero.  We should maintain that.
+
+8)  Basic quota stuff.
+
+	Invariants
+		if quota feature bit is set, the quota inodes
+		if set, should point to disconnected, 0 len inodes.
+
+D -		if quota inodes exist, the quota bits must be
+		turned on.  It's ok for the quota flags to be
+		zeroed but they should be in a legal state
+		(see xfs_quota.h).
+
+D - 		if the quota flags are non-zero, the corresponding
+		quota inodes must exist.
+
+		quota inodes are never deleted, only their space
+		is freed.
+
+	if quotas are being downgraded, then check quota inodes
+	at the end of phase 3.  If they haven't been cleared yet,
+	clear them.  Regardless, then clear sb flags (quota inode
+	fields, quota flags, and quota bit).
+
+
+5) look at verify_inode_chunk().  it's probably really broken.
+
+
+9)  Complicated quota stuff.  Add code to bmap scan code to
+	track used blocks.  Add another pair of AVL trees
+	to track user and project quota limits.  Set AVL
+	trees up at the beginning of phase 3.  Quota inodes
+	can be rebuilt or corrected later if damaged.
+
+
+D - 0)	fix directory processing.  phase 3, if an entry references
+	a free inode, *don't* mark it used.  wait for the rest of
+	phase 3 processing to hit that inode.  If it looks like it's
+	in use, we'll mark in use then.  If not, we'll clear it and
+	mark the inode map.  then in phase 4, you can depend on the
+	inode map.  should probably set the parent info in phase 4.
+	So we have a check_dups flag.  Maybe we should change the
+	name of check_dir to discover_inodes.  During phase 3
+	(discover_inodes == 1), uncertain inodes are added to list.
+	During phase 4 (discover_inodes == 0), they aren't.  And
+	we never mark inodes in use from the directory code.
+	During phase 4, we shouldn't complain about names with
+	a leading '/' since we made those names in phase 3.
+
+	Have to change dino_chunks.c (parent setting), dinode.c
+	and dir.c.
+
+D - 0)	make sure we don't screw up filesystems with real-time inodes.
+	remember to initialize real-time map with all blocks XR_E_FREE.
+
+D - 4) check contents of symlinks as well as lengths in process_symlinks()
+	in dinode.c.  Right now, we only check lengths.
+
+
+D - 1)	Feature mismatches -- for quotas and attributes,
+	if the stuff exists in the filesystem, set the
+	superblock version bits.
+
+D - 0)	rewrite directory leaf block holemap comparison code.
+	probably should just check the leaf block hole info
+	against our incore bitmap.  If the hole flag is not
+	set, then we know that there can only be one hole and
+	it has to be between the entry table and the top of heap.
+	If the hole flag is set, then it's ok if the on-disk
+	holemap doesn't describe everything as long as what
+	it does describe doesn't conflict with reality.
+
+D - 0)	rewrite setting nlinks handling -- for version 1
+	inodes, set both nlinks and onlinks (zero projid
+	and pad) if we have to change anything.  For
+	version 2, I think we're ok.
+
+D - 0)	Put awareness of quota inode into mark_standalone_inodes.
+
+
+D - 8) redo handling of superblocks with bad version numbers.  need
+	to bail out (without harming) fs's that have sbs that
+	are newer than we are.
+
+D - 0)  How do we handle feature mismatches between fs and
+	superblock?  For nlink, check each inode after you
+	know it's good.  If onlinks is 0 and nlinks is > 0
+	and it's a version 2 inode, then it really is a version
+	2 inode and the nlinks flag in the SB needs to be set.
+	If it's a version 2 inode and the SB agrees but onlink
+	is non-zero, then clear onlink.
+
+D - 3)  keep cumulative counts of freeblocks, inodes, etc. to set in
+	the superblock at the end of phase 5.  Remember that
+	agf freeblock counters don't include blocks used by
+	the non-root levels of the freespace trees but that
+	the sb free block counters include those.
+
+D - 0)  Do parent setting in directory code (called by phase 3).
+	actually, I put it in process_inode_set and propagated
+	the parent up to it from the process_dinode/process_dir
+	routines.  seemed cleaner than pushing the irec down
+	and letting them bang on it.
+
+D - 0)  If we clear a file in phase 4, make sure that if it's
+	a directory that the parent info is cleared also.
+
+D - 0) put inode tree flashover (call to add_ino_backptrs) into phase 5.
+
+D - 0) do set/get_inode_parent functions in incore_ino.c.
+	also do is/set/ inode_processed.
+	
+D - 0) do a versions.c to extract feature info and set global vars
+	from the superblock version number and possibly feature bits
+
+D - 0) change longform_dir_entry_check + shortform_dir_entry_check
+	to return a count of how many illegal '/' entries exist.
+	if > 0, then process_dirstack needs to call prune_dir_entry
+	with a hash value of 0 to delete the entries.
+
+D - 0)  add the "processed" bitfield
+	to the backptrs_t struct that gets attached after
+	phase 4.
+
+D- )  Phase 6 !!!
+
+D - 0) look at usage of XFS_MAKE_IPTR().  It does the right
+	arithmetic assuming you count your offsets from the
+	beginning of the buffer.
+
+
+D - 0) look at references to XFS_INODES_PER_CHUNK.  change the
+	ones that really mean sizeof(__uint64_t)*NBBY to
+	something else (like that, only defined as a constant
+	INOS_PER_IREC).  this isn't as important since
+	XFS_INODES_PER_CHUNK will never change.
+
+
+D - 0) look at junk_zerolen_dir_leaf_entries() to make sure it isn't hosing
+	the freemap since it assumed that bytes between the
+	end of the table and firstused didn't show up in the
+	freemap when they actually do.
+
+D - 0) track down XFS_INO_TO_OFFSET() usage.  I don't think I'm
+	using it right.  (e.g. I think
+	it gives you the offset of an inode into a block but
+	on small block filesystems, I may be reading in inodes
+	in multiblock buffers and working from the start of
+	the buffer plus I'm using it to get offsets into
+	my ino_rec's which may not be a good idea since I
+	use 64-inode ino_rec's whereas the offset macro
+	works off blocksize).
+
+D - 0.0) put buffer -> dirblock conversion macros into xfs kernel code
+
+D - 0.2) put in sibling pointer checking and path fixup into
+	bmap (long form) scan routines in scan.c
+D - 0.3) find out if bmap btrees with only root blocks are legal.  I'm
+	betting that they're not because they'd be extent inodes
+	instead.  If that's the case, rip some code out of
+	process_btinode()
+
+
+Algorithm (XXX means not done yet):
+
+Phase 1 -- get a superblock and zero log
+
+	get a superblock -- either read in primary or
+		find a secondary (ag header), check ag headers
+
+		To find secondary:
+
+			Go for brute force and read in the filesystem N meg
+				at a time looking for a superblock.  as a
+				slight optimization, we could maybe skip
+				ahead some number of blocks to try and get
+				towards the end of the first ag.
+
+			After you find a secondary, try and find at least
+				other ags as a verification that the
+				secondary is a good superblock.
+
+XXX -		Ugh.  Have to take growfs'ed filesystems into account.
+		The root superblock geometry info may not be right if
+		recovery hasn't run or it's been trashed.  The old ag's
+		may or may not be right since the system could have crashed
+		during growfs or the bwrite() to the superblocks could have
+		failed and the buffer been reused.  So we need to check
+		to see if another ag exists beyond the "last" ag
+		to see if a growfs happened.  If not, then we know that
+		the geometry info is good and treat the fs as a non-growfs'ed
+		fs.  If we do have inconsistencies, then the smaller geometry
+		is the old fs and the larger the new.  We can check the
+		new superblocks to see if they're good.  If not, then we
+		know the system crashed at or soon after the growfs and
+		we can choose to either accept the new geometry info or
+		trash it and truncate the fs back to the old geometry
+		parameters.
+
+	Cross-check geometry information in secondary sb's with
+	primary to ensure that it's correct.
+
+	Use sim code to allow mounting filesystems *without* reading
+	in root inode.  This sets up the xfs_mount_t structure
+	and allows us to use XFS_* macros that we wouldn't
+	otherwise be able to use.
+
+	Note, I split phase 1 and 2 into separate pieces because I want
+	to initialize the xfs_repair incore data structures after phase 1.
+
+	parse superblock version and feature flags and set appropriate
+		global vars to reflect the flags (attributes, quotas, etc.)
+
+	Workaround for the mkfs "not zeroing the superblock buffer" bug.
+	Determine what field is the last valid non-zero field in
+	the superblock.  The trick here is to be able to differentiate
+	the last valid non-zero field in the primary superblock and
+	secondaries because they may not be the same.  Fields in
+	the primary can be set as the filesystem gets upgraded but
+	the upgrades won't touch the secondaries.  This means that
+	we need to find some number of secondaries and check them.
+	So we do the checking here and the setting in phase2.
+
+Phase 2 -- check integrity of allocation group allocation structures
+
+	zero the log if in no modify mode
+
+	sanity check ag headers -- superblocks match, agi isn't
+				trashed -- the agf and agfl
+				don't really matter because we can
+				just recreate them later.
+
+		Zero part of the superblock buffer if necessary
+
+		Walk the freeblock trees to get an
+			initial idea of what the fs thinks is free.
+			Files that disagree (claim free'd blocks)
+			can be salvaged or deleted.  If the btree is
+			internally inconsistent, when in doubt, mark
+			blocks free.  If they're used, they'll be stolen
+			back later.  don't have to check sibling pointers
+			for each level since we're going to regenerate
+			all the trees anyway.
+		Walk the inode allocation trees and
+			make sure they're ok, otherwise the sim
+			inode routines will probably just barf.
+			mark inode allocation tree blocks and ag header
+			blocks as used blocks.  If the trees are
+			corrupted, this phase will generate "uncertain"
+			inode chunks.  Those chunks go on a list and
+			will have to be verified later.  Record the blocks
+			that are used to detect corruption and multiply
+			claimed blocks.  These trees will be regenerated
+			later.  Mark the blocks containing inodes referenced
+			by uncorrupted inode trees as being used by inodes.
+			The other blocks will get marked when/if the inodes
+			are verified.
+
+	calculate root and realtime inode numbers from the
+		filesystem geometry, fix up mount structure's
+		incore superblock if they're wrong.
+
+ASSUMPTION:  at end of phase 2, we've got superblocks and ag headers
+	that are not garbage (some data in them like counters and the
+	freeblock and inode trees may be inconsistent but the header
+	is readable and otherwise makes sense).
+
+XXX	if in no_modify mode, check for blocks claimed by one freespace
+	btree and not the other
+	
+Phase 3 -- traverse inodes to make the inodes, bmaps and freespace maps
+		consistent.  For each ag, use either the incore inode map or
+		scan the ag for inodes.
+		Let's use the incore inode map, now that we've made one
+		up in phase2.  If we lose the maps, we'll locate inodes
+		when we traverse the directory hierarchy.  If we lose both,
+		we could scan the disk.  Ugh.  Maybe make that a command-line
+		option that we support later.
+		
+	ASSUMPTION: we know if the ag allocation btrees are intact (phase 2)
+
+	First - Walk and clear the ag unlinked lists.  We'll process
+		the inodes later.  Check and make sure that the unlinked
+		lists reference known inodes.  If not, add to the list
+		of uncertain inodes.
+
+	Second, check the uncertain inode list generated in phase2 and
+		above and get them into the inode tree if they're good.
+		The incore inode cluster tree *always* has good
+		clusters (alignment, etc.) in it.
+		
+	Third, make sure that the root inode is known.  If not,
+		and we know the inode number from the superblock,
+		discover that inode and its chunk.
+
+	Then, walk the incore inode-cluster tree.
+
+	Maintain an in-core bitmap over the entire fs for block allocation.
+
+	traverse each inode, make sure inode mode field matches free/allocated
+		bit in the incore inode allocation tree.  If there's a mismatch,
+		assume that the inode is in use.
+
+		- for each in-use inode, traverse each bmap/dir/attribute
+			map or tree.  Maintain a map (extent list?) for the
+			current inode.
+
+		- For each block marked as used, check to see if already known
+			(referenced by another file or directory) and sanity
+			check the contents of the block as well if possible
+			(in the case of meta-blocks).
+
+		- if the inode claims already used blocks, mark the blocks
+			as multiply claimed (duplicate) and go on.  the inode
+			will be cleared in phase 4.
+
+		- if metablocks are garbaged, clear the inode after
+			traversing what you can of the bmap and
+			proceed to next inode.  We don't have to worry
+			about trashing the maps or trees in cleared inodes
+			because the blocks will show up as free in the
+			ag freespace trees that we set up in phase 5.
+
+		- clear the di_next_unlinked pointer -- all unlinked
+			but active files go bye-bye.
+
+		- All blocks start out unknown.  We need the last state
+			in case we run into a case where we need to step
+			on a block to store filesystem meta-data and it
+			turns out later that it's referenced by some inode's
+			bmap.  In that case, the inode loses because we've
+			already trashed the block.  This shouldn't happen
+			in the first version unless some inode has a bogus
+			bmap referencing blocks in the ag header but the
+			4th state will keep us from inadvertently doing
+			something stupid in that case.
+
+		- If inode is allocated, mark all blocks allocated to the
+			current inode as allocated in the incore freespace
+			bitmap.
+
+ 		- If inode is good and a directory, scan through it to
+			find leaf entries and discover any unknown inodes.
+			
+			For shortform, we correct what we can.
+
+			If the directory is corrupt, we try and fix it in
+			place.  If it has zero good entries, then we blast it.
+
+			All unknown inodes get put onto the uncertain inode
+			list.  This is safe because we only put inodes onto
+			the list when we're processing known inodes so the
+			uncertain inode list isn't in use.
+
+			We fix only one problem -- an entry that has
+			a mathematically invalid inode number in it.
+			If that's the case, we replace the inode number
+			with NULLFSINO and we'll fix up the entry in
+			phase 6.
+
+			That info may conflict with the inode information,
+			but we'll straighten out any inconsistencies there
+			in phase4 when we process the inodes again.
+
+			Errors involving bogus forward/back links,
+			zero-length entries make the directory get
+			trashed.
+
+			if an entry references a free inode, ignore that
+			fact for now.  wait for the rest of phase 3
+			processing to hit that inode.  If it looks like it's
+			in use, we'll mark in use then.  If not, we'll
+			clear it and mark the inode map.  then in phase
+			4, you can depend on the inode map.
+	
+			Entries that point to non-existent or free
+			inodes, and extra blocks in the directory
+			will get fixed in place in a later pass.
+
+			Entries that point to a quota inode are
+			marked TBD.
+
+			If the directory internally points to the same
+			block twice, the directory gets blown away.
+
+	Note that processing uncertain inodes can add more inodes
+	to the uncertain list if they're directories.  So we loop
+	until the uncertain list is empty.
+
+	During inode verification, if the inode blocks are unknown,
+	mark them as in-use by inodes.
+
+XXX	HEURISTIC -- if we blow an inode away that has space,
+	assume that the freespace btree is now out of whack.
+	If it was ok earlier, it's certain to be wrong now.
+	And the odds of this space free cancelling out the
+	existing error is so small I'm willing to ignore it.
+	Should probably do this via a global var and complain
+	about this later.
+
+Assumption:  All known inodes are now marked as in-use or free.  Any
+	inodes that we haven't found by now are hosed (lost) since
+	we can't reach them via either the inode btrees or via directory
+	entries.
+
+	Directories are semi-clean.  All '.' entries are good.
+	Root '..' entry is good if root inode exists.  All entries
+	referencing non-existent inodes, free inodes, etc. 
+
+XXX	verify that either quota inode is 0 or NULLFSINO or
+	if sb quota flag is non zero, verify that quota inode
+	is NULLFSINO or is referencing a used, but disconnected
+	inode.
+
+XXX	if in no_modify mode, check for unclaimed blocks
+
+- Phase 4 - Check for inodes referencing duplicate blocks
+
+	At this point, all known duplicate blocks are marked in
+	the block map.  However, some of the claimed blocks in
+	the bmap may in fact be free because they belong to inodes
+	that have to be cleared either due to being a trashed
+	directory or because it's the first inode to claim a
+	block that was then claimed later.  There's a similar
+	problem with meta-data blocks that are referenced by
+	inode bmaps that are going to be freed once the inode
+	(or directory) gets cleared.
+
+	So at this point, we collect the duplicate blocks into
+	extents and put them into the duplicate extent list.
+
+	Mark the ag header blocks as in use.
+
+	We then process each inode twice -- the first time
+	we check to see if the inode claims a duplicate extent
+	and we do NOT set the block bitmap.  If the inode claims
+	a duplicate extent, we clear the inode.  Since the bitmap
+	hasn't been set, that automatically frees all blocks associated
+	with the cleared inode.  If the inode is ok, process it a second
+	time and set the bitmap since we know that this inode will live.
+
+	The unlinked list gets cleared in every inode at this point as
+	well.  We no longer need to preserve it since we've discovered
+	every inode we're going to find from it.
+
+	verify existence of root inode.  if it exists, check for
+	existence of "lost+found".  If it exists, mark the entry
+	to be deleted, and clear the inode.  All the inodes that
+	were connected to the lost+found will be reconnected later.
+
+XXX	HEURISTIC -- if we blow an inode away that has space,
+	assume that the freespace btree is now out of whack.
+	If it was ok earlier, it's certain to be wrong now.
+	And the odds of this space free cancelling out the
+	existing error is so small I'm willing to ignore it.
+	Should probably do this via a global var and complain
+	about this later.
+
+	Clear the quota inodes if the inode btree says that
+	they're not in use.  The space freed will get picked
+	up by phase 5.
+	
+XXX	Clear the quota inodes if the filesystem is being downgraded.
+
+- Phase 5 - Build inode allocation trees, freespace trees and
+		agfl's for each ag.  After this, we should be able to
+		unmount the filesystem and remount it for real.
+
+	For each ag: (if not in no_modify mode)
+
+	scan bitmap first to figure out number of extents.
+	
+	calculate space required for all trees.  Start with inode trees.
+	Setup the btree cursor which includes the list of preallocated
+	blocks.  As a by-product, this will delete the extents required
+	for the inode tree from the incore extent tree.
+	
+	Calculate how many extents will be required to represent the
+	remaining free extent tree on disk (twice, one for bybno and
+	one for bycnt).  You have to iterate on this because consuming
+	extents can alter the number of blocks required to represent
+	the remaining extents.  If there's slop left over, you can
+	put it in the agfl though.
+
+	Then, manually build the trees, agi, agfs, and agfls.
+
+XXX	if in no_modify mode, scan the on-disk inode allocation
+	trees and compare against the incore versions.  Don't have
+	to scan the freespace trees because we caught the problems
+	there in phase2 and phase3.  But if we cleared any inodes
+	with space during phases 3 or 4, now is the time to complain.
+
+XXX - 	Free duplicate extent lists. ???
+
+Assumptions:  at this point, sim code having to do with inode
+		creation/modification/deletion and space allocation
+		work because the inode maps, space maps, and bmaps
+		for all files in the filesystem are good.  The only
+		structures that are screwed up are the directory contents,
+		which means that lookup may not work for beans, the
+		root inode which exists but may be completely bogus and
+		the link counts on all inodes which may also be bogus.
+
+	Free the bitmap, the freespace tree.
+
+ 	Flash the incore inode tree over from parent list to having
+	full backpointers.
+
+	realtime processing, if any --
+
+		(Skip to below if running in no_modify mode).
+
+		Generate the realtime bitmap from the incore realtime
+		extent map and slam the info into the realtime bitmap
+		inode.  Generate summary info from the realtime extent map.
+		
+XXX		if in no_modify mode, compare contents of realtime bitmap
+		inode to the incore realtime extent map.  generate the
+		summary info from the incore realtime extent map.
+		compare against the contents of the realtime summary inode.
+		complain if bad.
+
+	reset superblock counters, sync version numbers
+
+- Phase 6 - directory traversal -- check reference counts,
+		attach disconnected inodes, fix up bogus directories
+
+	Assumptions:  all on-disk space and inode trees are structurally
+		sound.  Incore and on-disk inode trees agree on whether
+		an inode is in use.
+
+		Directories are structurally sound.  All hashvalues
+		are monotonically increasing and interior nodes are
+		correct so lookups work.  All legal directory entries
+		point to inodes that are in use and exist.  Shortform
+		directories are fine except that the links haven't been
+		checked for conflicts (cycles, ".." being correct, etc.).
+		Longform directories haven't been checked for those problems
+		either PLUS longform directories may still contain
+		entries beginning with '/'.  No zero-length entries
+		exist (they've been deleted or converted to '/').
+
+		Root directory may or may not exist.  orphanage may
+		or may not exist.  Contents of either may be completely
+		bogus.
+
+		Entries may point to free or non-existent inodes.
+
+	At this point, we may need new incore structures and
+		may be able to trash an old one (like the filesystem
+		block map)
+
+	If '/' is trashed, then reinitialize it.
+
+	If no realtime inodes, make them and if necessary, slam the
+		summary info into the realtime summary
+		inode.  Ditto with the realtime bitmap inode.
+	
+	Make orphanage (lost+found ???).
+
+	Traverse each directory from '/' (unless it was created).
+		Check directory structure and each directory entry.
+		If the entry is bogus (points to a non-existent or
+		free inode, for example), mark that entry TBD.  Maintain
+		link counts on all inodes.  Currently, traversal is
+		depth-first.
+
+		Mark every inode reached as "reached" (includes
+		bumping up link counts).
+
+		If an entry points to a directory but the parent (..)
+		disagrees, then blow away the entry.  if the directory
+		being pointed to winds up disconnected, it'll be moved
+		to the orphanage (and the link count incremented to
+		account for the link and the reached bit set then).
+
+		If an entry points to a directory that we've already
+		reached, then some entry is bad and should be blown
+		away.  It's easiest to blow away the current entry
+		plus since presumably the parent entry in the
+		reached directory points to another directory,
+		then it's far more likely that the current
+		entry is bogus (otherwise the parent should point
+		at it).
+
+		If an entry points to a non-existent or free inode,
+		blow the entry away.
+
+		Every time a good entry is encountered update the
+		link count for the inode that the entry points to.
+
+	After traversal, scan incore inode map for directories not
+		reached.  Go to first one and try and find its root
+		by following .. entries.  Once at root, run traversal
+		algorithm.  When algorithm terminates, move subtree
+		root inode to the orphanage.  Repeat as necessary
+		until all disconnected directories are attached.
+
+	Move all disconnected inodes to orphanage.
+
+- Phase 7:  reset reference counts if required.
+
+	Now traverse the on-disk inodes again, and make sure on-disk
+		reference counts are correct.  Reset if necessary.
+
+		SKIP all unused inodes -- that also makes us
+		skip the orphanage inode which we think is
+		unused but is really used.  However, the ref counts
+		on that should be right so that's ok.
+
+---
+
+multiple TB xfs_repair
+
+modify above to work in a couple of AGs at a time.  The bitmaps
+should span only the current set of AGs.
+
+The key is to scan the inode bmaps and keep a list of inodes
+that span multiple AG sets and keep the list in a data structure
+that's keyed off AG set # as well as inode # and also has a bit
+to indicate whether or not the inode will be cleared.
+
+Then in each AG set, when doing duplicate extent processing,
+you have to process all multi-AG-set inodes that claim blocks in
+the current AG set.  If there's a conflict, you clear the
+inode in the current AG and you mark the multi-AG inode as
+"to be cleared".
+
+After going through all AGs, you can clear the to-be-cleared
+multi-AG-set inodes and pull them off the list.
+
+When building up the AG freespace trees, you walk the bmaps
+of all multi-AG-set inodes that are in the AG-set and include
+blocks claimed in the AG by the inode as used.
+
+This probably involves adding a phase 3-0 which would have to
+check all the inodes to see which ones are multi-AG-set inodes
+and set up the multi-AG-set inode data structure.  Plus the
+process_dinode routines may have to be altered just a bit
+to do the right thing if running in tera-byte mode (call
+out to routines that check the multi-AG-set inodes when
+appropriate).
+
+To make things go faster, phase 3-0 could probably run
+in parallel.  It should be possible to run phases 2-5
+in parallel as well once the appropriate synchronization
+is added to the incore routines and the static directory
+leaf block bitmap is changed to be on the stack.
+
+Phase 7 probably can be in parallel as well.
+
+By in parallel, I mean that assuming that an AG-set
+contains 4 AGs, you could run 4 threads, 1 per AG
+in parallel to process the AG set.
+
+I don't see how phase 6 can be run in parallel though.
+
+And running Phase 8 in parallel is just silly.
+
diff --git a/repair/agheader.c b/repair/agheader.c
new file mode 100644
index 000000000..0a4200f7c
--- /dev/null
+++ b/repair/agheader.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+int
+verify_set_agf(xfs_mount_t *mp, xfs_agf_t *agf, xfs_agnumber_t i)
+{
+	xfs_drfsbno_t agblocks;
+	int retval = 0;
+
+	/* check common fields */
+
+	if (INT_GET(agf->agf_magicnum, ARCH_CONVERT) != XFS_AGF_MAGIC)  {
+		retval = XR_AG_AGF;
+		do_warn("bad magic # 0x%x for agf %d\n", INT_GET(agf->agf_magicnum, ARCH_CONVERT), i);
+
+		if (!no_modify)
+			INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
+	}
+
+	if (!XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT)))  {
+		retval = XR_AG_AGF;
+		do_warn("bad version # %d for agf %d\n",
+			INT_GET(agf->agf_versionnum, ARCH_CONVERT), i);
+
+		if (!no_modify)
+			INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
+	}
+
+	if (INT_GET(agf->agf_seqno, ARCH_CONVERT) != i)  {
+		retval = XR_AG_AGF;
+		do_warn("bad sequence # %d for agf %d\n", INT_GET(agf->agf_seqno, ARCH_CONVERT), i);
+
+		if (!no_modify)
+			INT_SET(agf->agf_seqno, ARCH_CONVERT, i);
+	}
+
+	if (INT_GET(agf->agf_length, ARCH_CONVERT) != mp->m_sb.sb_agblocks)  {
+		if (i != mp->m_sb.sb_agcount - 1)  {
+			retval = XR_AG_AGF;
+			do_warn("bad length %d for agf %d, should be %d\n",
+				INT_GET(agf->agf_length, ARCH_CONVERT), i, mp->m_sb.sb_agblocks);
+			if (!no_modify)
+				INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+		} else  {
+			agblocks = mp->m_sb.sb_dblocks -
+				(xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+
+			if (INT_GET(agf->agf_length, ARCH_CONVERT) != agblocks)  {
+				retval = XR_AG_AGF;
+				do_warn(
+			"bad length %d for agf %d, should be %llu\n",
+					INT_GET(agf->agf_length, ARCH_CONVERT), i, agblocks);
+				if (!no_modify)
+					INT_SET(agf->agf_length, ARCH_CONVERT, (xfs_agblock_t) agblocks);
+			}
+		}
+	}
+
+	/*
+	 * check first/last AGF fields.  if need be, lose the free
+	 * space in the AGFL, we'll reclaim it later.
+	 */
+	if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE)  {
+		do_warn("flfirst %d in agf %d too large (max = %d)\n",
+			INT_GET(agf->agf_flfirst, ARCH_CONVERT), i, XFS_AGFL_SIZE);
+		if (!no_modify)
+			INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+	}
+
+	if (INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE)  {
+		do_warn("fllast %d in agf %d too large (max = %d)\n",
+			INT_GET(agf->agf_fllast, ARCH_CONVERT), i, XFS_AGFL_SIZE);
+		if (!no_modify)
+			INT_ZERO(agf->agf_fllast, ARCH_CONVERT);
+	}
+
+	/* don't check freespace btrees -- will be checked by caller */
+
+	return(retval);
+}
+
+int
+verify_set_agi(xfs_mount_t *mp, xfs_agi_t *agi, xfs_agnumber_t i)
+{
+	xfs_drfsbno_t agblocks;
+	int retval = 0;
+
+	/* check common fields */
+
+	if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC)  {
+		retval = XR_AG_AGI;
+		do_warn("bad magic # 0x%x for agi %d\n", INT_GET(agi->agi_magicnum, ARCH_CONVERT), i);
+
+		if (!no_modify)
+			INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
+	}
+
+	if (!XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT)))  {
+		retval = XR_AG_AGI;
+		do_warn("bad version # %d for agi %d\n",
+			INT_GET(agi->agi_versionnum, ARCH_CONVERT), i);
+
+		if (!no_modify)
+			INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
+	}
+
+	if (INT_GET(agi->agi_seqno, ARCH_CONVERT) != i)  {
+		retval = XR_AG_AGI;
+		do_warn("bad sequence # %d for agi %d\n", INT_GET(agi->agi_seqno, ARCH_CONVERT), i);
+
+		if (!no_modify)
+			INT_SET(agi->agi_seqno, ARCH_CONVERT, i);
+	}
+
+	if (INT_GET(agi->agi_length, ARCH_CONVERT) != mp->m_sb.sb_agblocks)  {
+		if (i != mp->m_sb.sb_agcount - 1)  {
+			retval = XR_AG_AGI;
+			do_warn("bad length # %d for agi %d, should be %d\n",
+				INT_GET(agi->agi_length, ARCH_CONVERT), i, mp->m_sb.sb_agblocks);
+			if (!no_modify)
+				INT_SET(agi->agi_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+		} else  {
+			agblocks = mp->m_sb.sb_dblocks -
+				(xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+
+			if (INT_GET(agi->agi_length, ARCH_CONVERT) != agblocks)  {
+				retval = XR_AG_AGI;
+				do_warn(
+			"bad length # %d for agi %d, should be %llu\n",
+					INT_GET(agi->agi_length, ARCH_CONVERT), i, agblocks);
+				if (!no_modify)
+					INT_SET(agi->agi_length, ARCH_CONVERT, (xfs_agblock_t) agblocks);
+			}
+		}
+	}
+
+	/* don't check inode btree -- will be checked by caller */
+
+	return(retval);
+}
+
+/*
+ * superblock comparison - compare an arbitrary superblock (sb) with
+ * the filesystem mount-point superblock (mp->m_sb).  returns XR_OK
+ * on a match and XR_SB_GEO_MISMATCH otherwise.
+ *
+ * the verified fields include id and geometry: everything ahead of
+ * sb_shared_vn in fs_geometry_t is compared bytewise.  the inprogress
+ * fields, version numbers, and counters are allowed to differ as well
+ * as all fields after the counters to cope with the pre-6.5 mkfs
+ * non-bzeroed secondary superblock sectors.
+ */
+
+int
+compare_sb(xfs_mount_t *mp, xfs_sb_t *sb)
+{
+	fs_geometry_t fs_geo, sb_geo;
+
+	get_sb_geometry(&fs_geo, &mp->m_sb);
+	get_sb_geometry(&sb_geo, sb);
+
+	if (memcmp(&fs_geo, &sb_geo,
+		   (char *) &fs_geo.sb_shared_vn - (char *) &fs_geo))
+		return(XR_SB_GEO_MISMATCH);
+
+	return(XR_OK);
+}
+
+/*
+ * possible fields that may have been set at mkfs time,
+ * sb_inoalignmt, sb_unit, sb_width.  We know that
+ * the quota inode fields in the secondaries should be zero.
+ * Likewise, the sb_flags and sb_shared_vn should also be
+ * zero and the shared version bit should be cleared for
+ * current mkfs's.
+ *
+ * And everything else in the buffer beyond sb_width should
+ * be zeroed.
+ */
+int
+secondary_sb_wack(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
+	xfs_agnumber_t i)
+{
+	int do_bzero;
+	int size;
+	int *ip;
+	int rval;
+
+	rval = do_bzero = 0;
+
+	/*
+	 * mkfs's that stamped a feature bit besides the ones in the mask
+	 * (e.g. were pre-6.5 beta) could leave garbage in the secondary
+	 * superblock sectors.  Anything stamping the shared fs bit or better
+	 * into the secondaries is ok and should generate clean secondary
+	 * superblock sectors.  so only run the bzero check on the
+	 * potentially garbaged secondaries.
+	 */
+	if (pre_65_beta ||
+	    (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK) == 0 ||
+	    sb->sb_versionnum < XFS_SB_VERSION_4)  {
+		/*
+		 * check for garbage beyond the last field set by the
+		 * pre-6.5 mkfs's.  Don't blindly use sizeof(sb).
+		 * Use field addresses instead so this code will still
+		 * work against older filesystems when the superblock
+		 * gets rev'ed again with new fields appended.
+		 */
+		size = (__psint_t)&sb->sb_width + sizeof(sb->sb_width)
+			- (__psint_t)sb;
+		for (ip = (int *)((__psint_t)sb + size);
+		     ip < (int *)((__psint_t)sb + mp->m_sb.sb_sectsize);
+		     ip++)  {
+			if (*ip)  {
+				do_bzero = 1;
+				break;
+			}
+		}
+
+		if (do_bzero)  {
+			rval |= XR_AG_SB_SEC;
+			if (!no_modify)  {
+				do_warn(
+		"zeroing unused portion of secondary superblock %d sector\n",
+					i);
+				bzero((void *)((__psint_t)sb + size),
+					mp->m_sb.sb_sectsize - size);
+			} else
+				do_warn(
+		"would zero unused portion of secondary superblock %d sector\n",
+					i);
+		}
+	}
+
+	/*
+	 * now look for the fields we can manipulate directly.
+	 * if we did a bzero and that bzero could have included
+	 * the field in question, just silently reset it.  otherwise,
+	 * complain.
+	 *
+	 * for now, just zero the flags field since only
+	 * the readonly flag is used
+	 */
+	if (sb->sb_flags)  {
+		if (!no_modify)
+			sb->sb_flags = 0;
+		if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn("bad flags field in superblock %d\n", i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	/*
+	 * quota inodes and flags in secondary superblocks
+	 * are never set by mkfs.  However, they could be set
+	 * in a secondary if a fs with quotas was growfs'ed since
+	 * growfs copies the new primary into the secondaries.
+	 */
+	if (sb->sb_inprogress == 1 && sb->sb_uquotino)  {
+		if (!no_modify)
+			sb->sb_uquotino = 0;
+		if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn(
+			"non-null user quota inode field in superblock %d\n",
+				i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	if (sb->sb_inprogress == 1 && sb->sb_pquotino)  {
+		if (!no_modify)
+			sb->sb_pquotino = 0;
+		if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn(
+			"non-null project quota inode field in superblock %d\n",
+				i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	if (sb->sb_inprogress == 1 && sb->sb_qflags)  {
+		if (!no_modify)
+			sb->sb_qflags = 0;
+		if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn("non-null quota flags in superblock %d\n", i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	/*
+	 * if the secondaries agree on a stripe unit/width or inode
+	 * alignment, those fields ought to be valid since they are
+	 * written at mkfs time (and the corresponding sb version bits
+	 * are set).
+	 */
+	if (!XFS_SB_VERSION_HASSHARED(sb) && sb->sb_shared_vn != 0)  {
+		if (!no_modify)
+			sb->sb_shared_vn = 0;
+		if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn("bad shared version number in superblock %d\n",
+				i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	if (!XFS_SB_VERSION_HASALIGN(sb) && sb->sb_inoalignmt != 0)  {
+		if (!no_modify)
+			sb->sb_inoalignmt = 0;
+		if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn("bad inode alignment field in superblock %d\n",
+				i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	if (!XFS_SB_VERSION_HASDALIGN(sb) &&
+	    (sb->sb_unit != 0 || sb->sb_width != 0))  {
+		if (!no_modify)
+			sb->sb_unit = sb->sb_width = 0;
+		if (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK || !do_bzero)  {
+			rval |= XR_AG_SB;
+			do_warn(
+			"bad stripe unit/width fields in superblock %d\n",
+				i);
+		} else
+			rval |= XR_AG_SB_SEC;
+	}
+
+	return(rval);
+}
+
+/*
+ * verify and reset the ag header if required.
+ *
+ * lower 4 bits of rval are set depending on what got modified.
+ * (see agheader.h for more details)
+ *
+ * NOTE -- this routine does not tell the user that it has
+ * altered things.  Rather, it is up to the caller to do so
+ * using the bits encoded into the return value.
+ */
+
+int
+verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
+	xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i)
+{
+	int rval = 0;
+	int status = XR_OK;
+	int status_sb = XR_OK;
+
+	status = verify_sb(sb, (i == 0));
+
+	if (status != XR_OK)  {
+		do_warn("bad on-disk superblock %d - %s\n",
+			i, err_string(status));
+	}
+
+	status_sb = compare_sb(mp, sb);
+
+	if (status_sb != XR_OK)  {
+		do_warn("primary and secondary superblock %d conflict - %s\n",
+			i, err_string(status_sb));
+	}
+
+	if (status != XR_OK || status_sb != XR_OK)  {
+		if (!no_modify)  {
+			*sb = mp->m_sb;
+
+			/*
+			 * clear the more transient fields
+			 */
+			sb->sb_inprogress = 1;
+
+			sb->sb_icount = 0;
+			sb->sb_ifree = 0;
+			sb->sb_fdblocks = 0;
+			sb->sb_frextents = 0;
+
+			sb->sb_qflags = 0;
+		}
+
+		rval |= XR_AG_SB;
+	}
+
+	rval |= secondary_sb_wack(mp, sbuf, sb, i);
+
+	rval |= verify_set_agf(mp, agf, i);
+	rval |= verify_set_agi(mp, agi, i);
+
+	return(rval);
+}
diff --git a/repair/agheader.h b/repair/agheader.h
new file mode 100644
index 000000000..48326f7e8
--- /dev/null
+++ b/repair/agheader.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+typedef struct fs_geometry  {
+	/*
+	 * superblock geometry summary; types should match the superblock types
+	 */
+	__uint32_t	sb_blocksize;	/* blocksize (bytes) */
+	xfs_drfsbno_t	sb_dblocks;	/* # data blocks */
+	xfs_drfsbno_t	sb_rblocks;	/* # realtime blocks */
+	xfs_drtbno_t	sb_rextents;	/* # realtime extents */
+	uuid_t		sb_uuid;	/* fs uuid */
+	xfs_dfsbno_t	sb_logstart;	/* starting log block # */
+	xfs_agblock_t	sb_rextsize;	/* realtime extent size (blocks) */
+	xfs_agblock_t	sb_agblocks;	/* # of blocks per ag */
+	xfs_agnumber_t	sb_agcount;	/* # of ags */
+	xfs_extlen_t	sb_rbmblocks;	/* # of rt bitmap blocks */
+	xfs_extlen_t	sb_logblocks;	/* # of log blocks */
+	__uint16_t	sb_sectsize;	/* volume sector size (bytes) */
+	__uint16_t	sb_inodesize;	/* inode size (bytes) */
+	__uint8_t	sb_imax_pct;	/* max % of fs for inode space */
+
+	/*
+	 * these don't have to match the superblock types but are placed
+	 * before sb_shared_vn because these values don't have to be
+	 * checked manually.  These variables will be set only on
+	 * filesystems with dependably good (fully initialized)
+	 * secondary superblock sectors, will be stamped in all
+	 * superblocks at mkfs time, and are features that cannot
+	 * be downgraded unless all superblocks in the filesystem
+	 * are rewritten.
+	 */
+	int		sb_extflgbit;	/* extent flag feature bit set */
+
+	/*
+	 * fields from here on are skipped by compare_sb(); check them manually
+	 */
+	__uint8_t	sb_shared_vn;	/* shared version number */
+	xfs_extlen_t	sb_inoalignmt;	/* inode chunk alignment, fsblocks */
+	__uint32_t	sb_unit;	/* stripe or raid unit */
+	__uint32_t	sb_width;	/* stripe or raid width */
+
+	/*
+	 * these don't have to match, they track superblock properties
+	 * that could have been upgraded and/or downgraded during
+	 * run-time so that the primary superblock has them but the
+	 * secondaries do not.
+	 *
+	 * Plus, their associated data fields may be corrupt in cases
+	 * where the filesystem was made on a pre-6.5 campus alpha mkfs
+	 * and the feature was enabled on the filesystem later.
+	 */
+	int		sb_ialignbit;	/* sb has inode alignment bit set */
+	int		sb_salignbit;	/* sb has stripe alignment bit set */
+	int		sb_sharedbit;	/* sb has shared version bit set */
+
+	int		sb_fully_zeroed; /* has zeroed secondary sb sectors */
+} fs_geometry_t;
+
+typedef struct fs_geo_list  {
+	struct fs_geo_list	*next;
+	int			refs;
+	int			index;
+	fs_geometry_t		geo;
+} fs_geo_list_t;
+
+/*
+ * fields for sb_last_nonzero
+ */
+
+#define XR_SB_COUNTERS		0x0001
+#define XR_SB_INOALIGN		0x0002
+#define XR_SB_SALIGN		0x0004
+
+/*
+ * what got modified by verify_set_* routines
+ */
+
+#define XR_AG_SB	0x1
+#define XR_AG_AGF	0x2
+#define XR_AG_AGI	0x4
+#define XR_AG_SB_SEC	0x8
+
+
diff --git a/repair/attr_repair.c b/repair/attr_repair.c
new file mode 100644
index 000000000..d64230b09
--- /dev/null
+++ b/repair/attr_repair.c
@@ -0,0 +1,1067 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <errno.h>
+#include <acl.h>
+
+#include "globals.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dinode.h"
+#include "bmap.h"
+
+static int acl_valid(struct acl *aclp);
+static int mac_valid(mac_t lp);
+
+
+/*
+ * For attribute repair, there are 3 formats to worry about. First, is 
+ * shortform attributes which reside in the inode. Second is the leaf
+ * form, and lastly the btree. Much of this models after the directory
+ * structure so code resembles the directory repair cases. 
+ * For shortform case, if an attribute looks corrupt, it is removed.
+ * If that leaves the shortform down to 0 attributes, it's okay and 
+ * will appear to just have a null attribute fork. Some checks are done
+ * for validity of the value field based on what the security needs are.
+ * Calls will be made out to mac_valid or acl_valid libc libraries if
+ * the security attributes exist. They will be cleared if invalid. No
+ * other values will be checked. The DMF folks do not have current
+ * requirements, but may in the future.
+ *
+ * For leaf block attributes, it requires more processing. One sticky
+ * point is that the attributes can be local (within the leaf) or 
+ * remote (outside the leaf in other blocks). Thinking of local only
+ * if you get a bad attribute, and want to delete just one, its a-okay
+ * if it remains large enough to still be a leaf block attribute. Otherwise,
+ * it may have to be converted to shortform. How to convert this and when
+ * is an issue. This call is happening in Phase3. Phase5 will capture empty
+ * blocks, but Phase6 allows you to use the simulation library which knows
+ * how to handle attributes in the kernel for converting formats. What we
+ * could do is mark an attribute to be cleared now, but in phase6 somehow
+ * have it cleared for real and then the format changed to shortform if
+ * applicable. Since this requires more work than I anticipate can be
+ * accomplished for the next release, we will instead just say any bad
+ * attribute in the leaf block will make the entire attribute fork be
+ * cleared. The simplest way to do that is to ignore the leaf format, and
+ * call clear_dinode_attr to just make a shortform attribute fork with
+ * zero entries. 
+ *
+ * Another issue with handling repair on leaf attributes is the remote
+ * blocks. To make sure that they look good and are not used multiple times
+ * by the attribute fork, some mechanism to keep track of all them is necessary.
+ * Do this in the future, time permitting. For now, note that there is no
+ * check for remote blocks and their allocations.
+ *
+ * For btree formatted attributes, the model can follow directories. That
+ * would mean go down the tree to the leftmost leaf. From there moving down
+ * the links and processing each. They would call back up the tree, to verify
+ * that the tree structure is okay. Any problems will result in the attribute
+ * fork being emptied and put in shortform format.
+ */
+
+/*
+ * Check the value of a security attribute (ACL, MAC label, capability set).
+ * Only called when the root flag is set, as these names may be used freely in
+ * user attribute land.  A non-NULL value is a remote value; otherwise the value
+ * follows the name in namevalue.  Returns 1 if the value is illegal, else 0.
+ */
+
+int
+valuecheck(char *namevalue, char *value, int namelen, int valuelen)
+{
+	/* for proper alignment issues, get the structs and bcopy the values */
+	mac_label macl;
+	struct acl thisacl;
+	void *valuep;
+	int clearit = 0;
+
+	if ((strncmp(namevalue, SGI_ACL_FILE, SGI_ACL_FILE_SIZE) == 0) ||
+			(strncmp(namevalue, SGI_ACL_DEFAULT,
+				SGI_ACL_DEFAULT_SIZE) == 0)) {
+		/* a local value too big for thisacl is never copied, just rejected */
+		valuep = value;
+		if (value == NULL && valuelen >= 0 && valuelen <= (int) sizeof(thisacl)) {
+			bzero(&thisacl, sizeof(struct acl));
+			bcopy(namevalue+namelen, &thisacl, valuelen);
+			valuep = &thisacl;
+		}
+		if (valuep == NULL || acl_valid((struct acl *) valuep) != 0) { /* 0 means valid */
+			clearit = 1;
+			do_warn("entry contains illegal value in attribute named SGI_ACL_FILE or SGI_ACL_DEFAULT\n");
+		}
+	} else if (strncmp(namevalue, SGI_MAC_FILE, SGI_MAC_FILE_SIZE) == 0) {
+		/* a local value too big for macl is never copied, just rejected */
+		valuep = value;
+		if (value == NULL && valuelen >= 0 && valuelen <= (int) sizeof(macl)) {
+			bzero(&macl, sizeof(mac_label));
+			bcopy(namevalue+namelen, &macl, valuelen);
+			valuep = &macl;
+		}
+		if (valuep == NULL || mac_valid((mac_label *) valuep) != 1) { /* 1 means valid */
+			 /*
+			 *if sysconf says MAC enabled, 
+			 *	temp = mac_from_text("msenhigh/mintlow", NULL)
+			 *	copy it to value, update valuelen, totsize
+			 *	This causes pushing up or down of all following
+			 *	attributes, forcing a attribute format change!!
+			 * else clearit = 1;
+			 */
+			clearit = 1;
+			do_warn("entry contains illegal value in attribute named SGI_MAC_LABEL\n");
+		}
+	} else if (strncmp(namevalue, SGI_CAP_FILE, SGI_CAP_FILE_SIZE) == 0) {
+		if ( valuelen != sizeof(cap_set_t)) {
+			clearit = 1;
+			do_warn("entry contains illegal value in attribute named SGI_CAP_FILE\n");
+		}
+	}
+
+	return(clearit);
+}
+
+
+/*
+ * this routine validates the shortform attributes held in the attribute
+ * fork of inode ino (dip is its on-disk inode).  bogus entries are removed
+ * or the list is truncated when modification is allowed; in no-modify mode
+ * the problems are only reported.  *repair is set to 1 if anything was
+ * fixed.  returns 1 on an inconsistent empty fork, else *repair.
+ */
+int
+process_shortform_attr(
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int 		*repair)
+{
+	xfs_attr_shortform_t	*asf;
+	xfs_attr_sf_entry_t	*currententry, *nextentry, *tempentry;
+	int			i, junkit;
+	int			currentsize, remainingspace;
+
+	*repair = 0;
+
+	asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+
+	/* Assumption: hdr.totsize is less than a leaf block and was checked
+	 * by lclinode for valid sizes. Check the count though.	
+	*/
+	if (INT_GET(asf->hdr.count, ARCH_CONVERT) == 0)
+		/* then the total size should just be the header length */
+		if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) != sizeof(xfs_attr_sf_hdr_t)) {
+			/* whoops there's a discrepancy. Clear the hdr */
+			if (!no_modify) {
+				do_warn("there are no attributes in the fork for inode %llu \n", ino);
+				INT_SET(asf->hdr.totsize, ARCH_CONVERT,
+						sizeof(xfs_attr_sf_hdr_t));
+				*repair = 1;
+				return(1);
+			} else {
+				do_warn("would junk the attribute fork since the count is 0 for inode %llu\n",ino);
+				return(1);
+			}
+		}
+
+	currentsize = sizeof(xfs_attr_sf_hdr_t);
+	remainingspace = INT_GET(asf->hdr.totsize, ARCH_CONVERT) - currentsize;
+	nextentry = &asf->list[0];
+	for (i = 0; i < INT_GET(asf->hdr.count, ARCH_CONVERT); i++)  {
+		currententry = nextentry;
+		junkit = 0;
+
+		/* don't go off the end if the hdr.count was off */
+		if ((currentsize + (sizeof(xfs_attr_sf_entry_t) - 1)) >
+				INT_GET(asf->hdr.totsize, ARCH_CONVERT))
+			break; /* get out and reset count and totSize */
+
+		/* if the namelen is 0, can't get to the rest of the entries */
+		if (INT_GET(currententry->namelen, ARCH_CONVERT) == 0) {
+			do_warn("zero length name entry in attribute fork, ");
+			if (!no_modify) {
+				do_warn("truncating attributes for inode %llu to %d \n", ino, i);
+				*repair = 1;
+				break; 	/* and then update hdr fields */
+			} else {
+				do_warn("would truncate attributes for inode %llu to %d \n", ino, i);
+				break;
+			}
+		} else {
+			/* It's okay to have a 0 length valuelen, but do a
+			 * rough check to make sure we haven't gone outside of
+			 * totsize.
+			 */
+			if ((remainingspace < INT_GET(currententry->namelen, ARCH_CONVERT)) ||
+				((remainingspace - INT_GET(currententry->namelen, ARCH_CONVERT))
+					  < INT_GET(currententry->valuelen, ARCH_CONVERT))) {
+				do_warn("name or value attribute lengths are too large, \n");
+				if (!no_modify) {
+					do_warn(" truncating attributes for inode %llu to %d \n", ino, i);
+					*repair = 1;
+					break; /* and then update hdr fields */
+				} else {
+					do_warn(" would truncate attributes for inode %llu to %d \n", ino, i);
+					break;
+				}
+			}
+		}
+
+		/* namecheck checks for / and null terminated for file names. 
+		 * attributes names currently follow the same rules.
+		*/
+		if (namecheck((char *)&currententry->nameval[0],
+				INT_GET(currententry->namelen, ARCH_CONVERT)))  {
+			do_warn("entry contains illegal character in shortform attribute name\n");
+			junkit = 1;
+		}
+
+		if (INT_GET(currententry->flags, ARCH_CONVERT) & XFS_ATTR_INCOMPLETE) {
+			do_warn("entry has INCOMPLETE flag on in shortform attribute\n");
+			junkit = 1;
+		}
+
+		/* Only check values for root security attributes; OR in the result so an earlier verdict is kept */
+		if (INT_GET(currententry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT)
+			junkit |= valuecheck((char *)&currententry->nameval[0], NULL,
+				INT_GET(currententry->namelen, ARCH_CONVERT), INT_GET(currententry->valuelen, ARCH_CONVERT));
+
+		remainingspace = remainingspace -
+				XFS_ATTR_SF_ENTSIZE(currententry);
+
+		if (junkit) {
+			if (!no_modify) {
+				/* get rid of only this entry */
+				do_warn("removing attribute entry %d for inode %llu \n", i, ino);
+				tempentry = (xfs_attr_sf_entry_t *)
+					((__psint_t) currententry +
+					 XFS_ATTR_SF_ENTSIZE(currententry));
+				memmove(currententry,tempentry,remainingspace);
+				INT_MOD(asf->hdr.count, ARCH_CONVERT, -1);
+				i--; /* no worries, it will wrap back to 0 */
+				*repair = 1;
+				continue; /* go back up now */
+			} else {
+				do_warn("would remove attribute entry %d for inode %llu \n", i, ino);
+			}
+		}
+
+		/* Let's get ready for the next entry... */
+		nextentry = (xfs_attr_sf_entry_t *)
+			 ((__psint_t) nextentry +
+			 XFS_ATTR_SF_ENTSIZE(currententry));
+		currentsize = currentsize + XFS_ATTR_SF_ENTSIZE(currententry);
+
+		} /* end the loop */
+
+
+	if (INT_GET(asf->hdr.count, ARCH_CONVERT) != i)  {
+		if (no_modify)  {
+			do_warn("would have corrected attribute entry count in inode %llu from %d to %d\n",
+				ino, INT_GET(asf->hdr.count, ARCH_CONVERT), i);
+		} else  {
+			do_warn("corrected attribute entry count in inode %llu, was %d, now %d\n",
+				ino, INT_GET(asf->hdr.count, ARCH_CONVERT), i);
+			INT_SET(asf->hdr.count, ARCH_CONVERT, i);
+			*repair = 1;
+		}
+	}
+
+	/* ASSUMPTION: currentsize <= totsize */
+	if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) != currentsize)  {
+		if (no_modify)  {
+			do_warn("would have corrected attribute totsize in inode %llu from %d to %d\n",
+				ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), currentsize);
+		} else  {
+			do_warn("corrected attribute entry totsize in inode %llu, was %d, now %d\n",
+				ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), currentsize);
+			INT_SET(asf->hdr.totsize, ARCH_CONVERT, currentsize);
+			*repair = 1;
+		}
+	}
+
+	return(*repair);
+}
+
+/* This routine brings in blocks from disk one by one and assembles them
+ * in the value buffer. If get_bmapi gets smarter later to return an extent
+ * or list of extents, that would be great. For now, we don't expect too
+ * many blocks per remote value, so one by one is sufficient.
+ */
+static int
+rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap,
+		xfs_dablk_t blocknum, int valuelen, char* value)
+{
+	xfs_dfsbno_t	bno;
+	xfs_buf_t	*bp;
+	int		clearit = 0, i = 0, length = 0, amountdone = 0;
+	
+	/* ASSUMPTION: valuelen is a valid number, so use it for looping */
+	/* Note that valuelen is not a multiple of blocksize */  
+	while (amountdone < valuelen) {
+		bno = blkmap_get(blkmap, blocknum + i);
+		if (bno == NULLDFSBNO) {
+			do_warn("remote block for attributes of inode %llu"
+				" is missing\n", ino);
+			clearit = 1;
+			break;
+		}
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_warn("can't read remote block for attributes"
+				" of inode %llu\n", ino);
+			clearit = 1;
+			break;
+		}
+		ASSERT(mp->m_sb.sb_blocksize == XFS_BUF_COUNT(bp));
+		length = MIN(XFS_BUF_COUNT(bp), valuelen - amountdone);
+		bcopy(XFS_BUF_PTR(bp), value, length); 
+		amountdone += length;
+		value += length;
+		i++;
+		libxfs_putbuf(bp);
+	}
+	return (clearit);
+}
+
+/*
+ * freespace map for directory and attribute leaf blocks (1 bit per byte)
+ * 1 == used, 0 == free
+ */
+static da_freemap_t attr_freemap[DA_BMAP_SIZE];
+
+/*
+ * Validate one attribute leaf block that has already been read in.  The
+ * magic number and forward / backward links are checked by the caller.
+ *
+ * mp              - mount structure, supplies the block size
+ * leaf            - the leaf block to check
+ * da_bno, ino     - block number within the fork and inode, for messages
+ * blkmap          - attribute fork block map, handed to rmtval_get()
+ * last_hashval    - no entry in this block may hash lower than this
+ * current_hashval - out: hash of the last entry that passed its name and
+ *                   value checks, or last_hashval if none got that far
+ * repair          - out: 1 if the block header was corrected in place
+ *
+ * If any problems occur the routine returns with non-zero.  In this case
+ * the next step is to clear the attribute fork, by changing it to
+ * shortform and zeroing it out.  Forkoff need not be changed.
+ */
+int
+process_leaf_attr_block(
+	xfs_mount_t	*mp,
+	xfs_attr_leafblock_t *leaf,
+	xfs_dablk_t	da_bno,
+	xfs_ino_t	ino,
+	blkmap_t	*blkmap,
+	xfs_dahash_t	last_hashval,
+	xfs_dahash_t	*current_hashval,
+	int 		*repair)	
+{
+	xfs_attr_leaf_entry_t *entry;
+	xfs_attr_leaf_name_local_t *local;
+	xfs_attr_leaf_name_remote_t *remotep;
+	int  i, start, stop, clearit, usedbs, firstb, thissize;
+
+	clearit = usedbs = 0;
+	*repair = 0;
+	/* callers read this back even when the block holds no entries */
+	*current_hashval = last_hashval;
+	firstb = mp->m_sb.sb_blocksize; 
+	stop = sizeof(xfs_attr_leaf_hdr_t);
+
+	/* does the count look sorta valid? */
+	if (INT_GET(leaf->hdr.count, ARCH_CONVERT)
+				* sizeof(xfs_attr_leaf_entry_t)
+				+ sizeof(xfs_attr_leaf_hdr_t)
+							> XFS_LBSIZE(mp)) {
+		do_warn("bad attribute count %d in attr block %u, inode %llu\n",
+			(int) INT_GET(leaf->hdr.count, ARCH_CONVERT),
+						da_bno, ino);
+		return (1);
+	}
+
+	init_da_freemap(attr_freemap);
+	(void) set_da_freemap(mp, attr_freemap, 0, stop);
+
+	/* go thru each entry checking for problems */
+	for (i = 0, entry = &leaf->entries[0]; 
+			i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+						i++, entry++) {
+		/* the name must start inside the block, not at or past its end */
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) >= XFS_LBSIZE(mp)) {
+			do_warn("bad attribute nameidx %d in attr block %u, inode %llu\n",
+				(int)INT_GET(entry->nameidx, ARCH_CONVERT),
+				da_bno,ino);
+			clearit = 1;
+			break;
+			}
+
+		if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_INCOMPLETE) {
+			/* we are inconsistent state. get rid of us */
+			do_warn("attribute entry #%d in attr block %u, inode %llu is INCOMPLETE\n",
+				i, da_bno, ino);
+			clearit = 1;
+			break;
+			}
+
+		/* mark the entry used */
+		start = (__psint_t)&leaf->entries[i] - (__psint_t)leaf;
+		stop = start + sizeof(xfs_attr_leaf_entry_t);
+		if (set_da_freemap(mp, attr_freemap, start, stop))  {
+			do_warn("attribute entry %d in attr block %u, inode %llu claims already used space\n",
+				i,da_bno,ino);
+			clearit = 1;
+			break;	/* got an overlap */
+			}
+
+		if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) {
+			local = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);	
+			if ((INT_GET(local->namelen, ARCH_CONVERT) == 0) || 
+					(namecheck((char *)&local->nameval[0], 
+						INT_GET(local->namelen, ARCH_CONVERT)))) {
+				do_warn("attribute entry %d in attr block %u, inode %llu has bad name (namelen = %d)\n",
+					i, da_bno, ino, (int) INT_GET(local->namelen, ARCH_CONVERT));
+				clearit = 1;
+				break;
+				};
+
+			/* Check on the hash value.  Checking the ordering of
+			 * hash values should be redundant, since one wrong
+			 * value clears the whole fork, but for paranoia check
+			 * it anyway in case both the name and the hashvalue
+			 * were wrong but matched.  Unlikely, however. */
+			if (INT_GET(entry->hashval, ARCH_CONVERT) != 
+				libxfs_da_hashname((char *)&local->nameval[0],
+					INT_GET(local->namelen, ARCH_CONVERT)) ||
+				(INT_GET(entry->hashval, ARCH_CONVERT)
+							< last_hashval)) {
+				do_warn("bad hashvalue for attribute entry %d in attr block %u, inode %llu\n",
+					i, da_bno, ino);
+				clearit = 1;
+				break;
+			}
+
+			/* Only check values for root security attributes */
+			if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) 
+				if (valuecheck((char *)&local->nameval[0], NULL,
+					    INT_GET(local->namelen, ARCH_CONVERT), INT_GET(local->valuelen, ARCH_CONVERT))) {
+					do_warn("bad security value for attribute entry %d in attr block %u, inode %llu\n",
+						i,da_bno,ino);
+					clearit = 1;
+					break;
+				};
+			thissize = XFS_ATTR_LEAF_ENTSIZE_LOCAL(
+					INT_GET(local->namelen, ARCH_CONVERT), INT_GET(local->valuelen, ARCH_CONVERT));
+		} else {
+			/* do the remote case */
+			remotep = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
+			thissize = XFS_ATTR_LEAF_ENTSIZE_REMOTE(
+					INT_GET(remotep->namelen, ARCH_CONVERT)); 
+
+			if ((INT_GET(remotep->namelen, ARCH_CONVERT) == 0) || 
+				   (namecheck((char *)&remotep->name[0],
+					INT_GET(remotep->namelen, ARCH_CONVERT))) ||
+				   (INT_GET(entry->hashval, ARCH_CONVERT)
+						!= libxfs_da_hashname(
+					(char *)&remotep->name[0],
+					 INT_GET(remotep->namelen, ARCH_CONVERT))) ||
+				   (INT_GET(entry->hashval, ARCH_CONVERT)
+						< last_hashval) ||
+				   (INT_GET(remotep->valueblk, ARCH_CONVERT) == 0)) {
+				do_warn("inconsistent remote attribute entry %d in attr block %u, ino %llu\n",
+					i, da_bno, ino);
+				clearit = 1;
+				break;
+			};
+
+			if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) {
+				char*	value;
+				if ((value = malloc(INT_GET(remotep->valuelen, ARCH_CONVERT)))==NULL){
+					do_warn("cannot malloc enough for remotevalue attribute for inode %llu\n",ino);
+					do_warn("SKIPPING this remote attribute\n");
+					continue;
+				}
+				if (rmtval_get(mp, ino, blkmap,
+						INT_GET(remotep->valueblk, ARCH_CONVERT),
+						INT_GET(remotep->valuelen, ARCH_CONVERT), value)) {
+					do_warn("remote attribute get failed for entry %d, inode %llu\n", i,ino);
+					clearit = 1;
+					free(value);
+					break;
+				}
+				if (valuecheck((char *)&remotep->name[0], value,
+					    INT_GET(remotep->namelen, ARCH_CONVERT), INT_GET(remotep->valuelen, ARCH_CONVERT))){
+					do_warn("remote attribute value check  failed for entry %d, inode %llu\n", i, ino);
+					clearit = 1;
+					free(value);
+					break;
+				}
+				free(value);
+			}
+		}
+
+		*current_hashval = last_hashval 
+				 = INT_GET(entry->hashval, ARCH_CONVERT);
+
+		if (set_da_freemap(mp, attr_freemap, INT_GET(entry->nameidx, ARCH_CONVERT),
+				INT_GET(entry->nameidx, ARCH_CONVERT) + thissize))  {
+			do_warn("attribute entry %d in attr block %u, inode %llu claims used space\n",
+				i, da_bno, ino);
+			clearit = 1;
+			break;	/* got an overlap */
+		}			
+		usedbs += thissize;
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) < firstb) 
+			firstb = INT_GET(entry->nameidx, ARCH_CONVERT);
+
+	} /* end the loop */
+
+	if (!clearit) {
+		/* verify the header information is correct */
+		/* if the holes flag is set, don't reset first_used unless it's
+		 * pointing to used bytes.  we're being conservative here
+		 * since the block will get compacted anyhow by the kernel. 
+		 */
+		if (  (INT_GET(leaf->hdr.holes, ARCH_CONVERT) == 0
+		    && firstb != INT_GET(leaf->hdr.firstused, ARCH_CONVERT))
+		    || INT_GET(leaf->hdr.firstused, ARCH_CONVERT) > firstb)  {
+			if (!no_modify)  {
+				do_warn("- resetting first used heap value from %d to %d in block %u of attribute fork of inode %llu\n",
+					(int)INT_GET(leaf->hdr.firstused,
+						ARCH_CONVERT), firstb,
+						da_bno, ino);
+				INT_SET(leaf->hdr.firstused,
+						ARCH_CONVERT, firstb);
+				*repair = 1;
+			} else  {
+				do_warn("- would reset first used value from %d to %d in block %u of attribute fork of inode %llu\n",
+					(int)INT_GET(leaf->hdr.firstused,
+						ARCH_CONVERT), firstb,
+						da_bno, ino);
+			}
+		}
+
+		if (usedbs != INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT))  {
+			if (!no_modify)  {
+				do_warn("- resetting usedbytes cnt from %d to %d in block %u of attribute fork of inode %llu\n",
+					(int)INT_GET(leaf->hdr.usedbytes,
+					  ARCH_CONVERT), usedbs, da_bno, ino);
+				INT_SET(leaf->hdr.usedbytes,
+						ARCH_CONVERT, usedbs);
+				*repair = 1;
+			} else  {
+				do_warn("- would reset usedbytes cnt from %d to %d in block %u of attribute fork of %llu\n",
+					(int)INT_GET(leaf->hdr.usedbytes,
+					    ARCH_CONVERT), usedbs,da_bno,ino);
+			}
+		}
+
+		/* process_leaf_dir_block does a lot of work checking for
+		 * holes and compacting if appropriate.  Attributes probably
+		 * don't need all that, so the holes are left alone; compaction
+		 * can be added here later if it proves worthwhile. */
+	}
+	return (clearit);  /* and repair */
+}
+
+
+/*
+ * Walk the leaf level of a node-format attribute fork left to right,
+ * starting at the block recorded in da_cursor->level[0].bno.  Each leaf
+ * is checked with process_leaf_attr_block(), its back pointer must name
+ * the leaf before it, and the interior path above it is verified through
+ * the cursor.  A leaf repaired in place is written back unless no_modify
+ * is set.
+ *
+ * returns 0 if the attribute fork is ok, 1 if it has to be junked.
+ */
+int
+process_leaf_attr_level(xfs_mount_t	*mp,
+			da_bt_cursor_t	*da_cursor)
+{
+	int			repair;
+	xfs_attr_leafblock_t	*leaf;
+	xfs_buf_t		*bp;
+	xfs_ino_t		ino;
+	xfs_dfsbno_t		dev_bno;
+	xfs_dablk_t		da_bno;
+	xfs_dablk_t		prev_bno;
+	xfs_dahash_t		current_hashval = 0;
+	xfs_dahash_t		greatest_hashval = 0;	/* a leaf with no entries may not set it */
+
+	da_bno = da_cursor->level[0].bno;
+	ino = da_cursor->ino;
+	prev_bno = 0;
+
+	do {
+		repair = 0;
+		dev_bno = blkmap_get(da_cursor->blkmap, da_bno);
+		/*
+		 * 0 is the root block and no block
+		 * pointer can point to the root block of the btree
+		 */
+		ASSERT(da_bno != 0);
+
+		if (dev_bno == NULLDFSBNO) {
+			do_warn("can't map block %u for attribute fork "
+				"for inode %llu\n", da_bno, ino);
+			goto error_out; 
+		}
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno),
+					XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_warn("can't read file block %u (fsbno %llu) for"
+				" attribute fork of inode %llu\n",
+				da_bno, dev_bno, ino);
+			goto error_out;
+		}
+
+		leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
+
+		/* check magic number for attribute leaf block */
+		if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+						!= XFS_ATTR_LEAF_MAGIC) {
+			do_warn("bad attribute leaf magic %#x for inode %llu\n",
+				 INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		/*
+		 * for each block, process the block, verify its path,
+		 * then get next block.  update cursor values along the way
+		 */
+		if (process_leaf_attr_block(mp, leaf, da_bno, ino,
+				da_cursor->blkmap, current_hashval,
+				&greatest_hashval, &repair))  {
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		/* index can be set to hdr.count so match the indexes of the
+		 * interior blocks -- which at the end of the block will point
+		 * to 1 after the final real entry in the block
+		 */
+		da_cursor->level[0].hashval = greatest_hashval;
+		da_cursor->level[0].bp = bp;
+		da_cursor->level[0].bno = da_bno;
+		da_cursor->level[0].index
+				= INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		da_cursor->level[0].dirty = repair; 
+
+		if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno)  {
+			do_warn("bad sibling back pointer for block %u in "
+				"attribute fork for inode %llu\n", da_bno, ino);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		prev_bno = da_bno;
+		da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+		if (da_bno != 0 && verify_da_path(mp, da_cursor, 0))  {
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+		current_hashval = greatest_hashval;
+
+		if (repair && !no_modify) {
+			libxfs_writebuf(bp, 0);
+		} else {
+			libxfs_putbuf(bp);
+		}
+	} while (da_bno != 0);
+
+	/* verify the final path up (right-hand-side) if still ok */
+	if (verify_final_da_path(mp, da_cursor, 0))  {
+		do_warn("bad hash path in attribute fork for inode %llu\n",
+			da_cursor->ino);
+		goto error_out;
+	}
+
+	/* releases all buffers holding interior btree blocks */
+	release_da_cursor(mp, da_cursor, 0);
+	return(0);
+
+error_out:
+	/* release all buffers holding interior btree blocks */
+	err_release_da_cursor(mp, da_cursor, 0);
+	return(1);
+}
+
+
+/*
+ * Check an attribute fork that is a true btree -- one that has gotten
+ * big enough to be represented as a non-trivial (e.g. has more than
+ * just a block) btree.
+ *
+ * The left side of the tree is traversed down to the left-most leaf
+ * block, setting up the btree cursor along the way.  The leaf blocks
+ * are then walked left-to-right, with a parent-verification routine
+ * called each time a block is traversed.
+ *
+ * ino, dip and blkmap are recorded in the cursor for the routines below.
+ *
+ * Note that if we run into any problems, we will trash the attribute fork.
+ *
+ * returns 0 if things are ok, 1 if bad
+ * Note this code has been based off process_node_dir.
+ */
+int
+process_node_attr(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap)
+{
+	da_bt_cursor_t			cursor;
+	xfs_dablk_t			leaf_bno;
+
+	/* start from an all-zero cursor, then record the inode and its map */
+	bzero(&cursor, sizeof(da_bt_cursor_t));
+	cursor.blkmap = blkmap;
+	cursor.dip = dip;
+	cursor.ino = ino;
+	cursor.greatest_bno = 0;
+	cursor.type = 0;
+	cursor.active = 0;
+
+	/*
+	 * now process interior node. don't have any buffers held in this
+	 * path.  a return of 0 from the traversal means unsuccessful.
+	 */
+	if (traverse_int_dablock(mp, &cursor, &leaf_bno, XFS_ATTR_FORK) == 0)
+		return(1);
+
+	/*
+	 * now pass the cursor into the leaf-level processing routine,
+	 * which checks the interior paths up to the root including the
+	 * final right-most path.
+	 */
+	return (process_leaf_attr_level(mp, &cursor));
+}
+
+/*
+ * Start processing for a leaf or fuller btree attribute fork.
+ * A leaf-form fork is too big for the inode but small enough to fit
+ * into one block outside it; a node-form fork is handed on to
+ * process_node_attr().  Modelled after process_leaf_dir_block.
+ *
+ * returns 0 if things are ok, 1 if bad (attributes needs to be junked)
+ * *repair is set if anything was changed but the attributes can live
+ * thru it
+ */
+int
+process_longform_attr(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*repair)	/* out - 1 if something was fixed */
+{
+	xfs_attr_leafblock_t	*leaf;
+	xfs_dfsbno_t	bno;
+	xfs_buf_t	*bp;
+	xfs_dahash_t	next_hashval;
+	int		repairlinks = 0;
+
+	*repair = 0;
+
+	bno = blkmap_get(blkmap, 0);
+
+	if ( bno == NULLDFSBNO ) {
+		if (INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) == 0  &&
+		    dip->di_core.di_aformat == XFS_DINODE_FMT_EXTENTS )
+			/* it's okay the kernel can handle this state */
+			return(0);
+		else	{
+			do_warn("block 0 of inode %llu attribute fork"
+				" is missing\n", ino);
+			return(1);
+		}
+	}
+	/* FIX FOR bug 653709 -- EKN: valid AG numbers are 0 .. agcount - 1 */
+	if (mp->m_sb.sb_agcount <= XFS_FSB_TO_AGNO(mp, bno)) {
+		do_warn("agno of attribute fork of inode %llu out of "
+			"regular partition\n", ino);
+		return(1);
+	}
+
+	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+	if (!bp) {
+		do_warn("can't read block 0 of inode %llu attribute fork\n",
+			ino);
+		return(1);
+	}
+
+	/* verify leaf block */
+	leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
+
+	/* check sibling pointers in leaf block or root block 0 before
+	* we have to release the btree block
+	*/
+	if (   INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) != 0
+	    || INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != 0)  {
+		if (!no_modify)  {
+			do_warn("clearing forw/back pointers in block 0 "
+				"for attributes in inode %llu\n", ino);
+			repairlinks = 1;
+			INT_SET(leaf->hdr.info.forw, ARCH_CONVERT, 0);
+			INT_SET(leaf->hdr.info.back, ARCH_CONVERT, 0);
+		} else  {
+			do_warn("would clear forw/back pointers in block 0 "
+				"for attributes in inode %llu\n", ino);
+		}
+	}
+
+	/*
+	 * use magic number to tell us what type of attribute this is.
+	 * it's possible to have a node or leaf attribute in either an
+	 * extent format or btree format attribute fork.
+	 */
+	switch (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)) {
+	case XFS_ATTR_LEAF_MAGIC:	/* leaf-form attribute */
+		if (process_leaf_attr_block(mp, leaf, 0, ino, blkmap,
+				0, &next_hashval, repair)) {
+			/* the block is bad.  lose the attribute fork. */
+			libxfs_putbuf(bp);
+			return(1); 
+		}
+		*repair = *repair || repairlinks; 
+		break;
+
+	case XFS_DA_NODE_MAGIC:		/* btree-form attribute */
+		/* must do this now, to release block 0 before the traversal */
+		if (repairlinks) {
+			*repair = 1;
+			libxfs_writebuf(bp, 0);
+		} else 
+			libxfs_putbuf(bp);	
+		return (process_node_attr(mp, ino, dip, blkmap)); /* + repair */
+	default:
+		do_warn("bad attribute leaf magic # %#x for dir ino %llu\n", 
+			INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+		libxfs_putbuf(bp);
+		return(1);
+	}
+
+	if (*repair && !no_modify) 
+		libxfs_writebuf(bp, 0);
+	else
+		libxfs_putbuf(bp);
+
+	return(0);  /* repair may be set */
+}
+
+
+/*
+ * Check an inode's attribute fork, picking the checker that matches
+ * the fork's on-disk format.
+ *
+ * returns 1 if attributes got cleared
+ * and 0 if things are ok. 
+ */
+int
+process_attributes(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*repair)  /* returned if we did repair */
+{
+	xfs_dinode_core_t *core = &dip->di_core;
+	/* REFERENCED */
+	xfs_attr_shortform_t *asf;
+
+	asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+
+	if (core->di_aformat == XFS_DINODE_FMT_LOCAL) {
+		ASSERT(INT_GET(asf->hdr.totsize, ARCH_CONVERT) <= XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+		return (process_shortform_attr(ino, dip, repair));
+	}
+
+	/* if err, convert this to shortform and clear it */
+	/* if repair and no error, it's taken care of */
+	if (core->di_aformat == XFS_DINODE_FMT_EXTENTS ||
+	    core->di_aformat == XFS_DINODE_FMT_BTREE)
+		return (process_longform_attr(mp, ino, dip, blkmap, repair));
+
+	do_warn("illegal attribute format %d, ino %llu\n",
+		core->di_aformat, ino);
+	return (1);
+}
+
+/* 
+ * Validate an ACL.
+ *
+ * To be valid an ACL must hold no more than ACL_MAX_ENTRIES entries,
+ * exactly one each of ACL_USER_OBJ, ACL_GROUP_OBJ and ACL_OTHER_OBJ, at
+ * most one ACL_MASK (which is mandatory once any ACL_USER or ACL_GROUP
+ * entry is present), no repeated tag/id pair among the ACL_USER and
+ * ACL_GROUP entries, and no other kind of tag.
+ * returns 0 if valid, otherwise sets errno to EINVAL and returns -1
+ */
+static int
+acl_valid (struct acl *aclp)
+{
+	int	n_user = 0, n_group = 0, n_other = 0, n_mask = 0;
+	int	n_named = 0;	/* ACL_USER and ACL_GROUP entries seen */
+	int	cur, rest;
+
+	if (aclp == NULL || aclp->acl_cnt > ACL_MAX_ENTRIES)
+		goto acl_invalid;
+
+	for (cur = 0; cur < aclp->acl_cnt; cur++) {
+		struct acl_entry *ent = &aclp->acl_entry[cur];
+
+		switch (ent->ae_tag) {
+		case ACL_USER_OBJ:
+			if (++n_user > 1)
+				goto acl_invalid;
+			break;
+		case ACL_GROUP_OBJ:
+			if (++n_group > 1)
+				goto acl_invalid;
+			break;
+		case ACL_OTHER_OBJ:
+			if (++n_other > 1)
+				goto acl_invalid;
+			break;
+		case ACL_MASK:
+			if (++n_mask > 1)
+				goto acl_invalid;
+			break;
+		case ACL_USER:
+		case ACL_GROUP:
+			/* the same tag and id may not turn up again later on */
+			for (rest = cur + 1; rest < aclp->acl_cnt; rest++) {
+				if (aclp->acl_entry[rest].ae_tag == ent->ae_tag &&
+				    aclp->acl_entry[rest].ae_id == ent->ae_id)
+					goto acl_invalid;
+			}
+			n_named++;
+			break;
+		default:
+			goto acl_invalid;
+		}
+	}
+
+	if (n_user && n_group && n_other && (n_mask || !n_named))
+		return 0;
+acl_invalid:
+	errno = EINVAL;
+	return (-1);
+}
+
+/*
+ * Check that a category or division set is in strictly ascending order
+ * (so each value appears only once): returns 0 if so, -1 if not.
+ */
+static int
+__check_setvalue(const unsigned short *list, unsigned short count)
+{
+	const unsigned short *prev = list;
+	unsigned short left;
+	for (left = count; left > 1; left--, prev++)
+		if (prev[0] >= prev[1])
+			return -1;
+	return 0;
+}
+
+
+/*
+ * mac_valid(lp)
+ * check the validity of a mac label
+ * returns 1 if the label is valid, 0 if lp is NULL or the label is bad
+ */
+static int
+mac_valid(mac_t lp)
+{
+	if (lp == NULL)
+		return (0);
+
+	/* the category and division sets together are capped at MAC_MAX_SETS */
+	if ((lp->ml_catcount + lp->ml_divcount) > MAC_MAX_SETS)
+		return (0);
+
+	/*
+	 * check whether the msentype value is valid, and whether the level
+	 * and categories that go with it are appropriate.
+	 */
+	switch (lp->ml_msen_type) {
+	case MSEN_TCSEC_LABEL:
+	case MSEN_MLD_LABEL:
+		/* any categories must be ascending with no repeats */
+		if (lp->ml_catcount > 0 &&
+		    __check_setvalue(lp->ml_list, lp->ml_catcount) == -1)
+			return (0);
+		break;
+	case MSEN_ADMIN_LABEL:
+	case MSEN_EQUAL_LABEL:
+	case MSEN_HIGH_LABEL:
+	case MSEN_MLD_HIGH_LABEL:
+	case MSEN_LOW_LABEL:
+	case MSEN_MLD_LOW_LABEL:
+		/* these types take neither a level nor categories */
+		if (lp->ml_level != 0 || lp->ml_catcount > 0)
+			return (0);
+		break;
+	case MSEN_UNKNOWN_LABEL:
+	default:
+		return (0);
+	}
+
+	/*
+	 * check whether the minttype value is valid, and whether the grade
+	 * and divisions that go with it are appropriate.
+	 */
+	switch (lp->ml_mint_type) {
+	case MINT_EQUAL_LABEL:
+	case MINT_HIGH_LABEL:
+	case MINT_LOW_LABEL:
+		if (lp->ml_grade != 0 || lp->ml_divcount > 0)
+			return (0);
+		break;
+	case MINT_BIBA_LABEL:
+		/* divisions follow the categories in ml_list */
+		if (lp->ml_divcount > 0 &&
+		    __check_setvalue(lp->ml_list + lp->ml_catcount,
+				     lp->ml_divcount) == -1)
+			return (0);
+		break;
+	default:
+		return (0);
+	}
+
+	return (1);
+}
diff --git a/repair/attr_repair.h b/repair/attr_repair.h
new file mode 100644
index 000000000..61d3f212a
--- /dev/null
+++ b/repair/attr_repair.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_ATTRREPAIR_H
+#define _XR_ATTRREPAIR_H
+
+struct blkmap;
+
+/* check an inode's attribute fork: 0 if things are ok, nonzero otherwise */
+int
+process_attributes(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	struct blkmap	*blkmap,
+	int		*repair);
+
+#endif /* _XR_ATTRREPAIR_H */
diff --git a/repair/avl.c b/repair/avl.c
new file mode 100644
index 000000000..4d1a4ac82
--- /dev/null
+++ b/repair/avl.c
@@ -0,0 +1,1465 @@
+/**************************************************************************
+ *									  *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *									  *
+ **************************************************************************/
+
+#include <libxfs.h>
+
+#if defined(STAND_ALONE_DEBUG) || defined(AVL_USER_MODE_DEBUG)
+#define AVL_DEBUG
+#endif
+
+#include "avl.h"
+
+#define CERT	ASSERT
+
+#ifdef AVL_DEBUG
+
+/* Debug-only: ASSERT that np's balance factor, its child links (and their
+ * parent back-links), the key ordering and avl_nextino are consistent. */
+static void
+avl_checknode(
+	register avltree_desc_t *tree,
+	register avlnode_t *np)
+{
+	register avlnode_t *back = np->avl_back;
+	register avlnode_t *forw = np->avl_forw;
+	register avlnode_t *nextino = np->avl_nextino;
+	register int bal = np->avl_balance;
+
+	ASSERT(bal != AVL_BALANCE || (!back && !forw) || (back && forw));
+	ASSERT(bal != AVL_FORW || forw);
+	ASSERT(bal != AVL_BACK || back);
+	if (forw) {	/* forw child starts after np and points back at it */
+		ASSERT(AVL_START(tree, np) < AVL_START(tree, forw));
+		ASSERT(np->avl_forw->avl_parent == np);
+		ASSERT(back || bal == AVL_FORW);
+	} else {
+		ASSERT(bal != AVL_FORW);
+		ASSERT(bal == AVL_BALANCE || back);
+		ASSERT(bal == AVL_BACK || !back);
+	}
+
+	if (back) {	/* back child starts before np and points back at it */
+		ASSERT(AVL_START(tree, np) > AVL_START(tree, back));
+		ASSERT(np->avl_back->avl_parent == np);
+		ASSERT(forw || bal == AVL_BACK);
+	} else {
+		ASSERT(bal != AVL_BACK);
+		ASSERT(bal == AVL_BALANCE || forw);
+		ASSERT(bal == AVL_FORW || !forw);
+	}
+	if (nextino == NULL)
+		ASSERT(forw == NULL);
+	else
+		ASSERT(AVL_END(tree, np) <= AVL_START(tree, nextino));
+}
+
+/* Debug-only: walk the tree under root, running avl_checknode() on each
+ * node stepped on; offset tracks the AVL_END of the last node passed. */
+static void
+avl_checktree(
+	register avltree_desc_t *tree,
+	register avlnode_t *root)
+{
+	register avlnode_t *nlast, *nnext, *np;
+	__psunsigned_t offset = 0;
+	__psunsigned_t end;
+
+	nlast = nnext = root;
+	ASSERT(!nnext || nnext->avl_parent == NULL);
+
+	while (nnext) {
+		avl_checknode(tree, nnext);
+		end = AVL_END(tree, nnext);
+		/* ends at or before offset: step to avl_forw, or back up */
+		if (end <= offset) {
+			if ((np = nnext->avl_forw) && np != nlast) {
+				nlast = nnext;
+				nnext = np;
+			} else {
+				nlast = nnext;
+				nnext = nnext->avl_parent;
+			}
+			continue;
+		}
+
+		nlast = nnext;
+		if (np = nnext->avl_back) {
+			if (AVL_END(tree, np) > offset) {
+				nnext = np;
+				continue;
+			}
+		}
+
+		/* move on to avl_forw (or the parent) and advance offset */
+		np = nnext;
+		nnext = nnext->avl_forw;
+		if (!nnext)
+			nnext = np->avl_parent;
+		offset = end;
+	}
+}
+#else	/* ! AVL_DEBUG */
+#define avl_checktree(t,x)
+#endif	/* AVL_DEBUG */
+
+
+/*
+ * Rebalance from np up towards the root after a deletion below np.
+ * ``direction'' is the way that np's balance is headed after the
+ * deletion of one of its children -- e.g., deleting an avl_forw child
+ * sends avl_balance toward AVL_BACK.  A rotation at the top of the
+ * tree replaces tree->avl_root.  Called only when deleting a node.
+ */
+static void
+retreat(
+	avltree_desc_t *tree,
+	register avlnode_t *np,
+	register int direction)
+{
+	register avlnode_t **rootp = &tree->avl_root;
+	register avlnode_t *parent;
+	register avlnode_t *child;
+	register avlnode_t *tmp;
+	register int	bal;
+
+	do {
+		ASSERT(direction == AVL_BACK || direction == AVL_FORW);
+
+		if (np->avl_balance == AVL_BALANCE) {
+			np->avl_balance = direction;
+			return;
+		}
+
+		parent = np->avl_parent;
+
+		/*
+		 * If balance is being restored, no local node
+		 * reorganization is necessary, but may be at
+		 * a higher node.  Reset direction and continue.
+		 */
+		if (direction != np->avl_balance) {
+			np->avl_balance = AVL_BALANCE;
+			if (parent) {
+				if (parent->avl_forw == np)
+					direction = AVL_BACK;
+				else
+					direction = AVL_FORW;
+
+				np = parent;
+				continue;
+			}
+			return;
+		}
+
+		/*
+		 * Imbalance.  If a avl_forw node was removed, direction
+		 * (and, by reduction, np->avl_balance) is/was AVL_BACK.
+		 */
+		if (np->avl_balance == AVL_BACK) {
+
+			ASSERT(direction == AVL_BACK);
+			child = np->avl_back;
+			bal = child->avl_balance;
+
+			if (bal != AVL_FORW) /* single LL */ {
+				/*
+				 * np gets pushed down to lesser child's
+				 * avl_forw branch.
+				 *
+				 *  np->    -D 		    +B
+				 *	    / \		    / \
+				 * child-> B   deleted	   A  -D
+				 *	  / \		      /
+				 *	 A   C		     C
+				 */
+#ifdef AVL_PRINT
+				if (!(tree->avl_flags & AVLF_DUPLICITY))
+				cmn_err(CE_CONT, "!LL delete b 0x%x c 0x%x\n",
+					np, child);
+#endif
+				np->avl_back = child->avl_forw;
+				if (child->avl_forw)
+					child->avl_forw->avl_parent = np;
+				child->avl_forw = np;
+
+				if (parent) {
+					if (parent->avl_forw == np) {
+						parent->avl_forw = child;
+						direction = AVL_BACK;
+					} else {
+						ASSERT(parent->avl_back == np);
+						parent->avl_back = child;
+						direction = AVL_FORW;
+					}
+				} else {
+					ASSERT(*rootp == np);
+					*rootp = child;
+				}
+				np->avl_parent = child;
+				child->avl_parent = parent;
+
+				if (bal == AVL_BALANCE) {
+					np->avl_balance = AVL_BACK;
+					child->avl_balance = AVL_FORW;
+					return;
+				} else {
+					np->avl_balance = AVL_BALANCE;
+					child->avl_balance = AVL_BALANCE;
+					np = parent;
+					avl_checktree(tree, *rootp);
+					continue;
+				}
+			}
+
+			/* child->avl_balance == AVL_FORW  double LR rotation
+			 *
+			 * child's avl_forw node gets promoted up, along with
+			 * its avl_forw subtree
+			 *
+			 *  np->     -G 		  C
+			 *	     / \		 / \
+			 * child-> +B   H	       -B   G
+			 *	   / \   \	       /   / \
+			 *	  A  +C   deleted     A   D   H
+			 *	       \
+			 *	        D
+			 */
+#ifdef AVL_PRINT
+			if (!(tree->avl_flags & AVLF_DUPLICITY))
+			cmn_err(CE_CONT, "!LR delete b 0x%x c 0x%x t 0x%x\n",
+				np, child, child->avl_forw);
+#endif
+			tmp = child->avl_forw;
+			bal = tmp->avl_balance;
+
+			child->avl_forw = tmp->avl_back;
+			if (tmp->avl_back)
+				tmp->avl_back->avl_parent = child;
+
+			tmp->avl_back = child;
+			child->avl_parent = tmp;
+
+			np->avl_back = tmp->avl_forw;
+			if (tmp->avl_forw)
+				tmp->avl_forw->avl_parent = np;
+			tmp->avl_forw = np;
+
+			if (bal == AVL_FORW)
+				child->avl_balance = AVL_BACK;
+			else
+				child->avl_balance = AVL_BALANCE;
+
+			if (bal == AVL_BACK)
+				np->avl_balance = AVL_FORW;
+			else
+				np->avl_balance = AVL_BALANCE;
+
+			goto next;
+		}
+
+		ASSERT(np->avl_balance == AVL_FORW && direction == AVL_FORW);
+
+		child = np->avl_forw;
+		bal = child->avl_balance;
+
+		if (bal != AVL_BACK) /* single RR */ {
+			/*
+			 * np gets pushed down to greater child's
+			 * avl_back branch.
+			 *
+			 *  np->    +B 		     -D
+			 *	    / \		     / \
+			 *   deleted   D <-child   +B   E
+			 *	      / \	     \
+			 *	     C   E	      C
+			 */
+#ifdef AVL_PRINT
+			if (!(tree->avl_flags & AVLF_DUPLICITY))
+			cmn_err(CE_CONT, "!RR delete b 0x%x c 0x%x\n",
+				np, child);
+#endif
+			np->avl_forw = child->avl_back;
+			if (child->avl_back)
+				child->avl_back->avl_parent = np;
+			child->avl_back = np;
+
+			if (parent) {
+				if (parent->avl_forw == np) {
+					parent->avl_forw = child;
+					direction = AVL_BACK;
+				} else {
+					ASSERT(parent->avl_back == np);
+					parent->avl_back = child;
+					direction = AVL_FORW;
+				}
+			} else {
+				ASSERT(*rootp == np);
+				*rootp = child;
+			}
+			np->avl_parent = child;
+			child->avl_parent = parent;
+
+			if (bal == AVL_BALANCE) {
+				np->avl_balance = AVL_FORW;
+				child->avl_balance = AVL_BACK;
+				return;
+			} else {
+				np->avl_balance = AVL_BALANCE;
+				child->avl_balance = AVL_BALANCE;
+				np = parent;
+				avl_checktree(tree, *rootp);
+				continue;
+			}
+		}
+
+		/* child->avl_balance == AVL_BACK  double RL rotation (mirror of LR) */
+#ifdef AVL_PRINT
+		if (!(tree->avl_flags & AVLF_DUPLICITY))
+		cmn_err(CE_CONT, "!RL delete b 0x%x c 0x%x t 0x%x\n",
+			np, child, child->avl_back);
+#endif
+		tmp = child->avl_back;
+		bal = tmp->avl_balance;
+
+		child->avl_back = tmp->avl_forw;
+		if (tmp->avl_forw)
+			tmp->avl_forw->avl_parent = child;
+
+		tmp->avl_forw = child;
+		child->avl_parent = tmp;
+
+		np->avl_forw = tmp->avl_back;
+		if (tmp->avl_back)
+			tmp->avl_back->avl_parent = np;
+		tmp->avl_back = np;
+
+		if (bal == AVL_BACK)
+			child->avl_balance = AVL_FORW;
+		else
+			child->avl_balance = AVL_BALANCE;
+
+		if (bal == AVL_FORW)
+			np->avl_balance = AVL_BACK;
+		else
+			np->avl_balance = AVL_BALANCE;
+next:		/* shared tail of both double rotations: tmp takes np's place */
+		np->avl_parent = tmp;
+		tmp->avl_balance = AVL_BALANCE;
+		tmp->avl_parent = parent;
+
+		if (parent) {
+			if (parent->avl_forw == np) {
+				parent->avl_forw = tmp;
+				direction = AVL_BACK;
+			} else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = tmp;
+				direction = AVL_FORW;
+			}
+		} else {
+			ASSERT(*rootp == np);
+			*rootp = tmp;
+			return;
+		}
+
+		np = parent;
+		avl_checktree(tree, *rootp);
+	} while (np);
+}
+
+/*
+ *	Remove node np from tree.
+ *	avl_delete does the local tree manipulations (including the in-order
+ *	avl_nextino list), then calls retreat() to rebalance tree up to its root.
+ */
+void
+avl_delete(
+	register avltree_desc_t *tree,
+	register avlnode_t *np)
+{
+	register avlnode_t *forw = np->avl_forw;
+	register avlnode_t *back = np->avl_back;
+	register avlnode_t *parent = np->avl_parent;
+	register avlnode_t *nnext;
+
+
+	if (np->avl_back) {
+		/*
+		 * a left child exists, then greatest left descendent's nextino
+		 * is pointing to np; make it point to np->nextino.
+		 */
+		nnext = np->avl_back;
+		while (nnext) {
+			if (!nnext->avl_forw)
+				break; /* can't find anything bigger */
+			nnext = nnext->avl_forw;
+		}
+	} else
+	if (np->avl_parent) {
+		/*
+		 * find nearest ancestor with lesser value. That ancestor's
+		 * nextino is pointing to np; make it point to np->nextino
+		 */
+		 nnext = np->avl_parent;
+		 while (nnext) {
+			if (AVL_END(tree, nnext) <= AVL_END(tree, np))
+				break;
+			nnext = nnext->avl_parent;
+		}
+	} else
+		nnext = NULL;
+
+	if (nnext) {
+		ASSERT(nnext->avl_nextino == np);
+		nnext->avl_nextino = np->avl_nextino;
+		/*
+		 * 	Something precedes np; np cannot be firstino.
+		 */
+		ASSERT(tree->avl_firstino != np);
+	}
+	else {
+		/*
+		 * 	Nothing precedes np; after deletion, np's nextino
+		 * 	is firstino of tree.
+		 */
+		ASSERT(tree->avl_firstino == np);
+		tree->avl_firstino = np->avl_nextino;
+	}
+	
+
+	/*
+	 * Degenerate cases: np has at most one child.
+	 */
+	if (forw == NULL) {
+		forw = back;
+		goto attach;
+	}
+
+	if (back == NULL) {
+attach:
+		if (forw)
+			forw->avl_parent = parent;
+		if (parent) {
+			if (parent->avl_forw == np) {
+				parent->avl_forw = forw;
+				retreat(tree, parent, AVL_BACK);
+			} else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = forw;
+				retreat(tree, parent, AVL_FORW);
+			}
+		} else {
+			ASSERT(tree->avl_root == np);
+			tree->avl_root = forw;
+		}
+		avl_checktree(tree, tree->avl_root);
+		return;
+	}
+
+	/*
+	 * Harder case: children on both sides.
+	 * If back's avl_forw pointer is null, just have back
+	 * inherit np's avl_forw tree, remove np from the tree
+	 * and adjust balance counters starting at back.
+	 *
+	 * np->	    xI		    xH	(before retreat())
+	 *	    / \		    / \
+	 * back->  H   J	   G   J
+	 *	  /   / \             / \
+	 *       G   ?   ?           ?   ?
+	 *      / \
+	 *     ?   ?
+	 */
+	if ((forw = back->avl_forw) == NULL) {
+		/*
+		 * AVL_FORW retreat below will set back's
+		 * balance to AVL_BACK.
+		 */
+		back->avl_balance = np->avl_balance;
+		back->avl_forw = forw = np->avl_forw;
+		forw->avl_parent = back;
+		back->avl_parent = parent;
+		
+		if (parent) {
+			if (parent->avl_forw == np)
+				parent->avl_forw = back;
+			else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = back;
+			}
+		} else {
+			ASSERT(tree->avl_root == np);
+			tree->avl_root = back;
+		}
+
+		/*
+		 * back is taking np's place in the tree, and
+		 * has therefore lost an avl_back node (itself).
+		 */
+		retreat(tree, back, AVL_FORW);
+		avl_checktree(tree, tree->avl_root);
+		return;
+	}
+
+	/*
+	 * Hardest case: children on both sides, and back's
+	 * avl_forw pointer isn't null.  Find the immediately
+	 * inferior buffer by following back's avl_forw line
+	 * to the end, then have it inherit np's avl_forw tree.
+	 *
+	 * np->	    xI			      xH
+	 *	    / \			      / \
+	 *         G   J	     back->  G   J   (before retreat())
+	 *	  / \			    / \
+	 *       F   ?...  		   F   ?1
+	 *      /     \
+	 *     ?       H  <-forw
+	 *	      /
+	 *	     ?1
+	 */
+	while (back = forw->avl_forw)
+		forw = back;
+
+	/*
+	 * Will be adjusted by retreat() below.
+	 */
+	forw->avl_balance = np->avl_balance;
+	
+	/*
+	 * forw inherits np's avl_forw...
+	 */
+	forw->avl_forw = np->avl_forw;
+	np->avl_forw->avl_parent = forw;
+
+	/*
+	 * ... forw's parent gets forw's avl_back...
+	 */
+	back = forw->avl_parent;
+	back->avl_forw = forw->avl_back;
+	if (forw->avl_back)
+		forw->avl_back->avl_parent = back;
+
+	/*
+	 * ... forw gets np's avl_back...
+	 */
+	forw->avl_back = np->avl_back;
+	np->avl_back->avl_parent = forw;
+
+	/*
+	 * ... and forw gets np's parent.
+	 */
+	forw->avl_parent = parent;
+
+	if (parent) {
+		if (parent->avl_forw == np)
+			parent->avl_forw = forw;
+		else
+			parent->avl_back = forw;
+	} else {
+		ASSERT(tree->avl_root == np);
+		tree->avl_root = forw;
+	}
+
+	/*
+	 * What used to be forw's parent is the starting
+	 * point for rebalancing.  It has lost an avl_forw node.
+	 */
+	retreat(tree, back, AVL_BACK);
+	avl_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ * 	avl_findanyrange:
+ *
+ *	Given range r [start, end), find a range in tree that begins before
+ *	end and ends after start (it need not lie entirely inside r).  Unless
+ *	checklen is AVL_INCLUDE_ZEROLEN, zero-length ranges are passed over.
+ */
+avlnode_t *
+avl_findanyrange(
+	register avltree_desc_t *tree,
+	register __psunsigned_t start,
+	register __psunsigned_t end,
+	int 	checklen)
+{
+	register avlnode_t *cand = tree->avl_root;
+
+	/*
+	 * Walk down to the node whose range holds start or, when there is
+	 * none, to the node that succeeds start in sort order (if any).
+	 */
+	while (cand != NULL) {
+		if (start < AVL_START(tree, cand)) {
+			/*
+			 * cand follows start; without a left child it
+			 * is the succeeding node we are looking for.
+			 */
+			if (cand->avl_back == NULL)
+				break;
+			cand = cand->avl_back;
+		} else if (start >= AVL_END(tree, cand)) {
+			/*
+			 * cand ends at or before start; without a right
+			 * child the succeeding node is its nextino.
+			 */
+			if (cand->avl_forw == NULL) {
+				cand = cand->avl_nextino;
+				break;
+			}
+			cand = cand->avl_forw;
+		} else {
+			/* AVL_START(tree, cand) <= start < AVL_END(tree, cand) */
+			break;
+		}
+	}
+	if (cand == NULL) {
+		/*
+		 * nothing succeeds start, all existing ranges are before start.
+		 */
+		return NULL;
+	}
+
+	if (checklen == AVL_INCLUDE_ZEROLEN) {
+		/*
+		 * Any length will do: cand qualifies unless it lies
+		 * entirely after the range (it may straddle [start, end)).
+		 */
+		if (end <= AVL_START(tree, cand))
+			return(NULL);
+		return(cand);
+	}
+
+	/*
+	 * Step along the in-order list past zero-length ranges that
+	 * begin before end, looking for a non-zero length region.
+	 */
+	while (cand != NULL &&
+	       AVL_END(tree, cand) - AVL_START(tree, cand) == 0 &&
+	       AVL_START(tree, cand) < end)
+		cand = cand->avl_nextino;
+
+	if (cand != NULL && AVL_START(tree, cand) < end)
+		return(cand);
+	return NULL;
+}
+
+
+/*
+ * Returns the node whose [start, end) range contains value, else NULL.
+ */
+avlnode_t *
+avl_findrange(
+	register avltree_desc_t *tree,
+	register __psunsigned_t value)
+{
+	register avlnode_t *cur;
+
+	for (cur = tree->avl_root; cur != NULL; ) {
+		if (value < AVL_START(tree, cur)) {
+			/* value sorts before this range: go left */
+			cur = cur->avl_back;
+		} else if (value >= AVL_END(tree, cur)) {
+			/* value is at or past the end: go right */
+			cur = cur->avl_forw;
+		} else {
+			ASSERT(AVL_START(tree, cur) <= value &&
+			       value < AVL_END(tree, cur));
+			return cur;
+		}
+	}
+	return NULL;
+}
+
+
+/*
+ * Returns the node whose range starts exactly at value, or NULL if none.
+ */
+avlnode_t *
+avl_find(
+	register avltree_desc_t *tree,
+	register __psunsigned_t value)
+{
+	register avlnode_t *cur;
+	register __psunsigned_t key;
+
+	for (cur = tree->avl_root; cur != NULL; ) {
+		key = AVL_START(tree, cur);
+		if (value == key)
+			break;		/* exact hit on a range start */
+		/* smaller starts are kept to the left, larger to the right */
+		if (value < key)
+			cur = cur->avl_back;
+		else
+			cur = cur->avl_forw;
+	}
+	/* cur is NULL here when the search ran off the tree */
+	return cur;
+}
+
+
+/*
+ * Balance buffer AVL tree after a new node was attached to np on side growth.
+ * Called by avl_insert and avl_insert_immediate.
+ */
+static void
+avl_balance(
+	register avlnode_t **rootp,
+	register avlnode_t *np,
+	register int growth)
+{
+	/*
+	 * At this point, np points to the node to which
+	 * a new node has been attached.  All that remains is to
+	 * propagate avl_balance up the tree.
+	 */
+	for ( ; ; ) {
+		register avlnode_t *parent = np->avl_parent;
+		register avlnode_t *child;
+
+		CERT(growth == AVL_BACK || growth == AVL_FORW);
+
+		/*
+		 * If the buffer was already balanced, set avl_balance
+		 * to the new direction.  Continue if there is a
+		 * parent after setting growth to reflect np's
+		 * relation to its parent.
+		 */
+		if (np->avl_balance == AVL_BALANCE) {
+			np->avl_balance = growth;
+			if (parent) {
+				if (parent->avl_forw == np)
+					growth = AVL_FORW;
+				else {
+					ASSERT(parent->avl_back == np);
+					growth = AVL_BACK;
+				}
+
+				np = parent;
+				continue;
+			}
+			break;
+		}
+
+		if (growth != np->avl_balance) {
+			/*
+			 * Subtree is now balanced -- no net effect
+			 * in the size of the subtree, so leave.
+			 */
+			np->avl_balance = AVL_BALANCE;
+			break;
+		}
+
+		if (growth == AVL_BACK) {
+
+			child = np->avl_back;
+			CERT(np->avl_balance == AVL_BACK && child);
+
+			if (child->avl_balance == AVL_BACK) { /* single LL */
+				/*
+				 * ``A'' just got inserted;
+				 * np points to ``E'', child to ``C'',
+				 * and it is already AVL_BACK --
+				 * child will get promoted to top of subtree.
+
+				np->	     -E			C
+					     / \	       / \
+				child->	   -C   F	     -B   E
+					   / \		     /   / \
+					 -B   D		    A   D   F
+					 /
+					A
+
+					Note that child->avl_parent and
+					avl_balance get set in common code.
+				 */
+				np->avl_parent = child;
+				np->avl_balance = AVL_BALANCE;
+				np->avl_back = child->avl_forw;
+				if (child->avl_forw)
+					child->avl_forw->avl_parent = np;
+				child->avl_forw = np;
+			} else {
+				/*
+				 * double LR
+				 *
+				 * child's avl_forw node gets promoted to
+				 * the top of the subtree.
+
+				np->	     -E		      C
+					     / \	     / \
+				child->	   +B   F	   -B   E
+					   / \		   /   / \
+					  A  +C 	  A   D   F
+					       \
+						D
+
+				 */
+				register avlnode_t *tmp = child->avl_forw;
+
+				CERT(child->avl_balance == AVL_FORW && tmp);
+
+				child->avl_forw = tmp->avl_back;
+				if (tmp->avl_back)
+					tmp->avl_back->avl_parent = child;
+
+				tmp->avl_back = child;
+				child->avl_parent = tmp;
+
+				np->avl_back = tmp->avl_forw;
+				if (tmp->avl_forw)
+					tmp->avl_forw->avl_parent = np;
+
+				tmp->avl_forw = np;
+				np->avl_parent = tmp;
+
+				if (tmp->avl_balance == AVL_BACK)
+					np->avl_balance = AVL_FORW;
+				else
+					np->avl_balance = AVL_BALANCE;
+
+				if (tmp->avl_balance == AVL_FORW)
+					child->avl_balance = AVL_BACK;
+				else
+					child->avl_balance = AVL_BALANCE;
+
+				/*
+				 * Set child to point to tmp since it is
+				 * now the top of the subtree, and will
+				 * get attached to the subtree parent in
+				 * the common code below.
+				 */
+				child = tmp;
+			}
+
+		} else /* growth == AVL_FORW */ {
+
+			/*
+			 * This code is the mirror image of AVL_BACK above.
+			 */
+
+			child = np->avl_forw;
+			CERT(np->avl_balance == AVL_FORW && child);
+
+			if (child->avl_balance == AVL_FORW) { /* single RR */
+				np->avl_parent = child;
+				np->avl_balance = AVL_BALANCE;
+				np->avl_forw = child->avl_back;
+				if (child->avl_back)
+					child->avl_back->avl_parent = np;
+				child->avl_back = np;
+			} else {
+				/*
+				 * double RL
+				 */
+				register avlnode_t *tmp = child->avl_back;
+
+				ASSERT(child->avl_balance == AVL_BACK && tmp);
+
+				child->avl_back = tmp->avl_forw;
+				if (tmp->avl_forw)
+					tmp->avl_forw->avl_parent = child;
+
+				tmp->avl_forw = child;
+				child->avl_parent = tmp;
+
+				np->avl_forw = tmp->avl_back;
+				if (tmp->avl_back)
+					tmp->avl_back->avl_parent = np;
+
+				tmp->avl_back = np;
+				np->avl_parent = tmp;
+
+				if (tmp->avl_balance == AVL_FORW)
+					np->avl_balance = AVL_BACK;
+				else
+					np->avl_balance = AVL_BALANCE;
+
+				if (tmp->avl_balance == AVL_BACK)
+					child->avl_balance = AVL_FORW;
+				else
+					child->avl_balance = AVL_BALANCE;
+
+				child = tmp;
+			}
+		}
+
+		child->avl_parent = parent;
+		child->avl_balance = AVL_BALANCE;
+
+		if (parent) {
+			if (parent->avl_back == np)
+				parent->avl_back = child;
+			else
+				parent->avl_forw = child;
+		} else {
+			ASSERT(*rootp == np);
+			*rootp = child;
+		}
+
+		break;
+	}
+}
+
+static
+avlnode_t *
+avl_insert_find_growth(
+		register avltree_desc_t *tree,
+		register __psunsigned_t start,	/* range starts at start, */
+		register __psunsigned_t end,	/* ends at end, exclusive */
+		register int   *growthp)	/* OUT: AVL_BACK or AVL_FORW */
+{
+	avlnode_t *root = tree->avl_root;
+	register avlnode_t *cur = root;
+
+	/*
+	 * Locate the node to hang [start, end) from; *growthp gets the side.
+	 */
+	ASSERT(cur); /* caller ensures that there is at least one node in tree */
+
+	while (1) {
+		CERT(cur->avl_parent || root == cur);
+		CERT(!cur->avl_parent || root != cur);
+		CERT(!(cur->avl_back) || cur->avl_back->avl_parent == cur);
+		CERT(!(cur->avl_forw) || cur->avl_forw->avl_parent == cur);
+		CERT(cur->avl_balance != AVL_FORW || cur->avl_forw);
+		CERT(cur->avl_balance != AVL_BACK || cur->avl_back);
+		CERT(cur->avl_balance != AVL_BALANCE ||
+		     cur->avl_back == NULL || cur->avl_forw);
+		CERT(cur->avl_balance != AVL_BALANCE ||
+		     cur->avl_forw == NULL || cur->avl_back);
+
+		if (AVL_START(tree, cur) >= end) {
+			/* new range lies entirely before cur */
+			if (cur->avl_back == NULL) {
+				*growthp = AVL_BACK;
+				return(cur);
+			}
+			cur = cur->avl_back;
+		} else if (AVL_END(tree, cur) <= start) {
+			/* new range lies entirely after cur */
+			if (cur->avl_forw == NULL) {
+				*growthp = AVL_FORW;
+				return(cur);
+			}
+			cur = cur->avl_forw;
+		} else {
+			/* overlaps cur -- let caller decide if it is an error */
+			return(NULL);
+		}
+	}
+}
+
+
+static void
+avl_insert_grow(
+	register avltree_desc_t *tree,
+	register avlnode_t *parent,
+	register avlnode_t *newnode,
+	register int growth)
+{
+	register avlnode_t *pred;
+	register __psunsigned_t start = AVL_START(tree, newnode);
+
+	if (growth != AVL_BACK) {
+		/*
+		 * Growing to the right: newnode follows parent directly in
+		 * sort order, so it is spliced into the nextino list right
+		 * behind parent.
+		 */
+		parent->avl_forw = newnode;
+		newnode->avl_nextino = parent->avl_nextino;
+		parent->avl_nextino = newnode;
+		return;
+	}
+
+	/*
+	 * Growing to the left: the next in-order node after newnode is
+	 * parent.  The node preceding newnode, if any, is the closest
+	 * ancestor with a lesser value; up to now its nextino pointed at
+	 * parent and has to be redirected to newnode.
+	 */
+	parent->avl_back = newnode;
+	newnode->avl_nextino = parent;
+	for (pred = parent; pred != NULL; pred = pred->avl_parent) {
+		if (AVL_END(tree, pred) <= start)
+			break;
+	}
+	if (pred != NULL) {
+		ASSERT(pred->avl_nextino == parent);
+		pred->avl_nextino = newnode;
+	}
+	/*
+	 * (pred == NULL: newnode is the least element, first in the list.)
+	 */
+}
+
+/* Insert newnode; returns newnode, or NULL if it overlaps an existing range. */
+avlnode_t *
+avl_insert(
+	register avltree_desc_t *tree,
+	register avlnode_t *newnode)
+{
+	register avlnode_t *np;
+	register __psunsigned_t start, end;
+	int growth;
+
+	ASSERT(newnode);	/* checked before the callbacks are handed it */
+	start = AVL_START(tree, newnode);
+	end = AVL_END(tree, newnode);
+	ASSERT(start <= end);
+
+	/*
+	 * Clean all pointers for sanity; some will be reset as necessary.
+	 */
+	newnode->avl_nextino = NULL;
+	newnode->avl_parent = NULL;
+	newnode->avl_forw = NULL;
+	newnode->avl_back = NULL;
+	newnode->avl_balance = AVL_BALANCE;
+
+	if ((np = tree->avl_root) == NULL) { /* degenerate case... */
+		tree->avl_root = newnode;
+		tree->avl_firstino = newnode;
+		return newnode;
+	}
+
+	if ((np = avl_insert_find_growth(tree, start, end, &growth)) == NULL) {
+		if (start != end)  { /* non-zero length range */
+#ifdef	AVL_USER_MODE
+			printf(
+			"avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+				(unsigned long long)start, (unsigned long long)end);
+#else
+			/*
+			 * lockmetering tree can't afford printfs here.
+			 */
+			if (!(tree->avl_flags & AVLF_DUPLICITY))
+				cmn_err(CE_CONT,
+			"!avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+				(unsigned long long)start, (unsigned long long)end);
+#endif
+		}
+		return(NULL);
+	}
+
+	avl_insert_grow(tree, np, newnode, growth);
+	if (growth == AVL_BACK) {
+		/*
+		 * Growing to left. if np was firstino, newnode will be firstino
+		 */
+		 if (tree->avl_firstino == np)
+			tree->avl_firstino = newnode;
+	}
+#ifdef notneeded
+	else
+	if (growth == AVL_FORW)
+		/*
+		 * Cannot possibly be firstino; there is somebody to our left.
+		 */
+		 ;
+#endif
+
+	newnode->avl_parent = np;
+	CERT(np->avl_forw == newnode || np->avl_back == newnode);
+
+	avl_balance(&tree->avl_root, np, growth);
+	avl_checktree(tree, tree->avl_root);
+
+	return newnode;
+}
+
+/*
+ * avl_insert_immediate(tree, afterp, newnode):
+ * 	insert newnode into tree immediately after afterp, which must have
+ *	no right child; after insertion, newnode is right child of afterp.
+ */
+void
+avl_insert_immediate(
+		avltree_desc_t *tree,
+		avlnode_t *afterp,
+		avlnode_t *newnode)
+{
+	/*
+	 * Clean all pointers for sanity; some will be reset as necessary.
+	 */
+	newnode->avl_nextino = NULL;
+	newnode->avl_parent = NULL;
+	newnode->avl_forw = NULL;
+	newnode->avl_back = NULL;
+	newnode->avl_balance = AVL_BALANCE;
+
+	if (afterp == NULL) {	/* no predecessor: newnode becomes the tree */
+		tree->avl_root = newnode;
+		tree->avl_firstino = newnode;
+		return;
+	}
+
+	ASSERT(afterp->avl_forw == NULL);
+	avl_insert_grow(tree, afterp, newnode, AVL_FORW); /* grow to right */
+	newnode->avl_parent = afterp;	/* avl_insert_grow leaves this unset */
+	CERT(afterp->avl_forw == newnode);
+	avl_balance(&tree->avl_root, afterp, AVL_FORW);
+	avl_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *	Returns the leftmost (first in-order) node under root, or NULL.
+ */
+avlnode_t *
+avl_firstino(register avlnode_t *root)
+{
+	register avlnode_t *leftmost = root;
+
+	if (leftmost != NULL) {
+		/* descend through left children until there are none */
+		while (leftmost->avl_back != NULL)
+			leftmost = leftmost->avl_back;
+	}
+	return leftmost;
+}
+
+#ifdef AVL_USER_MODE
+/*
+ * leave this as a user-mode only routine until someone actually
+ * needs it in the kernel
+ */
+
+/*
+ *	Returns the rightmost (last in-order) node under root, or NULL.
+ */
+avlnode_t *
+avl_lastino(register avlnode_t *root)
+{
+	register avlnode_t *rightmost = root;
+
+	if (rightmost != NULL) {
+		/* descend through right children until there are none */
+		while (rightmost->avl_forw != NULL)
+			rightmost = rightmost->avl_forw;
+	}
+	return rightmost;
+}
+#endif
+
+void
+avl_init_tree(avltree_desc_t *tree, avlops_t *ops)
+{
+	/* start out empty: no root and no head of the in-order list */
+	tree->avl_firstino = tree->avl_root = NULL;
+	tree->avl_ops = ops;	/* callbacks behind AVL_START/AVL_END */
+}
+
+#ifdef AVL_DEBUG
+static void
+avl_printnode(avltree_desc_t *tree, avlnode_t *np, int nl)
+{	/* prints "[start-last]" followed by '\n' if nl is set, else ' ' */
+	printf("[%llu-%llu]%c", (unsigned long long)AVL_START(tree, np),
+		(unsigned long long)(AVL_END(tree, np) - 1), nl ? '\n' : ' ');
+}
+#endif
+#ifdef STAND_ALONE_DEBUG
+/* Interactive test harness: a node covers [avl_start, avl_start + avl_size) */
+struct avl_debug_node {
+	avlnode_t	avl_node;	/* first member: avlnode_t * casts rely on it */
+	xfs_off_t		avl_start;
+	unsigned int	avl_size;
+};
+
+static __psunsigned_t
+avl_debug_start(avlnode_t *node)
+{
+	return (__psunsigned_t)((struct avl_debug_node *)node)->avl_start;
+}
+
+static __psunsigned_t
+avl_debug_end(avlnode_t *node)
+{
+	struct avl_debug_node *dn = (struct avl_debug_node *)node;
+
+	return (__psunsigned_t)dn->avl_start + dn->avl_size;
+}
+
+avlops_t avl_debug_ops = {	/* placed after the accessors it refers to */
+	avl_debug_start,
+	avl_debug_end,
+};
+
+struct avl_debug_node 	freenodes[100];
+struct avl_debug_node 	*freehead = &freenodes[0];
+
+static avlnode_t *
+alloc_avl_debug_node(void)
+{
+	if (freehead >= &freenodes[100])
+		return NULL;	/* pool used up; callers ASSERT on this */
+	return(&(freehead++)->avl_node);	/* static storage, so zeroed */
+}
+
+static void
+avl_print(avltree_desc_t *tree, avlnode_t *root, int depth)
+{
+	int i;
+
+	if (!root)
+		return;
+	if (root->avl_forw)
+		avl_print(tree, root->avl_forw, depth+5);
+	for (i = 0; i < depth; i++)
+		putchar((int) ' ');
+	avl_printnode(tree, root,1);
+	if (root->avl_back)
+		avl_print(tree, root->avl_back, depth+5);
+}
+
+int main(void)
+{
+	int 		i, j;
+	avlnode_t  	*np;
+	avltree_desc_t	tree;
+	char		linebuf[256], cmd[256];
+
+	avl_init_tree(&tree, &avl_debug_ops);
+
+	for (i = 100; i > 0; i = i - 10)	/* seed [10,20) ... [100,110) */
+	{	
+		np = alloc_avl_debug_node();
+		ASSERT(np);
+		((struct avl_debug_node *)np)->avl_start = i;
+		((struct avl_debug_node *)np)->avl_size = 10;
+		avl_insert(&tree, np);
+	}
+	avl_print(&tree, tree.avl_root, 0);
+
+	for (np = tree.avl_firstino; np != NULL; np = np->avl_nextino)
+		avl_printnode(&tree, np, 0);
+	printf("\n");
+
+	while (1) {	/* f: find, p: print, d: delete, i: insert, r: range */
+		printf("Command [fpdir] : ");
+		if (fgets(linebuf, 256, stdin) == NULL)
+			break;		/* EOF or read error */
+		cmd[0] = '\0';
+		if (sscanf(linebuf, "%[fpdir]%d", cmd, &i) != 2)
+			continue;
+		switch (cmd[0]) {
+		case 'd':
+		case 'f':
+			printf("end of range ? ");
+			fgets(linebuf, 256, stdin);
+			j = atoi(linebuf);
+			/* avl.h declares no avl_findinrange(); presumably this */
+			if (i == j) j = i+1;
+			np = avl_findanyrange(&tree, i, j, AVL_INCLUDE_ZEROLEN);
+			if (np) {
+				avl_printnode(&tree, np, 1);
+				if (cmd[0] == 'd')
+					avl_delete(&tree, np);
+			} else
+				printf("Cannot find %d\n", i);
+			break;
+		case 'p':
+			avl_print(&tree, tree.avl_root, 0);
+			for (np = tree.avl_firstino;
+				np != NULL; np = np->avl_nextino)
+					avl_printnode(&tree, np, 0);
+			printf("\n");
+			break;
+		case 'i':
+			np = alloc_avl_debug_node();
+			ASSERT(np);
+			((struct avl_debug_node *)np)->avl_start = i;
+			printf("size of range ? ");
+			fgets(linebuf, 256, stdin);
+			j = atoi(linebuf);
+
+			((struct avl_debug_node *)np)->avl_size = j;
+			avl_insert(&tree, np);
+			break;
+		case 'r': {
+			avlnode_t 	*b, *t;
+			int		checklen;
+
+			printf("End of range ? ");
+			fgets(linebuf, 256, stdin);
+			j = atoi(linebuf);
+
+			printf("checklen 0/1 ? ");
+			fgets(linebuf, 256, stdin);
+			checklen = atoi(linebuf);
+
+			b = avl_findanyrange(&tree, i, j, checklen);
+			if (b) {
+				printf("Found something\n");
+				t = b;
+				while (t)  {
+					if (t != b &&
+					    AVL_START(&tree, t) >= j)
+						break;
+					avl_printnode(&tree, t, 0);
+					t = t->avl_nextino;
+				}
+				printf("\n");
+			}
+		     }
+		}
+	}
+	return 0;
+}
+#endif
+
+/*
+ * 	Given a tree, find value; returns the range enclosing value if there
+ *	is one, else the range immediately succeeding (dir == AVL_SUCCEED) or
+ * 	preceding (dir == AVL_PRECEED) value; NULL if no such range exists.
+ */
+avlnode_t *
+avl_findadjacent(
+	register avltree_desc_t *tree,
+	register __psunsigned_t value,
+	register int		dir)
+{
+        register avlnode_t *np = tree->avl_root;
+
+	while (np) {
+		if (value < AVL_START(tree, np)) {
+			if (np->avl_back) {
+				np = np->avl_back;
+				continue;
+			}
+			/* if we were to add node with value, would
+			 * have a growth of AVL_BACK
+			 */
+			if (dir == AVL_SUCCEED) {
+				/* if succeeding node is needed, this is it.
+				 */
+				return(np);
+			}
+			if (dir == AVL_PRECEED) {
+				/*
+				 * find nearest ancestor with lesser value.
+				 */
+				 np = np->avl_parent;
+				 while (np) {
+					if (AVL_END(tree, np) <= value)
+						break;
+					np = np->avl_parent;
+				}
+				return(np);
+			}
+			ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+			break;
+		}
+		if (value >= AVL_END(tree, np)) {
+			if (np->avl_forw) {
+				np = np->avl_forw;
+				continue;
+			}
+			/* if we were to add node with value, would
+			 * have a growth of AVL_FORW; 
+			 */
+			if (dir == AVL_SUCCEED) {
+				/* we are looking for a succeeding node;
+				 * this is nextino.
+				 */
+				return(np->avl_nextino);
+			}
+			/* looking for a preceding node? this is it. */
+			if (dir == AVL_PRECEED)
+				return(np);
+			ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+			break;	/* bad dir: NULL, as in the AVL_BACK case */
+		}
+		/* AVL_START(tree, np) <= value < AVL_END(tree, np) */
+		return(np);
+	}
+	return NULL;
+}
+
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+/*
+ *  	avl_findranges:
+ *
+ *	Given range r [start, end), find all ranges in tree which fall in r.
+ *	At return, *startp and *endp point to the first and last of a chain
+ *	of elements describing those ranges (both NULL if there are none).
+ *	The elements from *startp to *endp are in sort order, and can be
+ *	walked using avl_nextino.
+ */
+
+void
+avl_findranges(
+	register avltree_desc_t *tree,
+	register __psunsigned_t start,
+	register __psunsigned_t end,
+	avlnode_t 	        **startp,
+	avlnode_t		**endp)
+{
+	register avlnode_t *first;
+	register avlnode_t *after;
+
+	first = avl_findadjacent(tree, start, AVL_SUCCEED);
+	if (first == NULL || end <= AVL_START(tree, first)) {
+		/*
+		 * Nothing succeeds start, or something follows start
+		 * but is entirely after end.
+		 */
+		*startp = NULL;
+		*endp = NULL;
+		return;
+	}
+
+	*startp = first;
+
+	/* see if end is in this region itself */
+	after = first->avl_nextino;
+	if (end <= AVL_END(tree, first) || after == NULL ||
+	    end <= AVL_START(tree, after)) {
+		*endp = first;
+		return;
+	}
+
+	/*
+	 * Have to munge for end: look up (end - 1) rather than end,
+	 * since avl_findadjacent looks for the exact value it is given
+	 * and does not care that end is actually one more than the last
+	 * value being looked for; thus feed it one less.
+	 */
+	*endp = avl_findadjacent(tree, (end-1), AVL_PRECEED);
+	ASSERT(*endp);
+}
+
+#endif /* AVL_FUTURE_ENHANCEMENTS */
diff --git a/repair/avl.h b/repair/avl.h
new file mode 100644
index 000000000..a6d53f5b9
--- /dev/null
+++ b/repair/avl.h
@@ -0,0 +1,143 @@
+/**************************************************************************
+ *									  *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *									  *
+ **************************************************************************/
+#ifndef __SYS_AVL_H__
+#define __SYS_AVL_H__
+
+
+typedef struct	avlnode {
+	struct 	avlnode	*avl_forw;	/* pointer to right child  (> parent) */
+	struct 	avlnode *avl_back;	/* pointer to left child  (< parent) */
+	struct	avlnode *avl_parent;	/* parent pointer */
+	struct	avlnode *avl_nextino;	/* next in-order; NULL terminated list */
+	char		 avl_balance;	/* AVL_BACK, AVL_BALANCE or AVL_FORW */
+} avlnode_t;
+
+/*
+ * avl-tree operations: callbacks that report the range a node describes
+ */
+typedef struct avlops {
+	__psunsigned_t	(*avl_start)(avlnode_t *);	/* start of range */
+	__psunsigned_t	(*avl_end)(avlnode_t *);	/* end, exclusive */
+} avlops_t;
+
+#define	AVL_START(tree, n)	(*(tree)->avl_ops->avl_start)(n)
+#define	AVL_END(tree, n)	(*(tree)->avl_ops->avl_end)(n)
+
+/*
+ * tree descriptor:
+ *	root points to the root of the tree.
+ *	firstino points to the first in the ordered list.
+ */
+typedef struct avltree_desc {
+	avlnode_t	*avl_root;
+	avlnode_t	*avl_firstino;
+	avlops_t	*avl_ops;	/* used by AVL_START/AVL_END */
+	short		 avl_flags;	/* AVLF_* values */
+} avltree_desc_t;
+
+/* possible values for avl_balance (also used as growth directions) */
+
+#define AVL_BACK	1
+#define AVL_BALANCE	0
+#define AVL_FORW	2
+
+/* possible values for avl_flags */
+
+#define AVLF_DUPLICITY	0x0001		/* no warnings on insert dups */
+
+/*
+ * 'Exported' avl tree routines
+ */
+avlnode_t
+*avl_insert(
+	avltree_desc_t *tree,
+	avlnode_t *newnode);
+
+void
+avl_delete(
+	avltree_desc_t *tree,
+	avlnode_t *np);
+
+void
+avl_insert_immediate(
+	avltree_desc_t *tree,
+	avlnode_t *afterp,
+	avlnode_t *newnode);
+	
+void
+avl_init_tree(
+	avltree_desc_t  *tree,
+	avlops_t *ops);
+
+avlnode_t *
+avl_findrange(
+	avltree_desc_t *tree,
+	__psunsigned_t value);
+
+avlnode_t *
+avl_find(
+	avltree_desc_t *tree,
+	__psunsigned_t value);
+
+avlnode_t *
+avl_findanyrange(
+	avltree_desc_t *tree,
+	__psunsigned_t start,
+	__psunsigned_t end,
+	int     checklen);	/* AVL_INCLUDE_ZEROLEN or AVL_EXCLUDE_ZEROLEN */
+
+
+avlnode_t *
+avl_findadjacent(
+	avltree_desc_t *tree,
+	__psunsigned_t value,
+	int		dir);	/* AVL_PRECEED or AVL_SUCCEED */
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+void
+avl_findranges(
+	register avltree_desc_t *tree,
+	register __psunsigned_t start,
+	register __psunsigned_t end,
+	avlnode_t 	        **startp,
+	avlnode_t		**endp);
+#endif
+
+#define AVL_PRECEED	0x1	/* avl_findadjacent dir: range before value */
+#define AVL_SUCCEED	0x2	/* avl_findadjacent dir: range after value */
+
+#define AVL_INCLUDE_ZEROLEN	0x0000	/* avl_findanyrange checklen values */
+#define AVL_EXCLUDE_ZEROLEN	0x0001
+
+#endif /* __SYS_AVL_H__ */
diff --git a/repair/avl64.c b/repair/avl64.c
new file mode 100644
index 000000000..091bc8180
--- /dev/null
+++ b/repair/avl64.c
@@ -0,0 +1,1458 @@
+/**************************************************************************
+ *									  *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *									  *
+ **************************************************************************/
+
+/* to allow use by user-level utilities */
+
+#ifdef STAND_ALONE_DEBUG
+#define AVL_USER_MODE
+#endif
+
+#if defined(STAND_ALONE_DEBUG) || defined(AVL_USER_MODE_DEBUG)
+#define AVL_DEBUG
+#endif
+
+#include <stdio.h>
+#include <libxfs.h>
+#include "avl64.h"
+
+#define CERT	ASSERT
+
+#ifdef AVL_DEBUG
+/* Debug only: assert the structural invariants of the single node np. */
+static void
+avl64_checknode(
+	register avl64tree_desc_t *tree,
+	register avl64node_t *np)
+{
+	register avl64node_t *back = np->avl_back;
+	register avl64node_t *forw = np->avl_forw;
+	register avl64node_t *nextino = np->avl_nextino;
+	register int bal = np->avl_balance;
+
+	/* the balance value must agree with which children are present */
+	ASSERT(bal != AVL_BALANCE || (!back && !forw) || (back && forw));
+	ASSERT(bal != AVL_FORW || forw);
+	ASSERT(bal != AVL_BACK || back);
+
+	if (forw) {	/* right child: starts later and points back at np */
+		ASSERT(AVL_START(tree, np) < AVL_START(tree, forw));
+		ASSERT(np->avl_forw->avl_parent == np);
+		ASSERT(back || bal == AVL_FORW);
+	} else {
+		ASSERT(bal != AVL_FORW);
+		ASSERT(bal == AVL_BALANCE || back);
+		ASSERT(bal == AVL_BACK || !back);
+	}
+
+	if (back) {	/* left child: starts earlier and points back at np */
+		ASSERT(AVL_START(tree, np) > AVL_START(tree, back));
+		ASSERT(np->avl_back->avl_parent == np);
+		ASSERT(forw || bal == AVL_BACK);
+	} else {
+		ASSERT(bal != AVL_BACK);
+		ASSERT(bal == AVL_BALANCE || forw);
+		ASSERT(bal == AVL_FORW || !forw);
+	}
+
+	if (nextino == NULL)	/* last in the in-order list: no right child */
+		ASSERT(forw == NULL);
+	else			/* np must end before its successor starts */
+		ASSERT(AVL_END(tree, np) <= AVL_START(tree, nextino));
+}
+
+static void
+avl64_checktree(
+	avl64tree_desc_t *tree,
+	avl64node_t *root)
+{
+	avl64node_t *nlast = root;
+	avl64node_t *nnext = root;
+	avl64node_t *np;
+	__uint64_t offset = 0;
+	__uint64_t end;
+
+	ASSERT(!nnext || nnext->avl_parent == NULL);
+
+	/* offset is the end of the last range the walk has passed */
+	while (nnext != NULL) {
+		avl64_checknode(tree, nnext);
+		end = AVL_END(tree, nnext);
+
+		if (end <= offset) {
+			/*
+			 * Already passed this range: go right unless we
+			 * just came back from there, otherwise go up.
+			 */
+			np = nnext->avl_forw;
+			if (np == NULL || np == nlast)
+				np = nnext->avl_parent;
+			nlast = nnext;
+			nnext = np;
+			continue;
+		}
+
+		/* go left while the left child still ends past offset */
+		nlast = nnext;
+		np = nnext->avl_back;
+		if (np != NULL && AVL_END(tree, np) > offset) {
+			nnext = np;
+			continue;
+		}
+
+		/* this range is passed now; go right or, failing that, up */
+		offset = end;
+		nnext = (nnext->avl_forw != NULL) ?
+				nnext->avl_forw : nnext->avl_parent;
+	}
+}
+#else	/* ! AVL_DEBUG */
+#define avl64_checktree(t,x)
+#endif	/* AVL_DEBUG */
+
+
+/*
+ * Reset balance for np up through tree.
+ * ``direction'' is the way that np's balance
+ * is headed after the deletion of one of its children --
+ * e.g., deleting an avl_forw child sends avl_balance toward AVL_BACK.
+ * Called only when deleting a node from the tree.
+ */
+static void
+retreat(
+	avl64tree_desc_t *tree,
+	register avl64node_t *np,
+	register int direction)
+{
+	register avl64node_t **rootp = &tree->avl_root;
+	register avl64node_t *parent;
+	register avl64node_t *child;
+	register avl64node_t *tmp;
+	register int	bal;
+
+	do {
+		ASSERT(direction == AVL_BACK || direction == AVL_FORW);
+
+		if (np->avl_balance == AVL_BALANCE) {
+			np->avl_balance = direction;	/* np now leans; stop here */
+			return;
+		}
+
+		parent = np->avl_parent;
+
+		/*
+		 * If balance is being restored, no local node
+		 * reorganization is necessary, but may be at
+		 * a higher node.  Reset direction and continue.
+		 */
+		if (direction != np->avl_balance) {
+			np->avl_balance = AVL_BALANCE;
+			if (parent) {
+				if (parent->avl_forw == np)
+					direction = AVL_BACK;
+				else
+					direction = AVL_FORW;
+
+				np = parent;
+				continue;
+			}
+			return;
+		}
+
+		/*
+		 * Imbalance.  If an avl_forw node was removed, direction
+		 * (and, by reduction, np->avl_balance) is/was AVL_BACK.
+		 */
+		if (np->avl_balance == AVL_BACK) {
+
+			ASSERT(direction == AVL_BACK);
+			child = np->avl_back;
+			bal = child->avl_balance;
+
+			if (bal != AVL_FORW) /* single LL */ {
+				/*
+				 * np gets pushed down to lesser child's
+				 * avl_forw branch.
+				 *
+				 *  np->    -D 		    +B
+				 *	    / \		    / \
+				 * child-> B   deleted	   A  -D
+				 *	  / \		      /
+				 *	 A   C		     C
+				cmn_err(CE_CONT, "!LL delete b 0x%x c 0x%x\n",
+					np, child);
+				 */
+
+				np->avl_back = child->avl_forw;
+				if (child->avl_forw)
+					child->avl_forw->avl_parent = np;
+				child->avl_forw = np;
+
+				if (parent) {
+					if (parent->avl_forw == np) {
+						parent->avl_forw = child;
+						direction = AVL_BACK;
+					} else {
+						ASSERT(parent->avl_back == np);
+						parent->avl_back = child;
+						direction = AVL_FORW;
+					}
+				} else {
+					ASSERT(*rootp == np);
+					*rootp = child;
+				}
+				np->avl_parent = child;
+				child->avl_parent = parent;
+
+				if (bal == AVL_BALANCE) {
+					np->avl_balance = AVL_BACK;
+					child->avl_balance = AVL_FORW;
+					return;
+				} else {
+					np->avl_balance = AVL_BALANCE;
+					child->avl_balance = AVL_BALANCE;
+					np = parent;
+					avl64_checktree(tree, *rootp);
+					continue;
+				}
+			}
+
+			/* child->avl_balance == AVL_FORW  double LR rotation
+			 *
+			 * child's avl_forw node gets promoted up, along with
+			 * its avl_forw subtree
+			 *
+			 *  np->     -G 		  C
+			 *	     / \		 / \
+			 * child-> +B   H	       -B   G
+			 *	   / \   \	       /   / \
+			 *	  A  +C   deleted     A   D   H
+			 *	       \
+			 *	        D
+			cmn_err(CE_CONT, "!LR delete b 0x%x c 0x%x t 0x%x\n",
+				np, child, child->avl_forw);
+			 */
+
+			tmp = child->avl_forw;
+			bal = tmp->avl_balance;
+
+			child->avl_forw = tmp->avl_back;
+			if (tmp->avl_back)
+				tmp->avl_back->avl_parent = child;
+
+			tmp->avl_back = child;
+			child->avl_parent = tmp;
+
+			np->avl_back = tmp->avl_forw;
+			if (tmp->avl_forw)
+				tmp->avl_forw->avl_parent = np;
+			tmp->avl_forw = np;
+
+			if (bal == AVL_FORW)
+				child->avl_balance = AVL_BACK;
+			else
+				child->avl_balance = AVL_BALANCE;
+
+			if (bal == AVL_BACK)
+				np->avl_balance = AVL_FORW;
+			else
+				np->avl_balance = AVL_BALANCE;
+
+			goto next;
+		}
+
+		ASSERT(np->avl_balance == AVL_FORW && direction == AVL_FORW);
+
+		child = np->avl_forw;
+		bal = child->avl_balance;
+
+		if (bal != AVL_BACK) /* single RR */ {
+			/*
+			 * np gets pushed down to greater child's
+			 * avl_back branch.
+			 *
+			 *  np->    +B 		     -D
+			 *	    / \		     / \
+			 *   deleted   D <-child   +B   E
+			 *	      / \	     \
+			 *	     C   E	      C
+			cmn_err(CE_CONT, "!RR delete b 0x%x c 0x%x\n",
+				np, child);
+			 */
+
+			np->avl_forw = child->avl_back;
+			if (child->avl_back)
+				child->avl_back->avl_parent = np;
+			child->avl_back = np;
+
+			if (parent) {
+				if (parent->avl_forw == np) {
+					parent->avl_forw = child;
+					direction = AVL_BACK;
+				} else {
+					ASSERT(parent->avl_back == np);
+					parent->avl_back = child;
+					direction = AVL_FORW;
+				}
+			} else {
+				ASSERT(*rootp == np);
+				*rootp = child;
+			}
+			np->avl_parent = child;
+			child->avl_parent = parent;
+
+			if (bal == AVL_BALANCE) {
+				np->avl_balance = AVL_FORW;
+				child->avl_balance = AVL_BACK;
+				return;
+			} else {
+				np->avl_balance = AVL_BALANCE;
+				child->avl_balance = AVL_BALANCE;
+				np = parent;
+				avl64_checktree(tree, *rootp);
+				continue;
+			}
+		}
+
+		/* child->avl_balance == AVL_BACK  double RL rotation
+		cmn_err(CE_CONT, "!RL delete b 0x%x c 0x%x t 0x%x\n",
+			np, child, child->avl_back);
+		*/
+
+		tmp = child->avl_back;
+		bal = tmp->avl_balance;
+
+		child->avl_back = tmp->avl_forw;
+		if (tmp->avl_forw)
+			tmp->avl_forw->avl_parent = child;
+
+		tmp->avl_forw = child;
+		child->avl_parent = tmp;
+
+		np->avl_forw = tmp->avl_back;
+		if (tmp->avl_back)
+			tmp->avl_back->avl_parent = np;
+		tmp->avl_back = np;
+
+		if (bal == AVL_BACK)
+			child->avl_balance = AVL_FORW;
+		else
+			child->avl_balance = AVL_BALANCE;
+
+		if (bal == AVL_FORW)
+			np->avl_balance = AVL_BACK;
+		else
+			np->avl_balance = AVL_BALANCE;
+next:		/* shared tail of both double rotations: tmp takes np's place */
+		np->avl_parent = tmp;
+		tmp->avl_balance = AVL_BALANCE;
+		tmp->avl_parent = parent;
+
+		if (parent) {
+			if (parent->avl_forw == np) {
+				parent->avl_forw = tmp;
+				direction = AVL_BACK;
+			} else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = tmp;
+				direction = AVL_FORW;
+			}
+		} else {
+			ASSERT(*rootp == np);
+			*rootp = tmp;
+			return;
+		}
+
+		np = parent;
+		avl64_checktree(tree, *rootp);
+	} while (np);
+}
+
+/*
+ *	Remove node np, which must be in tree, from the tree and from the
+ *	in-order (avl_nextino) list.  avl_delete does the local tree
+ *	manipulations, calls retreat() to rebalance tree up to its root.
+ */
+void
+avl64_delete(
+	register avl64tree_desc_t *tree,
+	register avl64node_t *np)
+{
+	register avl64node_t *forw = np->avl_forw;
+	register avl64node_t *back = np->avl_back;
+	register avl64node_t *parent = np->avl_parent;
+	register avl64node_t *nnext;
+	register avl64node_t *child;
+
+	if (np->avl_back) {
+		/*
+		 * a left child exists, then greatest left descendent's nextino
+		 * is pointing to np; make it point to np->nextino.
+		 */
+		nnext = np->avl_back;
+		while (nnext) {
+			if (!nnext->avl_forw)
+				break; /* can't find anything bigger */
+			nnext = nnext->avl_forw;
+		}
+	} else {
+		/*
+		 * No left child: the predecessor is the nearest ancestor
+		 * reached through its avl_forw link, if there is one.
+		 * Comparing range ends here instead would mistake a
+		 * zero-length ancestor that starts at np's end for it.
+		 */
+		child = np;
+		nnext = np->avl_parent;
+		while (nnext && nnext->avl_forw != child) {
+			child = nnext;
+			nnext = nnext->avl_parent;
+		}
+	}
+
+	if (nnext) {
+		ASSERT(nnext->avl_nextino == np);
+		nnext->avl_nextino = np->avl_nextino;
+		/*
+		 * 	Something precedes np; np cannot be firstino.
+		 */
+		ASSERT(tree->avl_firstino != np);
+	}
+	else {
+		/*
+		 * 	Nothing preceding np; after deletion, np's nextino
+		 * 	is firstino of tree.
+		 */
+		ASSERT(tree->avl_firstino == np);
+		tree->avl_firstino = np->avl_nextino;
+	}
+
+
+	/*
+	 * Degenerate cases...
+	 */
+	if (forw == NULL) {
+		forw = back;
+		goto attach;
+	}
+
+	if (back == NULL) {
+attach:
+		if (forw)
+			forw->avl_parent = parent;
+		if (parent) {
+			if (parent->avl_forw == np) {
+				parent->avl_forw = forw;
+				retreat(tree, parent, AVL_BACK);
+			} else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = forw;
+				retreat(tree, parent, AVL_FORW);
+			}
+		} else {
+			ASSERT(tree->avl_root == np);
+			tree->avl_root = forw;
+		}
+		avl64_checktree(tree, tree->avl_root);
+		return;
+	}
+
+	/*
+	 * Harder case: children on both sides.
+	 * If back's avl_forw pointer is null, just have back
+	 * inherit np's avl_forw tree, remove np from the tree
+	 * and adjust balance counters starting at back.
+	 *
+	 * np->	    xI		    xH	(before retreat())
+	 *	    / \		    / \
+	 * back->  H   J	   G   J
+	 *	  /   / \             / \
+	 *       G   ?   ?           ?   ?
+	 *      / \
+	 *     ?   ?
+	 */
+	if ((forw = back->avl_forw) == NULL) {
+		/*
+		 * AVL_FORW retreat below will set back's
+		 * balance to AVL_BACK.
+		 */
+		back->avl_balance = np->avl_balance;
+		back->avl_forw = forw = np->avl_forw;
+		forw->avl_parent = back;
+		back->avl_parent = parent;
+
+		if (parent) {
+			if (parent->avl_forw == np)
+				parent->avl_forw = back;
+			else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = back;
+			}
+		} else {
+			ASSERT(tree->avl_root == np);
+			tree->avl_root = back;
+		}
+
+		/*
+		 * back is taking np's place in the tree, and
+		 * has therefore lost an avl_back node (itself).
+		 */
+		retreat(tree, back, AVL_FORW);
+		avl64_checktree(tree, tree->avl_root);
+		return;
+	}
+
+	/*
+	 * Hardest case: children on both sides, and back's
+	 * avl_forw pointer isn't null.  Find the immediately
+	 * inferior buffer by following back's avl_forw line
+	 * to the end, then have it inherit np's avl_forw tree.
+	 *
+	 * np->	    xI			      xH
+	 *	    / \			      / \
+	 *         G   J	     back->  G   J   (before retreat())
+	 *	  / \			    / \
+	 *       F   ?...  		   F   ?1
+	 *      /     \
+	 *     ?       H  <-forw
+	 *	      /
+	 *	     ?1
+	 */
+	while ((back = forw->avl_forw) != NULL)
+		forw = back;
+
+	/*
+	 * Will be adjusted by retreat() below.
+	 */
+	forw->avl_balance = np->avl_balance;
+
+	/*
+	 * forw inherits np's avl_forw...
+	 */
+	forw->avl_forw = np->avl_forw;
+	np->avl_forw->avl_parent = forw;
+
+	/*
+	 * ... forw's parent gets forw's avl_back...
+	 */
+	back = forw->avl_parent;
+	back->avl_forw = forw->avl_back;
+	if (forw->avl_back)
+		forw->avl_back->avl_parent = back;
+
+	/*
+	 * ... forw gets np's avl_back...
+	 */
+	forw->avl_back = np->avl_back;
+	np->avl_back->avl_parent = forw;
+
+	/*
+	 * ... and forw gets np's parent.
+	 */
+	forw->avl_parent = parent;
+
+	if (parent) {
+		if (parent->avl_forw == np)
+			parent->avl_forw = forw;
+		else
+			parent->avl_back = forw;
+	} else {
+		ASSERT(tree->avl_root == np);
+		tree->avl_root = forw;
+	}
+
+	/*
+	 * What used to be forw's parent is the starting
+	 * point for rebalancing.  It has lost an avl_forw node.
+	 */
+	retreat(tree, back, AVL_BACK);
+	avl64_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ * 	avl_findanyrange:
+ *
+ *	Given range r [start, end), return the range holding start or,
+ *	failing that, the first range after start, provided that it begins
+ *	before end.  Returns NULL if there is no such range.
+ *
+ *	If checklen is not AVL_INCLUDE_ZEROLEN, ranges of zero length that
+ *	begin before end are stepped over, so that only a range of
+ *	non-zero length can be returned.
+ *
+ */
+avl64node_t *
+avl64_findanyrange(
+	avl64tree_desc_t *tree,
+	__uint64_t start,
+	__uint64_t end,
+	int 	checklen)
+{
+	avl64node_t *cur = tree->avl_root;
+
+	/*
+	 * Descend towards start.  The walk stops at the range holding
+	 * start or, if none does, at the range that follows start in order
+	 * (NULL if everything in the tree lies before start).
+	 */
+	while (cur != NULL) {
+		if (start < AVL_START(tree, cur)) {
+			if (cur->avl_back == NULL) {
+				/* nothing smaller below: cur follows start */
+				break;
+			}
+			cur = cur->avl_back;
+		} else if (start >= AVL_END(tree, cur)) {
+			if (cur->avl_forw == NULL) {
+				/* cur precedes start: its nextino follows */
+				cur = cur->avl_nextino;
+				break;
+			}
+			cur = cur->avl_forw;
+		} else {
+			/* AVL_START(tree, cur) <= start < AVL_END(tree, cur) */
+			break;
+		}
+	}
+
+	if (cur == NULL) {
+		/* all existing ranges are before start */
+		return NULL;
+	}
+
+	if (checklen == AVL_INCLUDE_ZEROLEN) {
+		/*
+		 * cur may straddle [start, end); it is of no use only
+		 * if it begins at or after end.
+		 */
+		if (end <= AVL_START(tree, cur))
+			return NULL;
+		return cur;
+	}
+
+	/*
+	 * Find a region of non-zero length.
+	 */
+	while (cur != NULL &&
+	       AVL_END(tree, cur) - AVL_START(tree, cur) == 0 &&
+	       AVL_START(tree, cur) < end)
+		cur = cur->avl_nextino;
+
+	if (cur == NULL || AVL_START(tree, cur) >= end)
+		return NULL;
+
+	return cur;
+}
+
+
+/*
+ * Find the range that holds value, i.e. the node np for which
+ * AVL_START(tree, np) <= value < AVL_END(tree, np).  NULL if none.
+ */
+avl64node_t *
+avl64_findrange(
+	avl64tree_desc_t *tree,
+	__uint64_t value)
+{
+	avl64node_t *np;
+
+	for (np = tree->avl_root; np != NULL; ) {
+		if (value < AVL_START(tree, np)) {
+			np = np->avl_back;	/* value lies to the left */
+		} else if (value >= AVL_END(tree, np)) {
+			np = np->avl_forw;	/* value lies to the right */
+		} else {
+			ASSERT(AVL_START(tree, np) <= value &&
+			       value < AVL_END(tree, np));
+			return np;
+		}
+	}
+	/* ran off the bottom of the tree: no range holds value */
+	return NULL;
+}
+
+
+/*
+ * Find the node whose range starts exactly at value; NULL if there is none.
+ */
+avl64node_t *
+avl64_find(
+	avl64tree_desc_t *tree,
+	__uint64_t value)
+{
+	avl64node_t *cur = tree->avl_root;
+
+	while (cur != NULL) {
+		__uint64_t key = AVL_START(tree, cur);
+
+		if (value == key)
+			break;
+
+		/* smaller starts hang off avl_back, larger ones off avl_forw */
+		if (value < key)
+			cur = cur->avl_back;
+		else
+			cur = cur->avl_forw;
+	}
+	return cur;
+}
+
+
+/*
+ * Balance buffer AVL tree after attaching a new node to root.
+ * Called only by avl_insert.
+ */
+static void
+avl64_balance(
+	register avl64node_t **rootp,
+	register avl64node_t *np,
+	register int growth)
+{
+	/*
+	 * At this point, np points to the node to which
+	 * a new node has been attached.  All that remains is to
+	 * propagate avl_balance up the tree.
+	 */
+	for ( ; ; ) {
+		register avl64node_t *parent = np->avl_parent;
+		register avl64node_t *child;
+
+		CERT(growth == AVL_BACK || growth == AVL_FORW);
+
+		/*
+		 * If the buffer was already balanced, set avl_balance
+		 * to the new direction.  Continue if there is a
+		 * parent after setting growth to reflect np's
+		 * relation to its parent.
+		 */
+		if (np->avl_balance == AVL_BALANCE) {
+			np->avl_balance = growth;
+			if (parent) {
+				if (parent->avl_forw == np)
+					growth = AVL_FORW;
+				else {
+					ASSERT(parent->avl_back == np);
+					growth = AVL_BACK;
+				}
+
+				np = parent;
+				continue;
+			}
+			break;
+		}
+
+		if (growth != np->avl_balance) {
+			/*
+			 * Subtree is now balanced -- no net effect
+			 * in the size of the subtree, so leave.
+			 */
+			np->avl_balance = AVL_BALANCE;
+			break;
+		}
+
+		if (growth == AVL_BACK) {
+
+			child = np->avl_back;
+			CERT(np->avl_balance == AVL_BACK && child);
+
+			if (child->avl_balance == AVL_BACK) { /* single LL */
+				/*
+				 * ``A'' just got inserted;
+				 * np points to ``E'', child to ``C'',
+				 * and it is already AVL_BACK --
+				 * child will get promoted to top of subtree.
+
+				np->	     -E			C
+					     / \	       / \
+				child->	   -C   F	     -B   E
+					   / \		     /   / \
+					 -B   D		    A   D   F
+					 /
+					A
+
+					Note that child->avl_parent and
+					avl_balance get set in common code.
+				 */
+				np->avl_parent = child;
+				np->avl_balance = AVL_BALANCE;
+				np->avl_back = child->avl_forw;
+				if (child->avl_forw)
+					child->avl_forw->avl_parent = np;
+				child->avl_forw = np;
+			} else {
+				/*
+				 * double LR
+				 *
+				 * child's avl_forw node gets promoted to
+				 * the top of the subtree.
+
+				np->	     -E		      C
+					     / \	     / \
+				child->	   +B   F	   -B   E
+					   / \		   /   / \
+					  A  +C 	  A   D   F
+					       \
+						D
+
+				 */
+				register avl64node_t *tmp = child->avl_forw;
+
+				CERT(child->avl_balance == AVL_FORW && tmp);
+
+				child->avl_forw = tmp->avl_back;
+				if (tmp->avl_back)
+					tmp->avl_back->avl_parent = child;
+
+				tmp->avl_back = child;
+				child->avl_parent = tmp;
+
+				np->avl_back = tmp->avl_forw;
+				if (tmp->avl_forw)
+					tmp->avl_forw->avl_parent = np;
+
+				tmp->avl_forw = np;
+				np->avl_parent = tmp;
+
+				if (tmp->avl_balance == AVL_BACK)
+					np->avl_balance = AVL_FORW;
+				else
+					np->avl_balance = AVL_BALANCE;
+
+				if (tmp->avl_balance == AVL_FORW)
+					child->avl_balance = AVL_BACK;
+				else
+					child->avl_balance = AVL_BALANCE;
+
+				/*
+				 * Set child to point to tmp since it is
+				 * now the top of the subtree, and will
+				 * get attached to the subtree parent in
+				 * the common code below.
+				 */
+				child = tmp;
+			}
+
+		} else /* growth == AVL_FORW */ {
+
+			/*
+			 * This code is the mirror image of AVL_BACK above.
+			 */
+
+			child = np->avl_forw;
+			CERT(np->avl_balance == AVL_FORW && child);
+
+			if (child->avl_balance == AVL_FORW) { /* single RR */
+				np->avl_parent = child;
+				np->avl_balance = AVL_BALANCE;
+				np->avl_forw = child->avl_back;
+				if (child->avl_back)
+					child->avl_back->avl_parent = np;
+				child->avl_back = np;
+			} else {
+				/*
+				 * double RL
+				 */
+				register avl64node_t *tmp = child->avl_back;
+
+				ASSERT(child->avl_balance == AVL_BACK && tmp);
+
+				child->avl_back = tmp->avl_forw;
+				if (tmp->avl_forw)
+					tmp->avl_forw->avl_parent = child;
+
+				tmp->avl_forw = child;
+				child->avl_parent = tmp;
+
+				np->avl_forw = tmp->avl_back;
+				if (tmp->avl_back)
+					tmp->avl_back->avl_parent = np;
+
+				tmp->avl_back = np;
+				np->avl_parent = tmp;
+
+				if (tmp->avl_balance == AVL_FORW)
+					np->avl_balance = AVL_BACK;
+				else
+					np->avl_balance = AVL_BALANCE;
+
+				if (tmp->avl_balance == AVL_BACK)
+					child->avl_balance = AVL_FORW;
+				else
+					child->avl_balance = AVL_BALANCE;
+
+				child = tmp;
+			}
+		}
+
+		/* common code: child is the new top of the rotated subtree */
+		child->avl_parent = parent;
+		child->avl_balance = AVL_BALANCE;
+
+		if (parent) {
+			if (parent->avl_back == np)
+				parent->avl_back = child;
+			else
+				parent->avl_forw = child;
+		} else {
+			ASSERT(*rootp == np);
+			*rootp = child;
+		}
+		break;
+	}
+}
+
+/*
+ * Find the node under which a new range [start, end) is to be attached and
+ * report the side via *growthp.  NULL (*growthp untouched) on a collision.
+ */
+static avl64node_t *
+avl64_insert_find_growth(
+		avl64tree_desc_t *tree,
+		__uint64_t start, 	/* range start at start, */
+		__uint64_t end, 	/* exclusive */
+		int   *growthp) 	/* OUT */
+{
+	avl64node_t *root = tree->avl_root;
+	avl64node_t *np = root;
+
+	ASSERT(np); /* caller ensures that there is atleast one node in tree */
+
+	for ( ; ; ) {
+		CERT(np->avl_parent || root == np);
+		CERT(!np->avl_parent || root != np);
+		CERT(!(np->avl_back) || np->avl_back->avl_parent == np);
+		CERT(!(np->avl_forw) || np->avl_forw->avl_parent == np);
+		CERT(np->avl_balance != AVL_FORW || np->avl_forw);
+		CERT(np->avl_balance != AVL_BACK || np->avl_back);
+		CERT(np->avl_balance != AVL_BALANCE ||
+		     np->avl_back == NULL || np->avl_forw);
+		CERT(np->avl_balance != AVL_BALANCE ||
+		     np->avl_forw == NULL || np->avl_back);
+
+		if (AVL_START(tree, np) >= end) {
+			/* the new range sorts before np */
+			if (np->avl_back == NULL) {
+				*growthp = AVL_BACK;
+				return np;
+			}
+			np = np->avl_back;
+		} else if (AVL_END(tree, np) <= start) {
+			/* the new range sorts after np */
+			if (np->avl_forw == NULL) {
+				*growthp = AVL_FORW;
+				return np;
+			}
+			np = np->avl_forw;
+		} else {
+			/* ranges overlap -- let caller decide if it is an error */
+			return NULL;
+		}
+	}
+}
+
+
+/*
+ * Hang newnode off parent on the side given by growth and thread it
+ * into the in-order (avl_nextino) list.  Neither newnode's avl_parent
+ * nor any balance value is touched here.
+ */
+static void
+avl64_insert_grow(
+	avl64tree_desc_t *tree,
+	avl64node_t *parent,
+	avl64node_t *newnode,
+	int growth)
+{
+	avl64node_t *nnext;
+	__uint64_t start = AVL_START(tree, newnode);
+
+	if (growth != AVL_BACK) {
+		/* right child: newnode comes right after parent in order */
+		newnode->avl_nextino = parent->avl_nextino;
+		parent->avl_nextino = newnode;
+		parent->avl_forw = newnode;
+		return;
+	}
+
+	/*
+	 * Left child: parent is next in order after newnode.  Previous
+	 * in order, if any, is the closest ancestor that ends at or
+	 * before start; until now that ancestor pointed to parent.
+	 */
+	parent->avl_back = newnode;
+	newnode->avl_nextino = parent;
+
+	for (nnext = parent; nnext != NULL; nnext = nnext->avl_parent) {
+		if (AVL_END(tree, nnext) <= start)
+			break;
+	}
+
+	/* nnext is NULL when newnode is the least element of the tree */
+	if (nnext != NULL) {
+		ASSERT(nnext->avl_nextino == parent);
+		nnext->avl_nextino = newnode;
+	}
+}
+
+
+/*
+ * Insert newnode into tree, keyed by the range its callbacks report.
+ * Returns newnode, or NULL if the range collides with one already in
+ * the tree (a warning is printed unless the new range has zero length).
+ */
+avl64node_t *
+avl64_insert(
+	register avl64tree_desc_t *tree,
+	register avl64node_t *newnode)
+{
+	register avl64node_t *np;
+	register __uint64_t start;
+	register __uint64_t end;
+	int growth;
+
+	/* check newnode before it is handed to the range callbacks */
+	ASSERT(newnode);
+	start = AVL_START(tree, newnode);
+	end = AVL_END(tree, newnode);
+
+	/*
+	 * Clean all pointers for sanity; some will be reset as necessary.
+	 */
+	newnode->avl_nextino = NULL;
+	newnode->avl_parent = NULL;
+	newnode->avl_forw = NULL;
+	newnode->avl_back = NULL;
+	newnode->avl_balance = AVL_BALANCE;
+
+	if ((np = tree->avl_root) == NULL) { /* degenerate case... */
+		tree->avl_root = newnode;
+		tree->avl_firstino = newnode;
+		return newnode;
+	}
+
+	if ((np = avl64_insert_find_growth(tree, start, end, &growth))
+			== NULL) {
+		/* %llx wants unsigned long long, whatever __uint64_t is */
+		if (start != end)  { /* non-zero length range */
+#ifdef	AVL_USER_MODE
+		printf("avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+				(unsigned long long)start,
+				(unsigned long long)end);
+#else
+			cmn_err(CE_CONT,
+		"!avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+				(unsigned long long)start,
+				(unsigned long long)end);
+#endif
+		}
+		return(NULL);
+	}
+
+	avl64_insert_grow(tree, np, newnode, growth);
+	/* growing to left: if np was firstino, newnode will be firstino */
+	if (growth == AVL_BACK && tree->avl_firstino == np)
+		tree->avl_firstino = newnode;
+
+	newnode->avl_parent = np;
+	CERT(np->avl_forw == newnode || np->avl_back == newnode);
+
+	avl64_balance(&tree->avl_root, np, growth);
+
+	avl64_checktree(tree, tree->avl_root);
+
+	return newnode;
+}
+
+/*
+ * avl64_insert_immediate(tree, afterp, newnode):
+ *	insert newnode into tree immediately after afterp, which must not
+ *	have a right child; after insertion, newnode is right child of afterp.
+ *	With afterp == NULL, newnode becomes root and firstino of the tree.
+ */
+void
+avl64_insert_immediate(
+		avl64tree_desc_t *tree,
+		avl64node_t *afterp,
+		avl64node_t *newnode)
+{
+	/* Clean all pointers for sanity; some will be reset as necessary. */
+	newnode->avl_nextino = NULL;
+	newnode->avl_parent = NULL;
+	newnode->avl_forw = NULL;
+	newnode->avl_back = NULL;
+	newnode->avl_balance = AVL_BALANCE;
+
+	if (afterp == NULL) {
+		tree->avl_root = newnode;
+		tree->avl_firstino = newnode;
+		return;
+	}
+
+	ASSERT(afterp->avl_forw == NULL);
+	avl64_insert_grow(tree, afterp, newnode, AVL_FORW); /* grow to right */
+	/* avl64_insert_grow() leaves avl_parent alone; see avl64_insert() */
+	newnode->avl_parent = afterp;
+	CERT(afterp->avl_forw == newnode);
+	avl64_balance(&tree->avl_root, afterp, AVL_FORW);
+	avl64_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ * Leftmost node of the subtree rooted at root, i.e. the first node in
+ * order; NULL if root is NULL.
+ */
+avl64node_t *
+avl64_firstino(avl64node_t *root)
+{
+	avl64node_t *leftmost = root;
+
+	if (leftmost != NULL) {
+		while (leftmost->avl_back != NULL)
+			leftmost = leftmost->avl_back;
+	}
+	return leftmost;
+}
+
+#ifdef AVL_USER_MODE
+/*
+ * leave this as a user-mode only routine until someone actually
+ * needs it in the kernel
+ */
+
+/*
+ * Rightmost node of the subtree rooted at root, i.e. the last node in
+ * order; NULL if root is NULL.
+ */
+avl64node_t *
+avl64_lastino(avl64node_t *root)
+{
+	avl64node_t *rightmost = root;
+
+	if (rightmost != NULL) {
+		while (rightmost->avl_forw != NULL)
+			rightmost = rightmost->avl_forw;
+	}
+	return rightmost;
+}
+#endif
+
+void
+avl64_init_tree(avl64tree_desc_t *tree, avl64ops_t *ops)
+{
+	/* empty tree: no root, no in-order list; ops supplies the callbacks */
+	tree->avl_ops = ops;
+	tree->avl_firstino = tree->avl_root = NULL;
+}
+
+#ifdef AVL_DEBUG
+static void	/* debug: print np as "[start-last]", then '\n' if nl, else ' ' */
+avl64_printnode(avl64tree_desc_t *tree, avl64node_t *np, int nl)
+{
+	printf("[%llu-%llu]%c", (unsigned long long)AVL_START(tree, np),
+		(unsigned long long)(AVL_END(tree, np) - 1), nl ? '\n' : ' ');
+}
+#endif
+#ifdef STAND_ALONE_DEBUG
+
+/* test node describing the range [avl_start, avl_start + avl_size) */
+struct avl_debug_node {
+	avl64node_t	avl_node;	/* kept first: node pointers are cast */
+	xfs_off_t	avl_start;
+	unsigned int	avl_size;
+};
+
+static __uint64_t
+avl64_debug_start(avl64node_t *node)
+{
+	return (__uint64_t)((struct avl_debug_node *)node)->avl_start;
+}
+
+static __uint64_t
+avl64_debug_end(avl64node_t *node)
+{
+	struct avl_debug_node *dn = (struct avl_debug_node *)node;
+
+	return (__uint64_t)dn->avl_start + dn->avl_size;
+}
+
+avl64ops_t avl_debug_ops = {
+	avl64_debug_start,
+	avl64_debug_end,
+};
+
+struct avl_debug_node 	freenodes[100];
+struct avl_debug_node 	*freehead = &freenodes[0];
+
+/* hand out the next unused entry of freenodes[]; NULL once all are used */
+static struct avl_debug_node *
+alloc_avl64_debug_node(void)
+{
+	if (freehead == freenodes + sizeof(freenodes) / sizeof(freenodes[0]))
+		return NULL;
+	freehead->avl_node.avl_balance = AVL_BALANCE;
+	freehead->avl_node.avl_parent = NULL;
+	freehead->avl_node.avl_forw = freehead->avl_node.avl_back = NULL;
+	return(freehead++);
+}
+
+/* print prompt, read a line from stdin, return its number (0 at EOF) */
+static int
+read_int(const char *prompt)
+{
+	char	buf[256];
+
+	printf("%s", prompt);
+	if (fgets(buf, sizeof(buf), stdin) == NULL)
+		return 0;
+	return atoi(buf);
+}
+
+static void
+avl64_print(avl64tree_desc_t *tree, avl64node_t *root, int depth)
+{
+	int i;
+
+	if (!root)
+		return;
+	if (root->avl_forw)
+		avl64_print(tree, root->avl_forw, depth+5);
+	for (i = 0; i < depth; i++)
+		putchar((int) ' ');
+	avl64_printnode(tree, root,1);
+	if (root->avl_back)
+		avl64_print(tree, root->avl_back, depth+5);
+}
+
+int
+main(void)
+{
+	int 		i, j, checklen;
+	avl64node_t  	*np;
+	struct avl_debug_node	*dn;
+	avl64tree_desc_t	tree;
+	char		linebuf[256], cmd[256];
+
+	avl64_init_tree(&tree, &avl_debug_ops);
+
+	for (i = 100; i > 0; i = i - 10) {
+		dn = alloc_avl64_debug_node();
+		ASSERT(dn);
+		dn->avl_start = i;
+		dn->avl_size = 10;
+		avl64_insert(&tree, &dn->avl_node);
+	}
+	avl64_print(&tree, tree.avl_root, 0);
+	for (np = tree.avl_firstino; np != NULL; np = np->avl_nextino)
+		avl64_printnode(&tree, np, 0);
+	printf("\n");
+
+	while (1) {
+		printf("Command [fpdir] : ");
+		if (fgets(linebuf, 256, stdin) == NULL)
+			break;
+		cmd[0] = '\0';
+		if (sscanf(linebuf, "%[fpdir]%d", cmd, &i) != 2)
+			continue;
+		switch (cmd[0]) {
+		case 'd':
+		case 'f':
+			j = read_int("end of range ? ");
+			if (i == j) j = i+1;
+			np = avl64_findanyrange(&tree, i, j,
+						AVL_INCLUDE_ZEROLEN);
+			if (np) {
+				avl64_printnode(&tree, np, 1);
+				if (cmd[0] == 'd')
+					avl64_delete(&tree, np);
+			} else
+				printf("Cannot find %d\n", i);
+			break;
+		case 'p':
+			avl64_print(&tree, tree.avl_root, 0);
+			for (np = tree.avl_firstino; np; np = np->avl_nextino)
+				avl64_printnode(&tree, np, 0);
+			printf("\n");
+			break;
+		case 'i':
+			dn = alloc_avl64_debug_node();
+			if (dn == NULL) {
+				printf("out of nodes\n");
+				break;
+			}
+			dn->avl_start = i;
+			dn->avl_size = read_int("size of range ? ");
+			avl64_insert(&tree, &dn->avl_node);
+			break;
+		case 'r':
+			j = read_int("End of range ? ");
+			checklen = read_int("checklen 0/1 ? ");
+			np = avl64_findanyrange(&tree, i, j, checklen);
+			if (np == NULL)
+				break;
+			printf("Found something\n");
+			do {
+				avl64_printnode(&tree, np, 0);
+				np = np->avl_nextino;
+			} while (np && AVL_START(&tree, np) < (__uint64_t)j);
+			printf("\n");
+			break;
+		}
+	}
+	return 0;
+}
+#endif
+
+/*
+ * 	Given a tree, find value; returns the range enclosing value, or else
+ *	the range immediately succeeding value (dir == AVL_SUCCEED),
+ * 	or the range immediately preceeding value (dir == AVL_PRECEED).
+ */
+avl64node_t *
+avl64_findadjacent(
+	avl64tree_desc_t *tree,
+	__uint64_t value,
+	int		dir)
+{
+	avl64node_t *np = tree->avl_root;
+
+	while (np != NULL) {
+		if (value < AVL_START(tree, np)) {
+			if (np->avl_back != NULL) {
+				np = np->avl_back;
+				continue;
+			}
+
+			/*
+			 * Bottom of the tree: a node for value would be
+			 * attached here with a growth of AVL_BACK, so np
+			 * is the succeeding node.
+			 */
+			if (dir == AVL_SUCCEED)
+				return np;
+
+			if (dir != AVL_PRECEED) {
+				ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+				return NULL;
+			}
+
+			/*
+			 * The preceeding node is the nearest ancestor
+			 * that ends at or before value, if there is one.
+			 */
+			for (np = np->avl_parent; np != NULL;
+			     np = np->avl_parent) {
+				if (AVL_END(tree, np) <= value)
+					break;
+			}
+			return np;
+		}
+
+		if (value >= AVL_END(tree, np)) {
+			if (np->avl_forw != NULL) {
+				np = np->avl_forw;
+				continue;
+			}
+
+			/*
+			 * Bottom of the tree: a node for value would be
+			 * attached here with a growth of AVL_FORW, so np
+			 * is the preceeding node and its nextino the
+			 * succeeding one.
+			 */
+			if (dir == AVL_SUCCEED)
+				return np->avl_nextino;
+			ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+		}
+
+		/* np precedes value, or encloses it */
+		return np;
+	}
+	return NULL;
+}
+
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+/*
+ *  	avl_findranges:
+ *
+ *	Given range r [start, end), find the ranges in tree that fall
+ *	within r.  At return, *startp and *endp point to the first and the
+ *	last element of the chain describing them; both are NULL if there
+ *	is none.  The elements *startp ... *endp are in sort order and
+ *	can be walked through avl_nextino.
+ */
+
+void
+avl64_findranges(
+	avl64tree_desc_t *tree,
+	__uint64_t start,
+	__uint64_t end,
+	avl64node_t 	        **startp,
+	avl64node_t		**endp)
+{
+	avl64node_t *first;
+	avl64node_t *after;
+
+	first = avl64_findadjacent(tree, start, AVL_SUCCEED);
+
+	/*
+	 * Either nothing succeeds start, or what follows start lies
+	 * entirely after end.
+	 */
+	if (first == NULL || end <= AVL_START(tree, first)) {
+		*startp = NULL;
+		*endp = NULL;
+		return;
+	}
+
+	*startp = first;
+
+	/* the chain is a single element if end is reached within first */
+	after = first->avl_nextino;
+	if (end <= AVL_END(tree, first) || after == NULL ||
+	    end <= AVL_START(tree, after)) {
+		*endp = first;
+		return;
+	}
+
+	/*
+	 * Look up (end - 1) rather than end: avl64_findadjacent() takes
+	 * the value literally, while end is one past the last value
+	 * actually being looked for.
+	 */
+	*endp = avl64_findadjacent(tree, (end-1), AVL_PRECEED);
+	ASSERT(*endp);
+}
+
+#endif /* AVL_FUTURE_ENHANCEMENTS */
diff --git a/repair/avl64.h b/repair/avl64.h
new file mode 100644
index 000000000..26ed977c3
--- /dev/null
+++ b/repair/avl64.h
@@ -0,0 +1,151 @@
+/**************************************************************************
+ *									  *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *									  *
+ **************************************************************************/
+#ifndef __XR_AVL64_H__
+#define __XR_AVL64_H__
+
+#include <sys/types.h>
+
+typedef struct	avl64node {
+	struct 	avl64node	*avl_forw;	/* pointer to right child  (> parent) */
+	struct 	avl64node *avl_back;	/* pointer to left child  (< parent) */
+	struct	avl64node *avl_parent;	/* parent pointer */
+	struct	avl64node *avl_nextino;	/* in-order successor; NULL ends the list */
+	char		 avl_balance;	/* AVL_BACK, AVL_BALANCE or AVL_FORW */
+} avl64node_t;
+
+/*
+ * avl-tree operations: callbacks reached through AVL_START()/AVL_END()
+ */
+typedef struct avl64ops {
+	__uint64_t	(*avl_start)(avl64node_t *);	/* start of a node's range */
+	__uint64_t	(*avl_end)(avl64node_t *);	/* end of a node's range */
+} avl64ops_t;
+
+/*
+ * avoid complaints about multiple def's since these are only used by
+ * the avl code internally
+ */
+#ifndef AVL_START
+#define	AVL_START(tree, n)	(*(tree)->avl_ops->avl_start)(n)
+#define	AVL_END(tree, n)	(*(tree)->avl_ops->avl_end)(n)
+#endif
+
+/*
+ * tree descriptor:
+ *	avl_root points to the root of the tree.
+ *	avl_firstino points to the first node in the ordered list.
+ */
+typedef struct avl64tree_desc {
+	avl64node_t	*avl_root;
+	avl64node_t	*avl_firstino;
+	avl64ops_t	*avl_ops;	/* callbacks used by AVL_START/AVL_END */
+} avl64tree_desc_t;
+
+/* possible values for avl_balance */
+
+#define AVL_BACK	1
+#define AVL_BALANCE	0
+#define AVL_FORW	2
+
+/*
+ * 'Exported' avl tree routines
+ */
+avl64node_t
+*avl64_insert(
+	avl64tree_desc_t *tree,
+	avl64node_t *newnode);
+
+void
+avl64_delete(
+	avl64tree_desc_t *tree,
+	avl64node_t *np);
+
+void
+avl64_insert_immediate(
+	avl64tree_desc_t *tree,
+	avl64node_t *afterp,
+	avl64node_t *newnode);
+	
+void
+avl64_init_tree(
+	avl64tree_desc_t  *tree,
+	avl64ops_t *ops);
+
+avl64node_t *
+avl64_findrange(
+	avl64tree_desc_t *tree,
+	__uint64_t value);
+
+avl64node_t *
+avl64_find(
+	avl64tree_desc_t *tree,
+	__uint64_t value);
+
+avl64node_t *
+avl64_findanyrange(
+	avl64tree_desc_t *tree,
+	__uint64_t	start,
+	__uint64_t	end,
+	int     checklen);
+
+
+avl64node_t *
+avl64_findadjacent(
+	avl64tree_desc_t *tree,
+	__uint64_t	value,
+	int		dir);
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+void
+avl64_findranges(
+	register avl64tree_desc_t *tree,
+	register __uint64_t	start,
+	register __uint64_t	end,
+	avl64node_t 	        **startp,
+	avl64node_t		**endp);
+#endif
+
+/*
+ * avoid complaints about multiple def's since these are only used by
+ * the avl code internally
+ */
+#ifndef AVL_PRECEED
+#define AVL_PRECEED	0x1
+#define AVL_SUCCEED	0x2
+
+#define AVL_INCLUDE_ZEROLEN	0x0000
+#define AVL_EXCLUDE_ZEROLEN	0x0001
+#endif
+
+#endif /* __XR_AVL64_H__ */
diff --git a/repair/bmap.c b/repair/bmap.c
new file mode 100644
index 000000000..47c8bbf1c
--- /dev/null
+++ b/repair/bmap.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "err_protos.h"
+#include "bmap.h"
+
+/*
+ * Block mapping code taken from xfs_db.
+ */
+
+/* Append c blocks starting at b to the entry at *entp.  If the entry
+ * cannot be enlarged it is freed and *entp is set to NULL. */
+void
+blkent_append(
+	blkent_t	**entp,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	blkent_t	*ent = *entp;
+	size_t		size = BLKENT_SIZE(c + ent->nblks);
+	int		i;
+
+	if ((*entp = realloc(ent, size)) == NULL) {
+		do_warn("realloc failed in blkent_append (%llu bytes)\n",
+			(unsigned long long)size);
+		free(ent);	/* realloc failure leaves the old entry allocated */
+		return;
+	}
+	ent = *entp;
+	for (i = 0; i < c; i++)
+		ent->blks[ent->nblks + i] = b + i;
+	ent->nblks += c;
+}
+
+/*
+ * Make a new block entry: c blocks from b at file offset o; NULL on failure.
+ */
+blkent_t *
+blkent_new(
+	xfs_dfiloff_t	o,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	blkent_t	*ent;
+	int		i;
+
+	if ((ent = malloc(BLKENT_SIZE(c))) == NULL) {
+		do_warn("malloc failed in blkent_new (%llu bytes)\n",
+			(unsigned long long)BLKENT_SIZE(c));	/* match %llu */
+		return NULL;
+	}
+	ent->nblks = c;
+	ent->startoff = o;
+	for (i = 0; i < c; i++)
+		ent->blks[i] = b + i;
+	return ent;
+}
+
+/*
+ * Prepend c blocks from b to *entp; on failure frees it and NULLs *entp.
+ */
+void
+blkent_prepend(
+	blkent_t	**entp,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	int		i;
+	blkent_t	*newent;
+	blkent_t	*oldent = *entp;
+
+	if ((newent = malloc(BLKENT_SIZE(oldent->nblks + c))) == NULL) {
+		do_warn("malloc failed in blkent_prepend (%llu bytes)\n",
+			(unsigned long long)BLKENT_SIZE(oldent->nblks + c));
+		free(oldent);	/* unreachable once *entp is cleared */
+		*entp = NULL;
+		return;
+	}
+	newent->nblks = oldent->nblks + c;
+	newent->startoff = oldent->startoff - c;
+	for (i = 0; i < c; i++)
+		newent->blks[i] = b + i;	/* new blocks are b, b+1, ... */
+	for (; i < oldent->nblks + c; i++)
+		newent->blks[i] = oldent->blks[i - c];
+	free(oldent);
+	*entp = newent;
+}
+
+/*
+ * Allocate an empty block map with room for nex (at least 1) entries.
+ */
+blkmap_t *
+blkmap_alloc(
+	xfs_extnum_t	nex)
+{
+	blkmap_t	*blkmap;
+
+	if (nex < 1)
+		nex = 1;
+	if ((blkmap = malloc(BLKMAP_SIZE(nex))) == NULL) {
+		do_warn("malloc failed in blkmap_alloc (%llu bytes)\n",
+			(unsigned long long)BLKMAP_SIZE(nex));	/* match %llu */
+		return NULL;
+	}
+	blkmap->naents = nex;
+	blkmap->nents = 0;
+	return blkmap;
+}
+
+/*
+ * Release a block map together with every block entry it holds.
+ */
+void
+blkmap_free(
+	blkmap_t	*blkmap)
+{
+	xfs_extnum_t	n;
+
+	/* a NULL map is accepted and ignored */
+	if (blkmap != NULL) {
+		for (n = 0; n < blkmap->nents; n++)
+			free(blkmap->ents[n]);
+		free(blkmap);
+	}
+}
+
+/*
+ * Look up the block mapped at file offset o; NULLDFSBNO if it is unmapped.
+ */
+xfs_dfsbno_t
+blkmap_get(
+	blkmap_t	*blkmap,
+	xfs_dfiloff_t	o)
+{
+	blkent_t	*cur;
+	int		n;
+
+	for (n = 0; n < blkmap->nents; n++) {
+		cur = blkmap->ents[n];
+		if (o < cur->startoff || o >= cur->startoff + cur->nblks)
+			continue;
+		return cur->blks[o - cur->startoff];
+	}
+	return NULLDFSBNO;
+}
+
+/*
+ * Get the extents mapping file offsets [o, o + nb): returns their count
+ * and a heap array in *bmpp (0 and NULL if none, or if realloc fails).
+ */
+int
+blkmap_getn(
+	blkmap_t	*blkmap,
+	xfs_dfiloff_t	o,
+	xfs_dfilblks_t	nb,
+	bmap_ext_t	**bmpp)
+{
+	bmap_ext_t	*bmp;
+	blkent_t	*ent;
+	xfs_dfiloff_t	ento;
+	int		i;
+	int		nex;
+
+	for (i = nex = 0, bmp = NULL; i < blkmap->nents; i++) {
+		ent = blkmap->ents[i];
+		if (ent->startoff >= o + nb)
+			break;
+		if (ent->startoff + ent->nblks <= o)
+			continue;
+		for (ento = ent->startoff > o ? ent->startoff : o;
+		     ento < ent->startoff + ent->nblks && ento < o + nb;
+		     ento++) {
+			if (bmp &&
+			    bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+				    ento &&
+			    bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+				    ent->blks[ento - ent->startoff])
+				bmp[nex - 1].blockcount++;
+			else {
+				/* grow via *bmpp so bmp survives a failure */
+				*bmpp = realloc(bmp, ++nex * sizeof(*bmp));
+				if (*bmpp == NULL) {
+					do_warn("realloc failed in blkmap_getn"
+						" (%llu bytes)\n",
+						(unsigned long long)
+						(nex * sizeof(*bmp)));
+					free(bmp);
+					return 0;
+				}
+				bmp = *bmpp;
+				bmp[nex - 1].startoff = ento;
+				bmp[nex - 1].startblock =
+					ent->blks[ento - ent->startoff];
+				bmp[nex - 1].blockcount = 1;
+				bmp[nex - 1].flag = 0;
+			}
+		}
+	}
+	*bmpp = bmp;
+	return nex;
+}
+
+/*
+ * Insert newent at slot entp, growing a full map by one slot first.
+ */
+void
+blkmap_grow(
+	blkmap_t	**blkmapp,
+	blkent_t	**entp,
+	blkent_t	*newent)
+{
+	blkmap_t	*blkmap = *blkmapp;
+	size_t		size = BLKMAP_SIZE(blkmap->nents + 1);
+	int		i;
+	int		idx = (int)(entp - blkmap->ents);
+
+	if (blkmap->naents == blkmap->nents) {
+		if ((*blkmapp = realloc(blkmap, size)) == NULL) {
+			do_warn("realloc failed in blkmap_grow (%llu bytes)\n",
+				(unsigned long long)size);
+			blkmap_free(blkmap);	/* old map was left allocated */
+			free(newent);		/* and nothing owns newent now */
+			return;
+		}
+		blkmap = *blkmapp;
+		blkmap->naents++;
+	}
+	for (i = blkmap->nents; i > idx; i--)
+		blkmap->ents[i] = blkmap->ents[i - 1];
+	blkmap->ents[idx] = newent;
+	blkmap->nents++;
+}
+
+/*
+ * Offset one past the end of the final entry; NULLDFILOFF for an empty map.
+ */
+xfs_dfiloff_t
+blkmap_last_off(
+	blkmap_t	*blkmap)
+{
+	blkent_t	*tail;
+
+	if (blkmap->nents == 0)
+		return NULLDFILOFF;
+	tail = blkmap->ents[blkmap->nents - 1];
+	return tail->startoff + tail->nblks;
+}
+
+/*
+ * Step to the mapped offset following o.  *t is the caller-held index of
+ * the entry being walked; passing NULLDFILOFF for o restarts at entry 0.
+ */
+xfs_dfiloff_t
+blkmap_next_off(
+	blkmap_t	*blkmap,
+	xfs_dfiloff_t	o,
+	int		*t)
+{
+	blkent_t	*cur;
+	int		next;
+
+	if (blkmap->nents == 0)
+		return NULLDFILOFF;
+	if (o == NULLDFILOFF) {
+		*t = 0;
+		return blkmap->ents[0]->startoff;
+	}
+	cur = blkmap->ents[*t];
+	/* still inside the current entry */
+	if (o < cur->startoff + cur->nblks - 1)
+		return o + 1;
+	/* otherwise move on to the start of the next entry, if any */
+	next = *t + 1;
+	if (next >= blkmap->nents)
+		return NULLDFILOFF;
+	*t = next;
+	return blkmap->ents[next]->startoff;
+}
+
+/*
+ * Map file offset o to block b, extending or merging entries as needed.
+ */
+void
+blkmap_set_blk(
+	blkmap_t	**blkmapp,
+	xfs_dfiloff_t	o,
+	xfs_dfsbno_t	b)
+{
+	blkmap_t	*blkmap;
+	blkent_t	*ent;
+	blkent_t	**entp;
+	blkent_t	*nextent;
+
+	blkmap = *blkmapp;
+	for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+		ent = *entp;
+		if (o < ent->startoff - 1) {	/* gap before ent: insert a new entry */
+			ent = blkent_new(o, b, 1);
+			blkmap_grow(blkmapp, entp, ent);
+			return;
+		}
+		if (o == ent->startoff - 1) {	/* o sits just before ent */
+			blkent_prepend(entp, b, 1);
+			return;
+		}
+		if (o >= ent->startoff && o < ent->startoff + ent->nblks) {	/* inside ent */
+			ent->blks[o - ent->startoff] = b;
+			return;
+		}
+		if (o > ent->startoff + ent->nblks)	/* beyond ent: try the next entry */
+			continue;
+		blkent_append(entp, b, 1);	/* here o == ent->startoff + ent->nblks */
+		if (entp == &blkmap->ents[blkmap->nents - 1])
+			return;
+		ent = *entp;	/* blkent_append may have moved the entry */
+		nextent = entp[1];
+		if (ent->startoff + ent->nblks < nextent->startoff)
+			return;
+		blkent_append(entp, nextent->blks[0], nextent->nblks);	/* NOTE(review): copies nextent as one contiguous run from blks[0] */
+		blkmap_shrink(blkmap, &entp[1]);
+		return;
+	}
+	ent = blkent_new(o, b, 1);	/* o lies past every entry: add at the end */
+	blkmap_grow(blkmapp, entp, ent);
+}
+
+/*
+ * Record an extent (c blocks from block b at file offset o) in a block map.
+ */
+void
+blkmap_set_ext(
+	blkmap_t	**blkmapp,
+	xfs_dfiloff_t	o,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	blkmap_t	*map = *blkmapp;
+	blkent_t	**lastp;
+	blkent_t	*last;
+	xfs_extnum_t	i;
+
+	if (map->nents == 0) {
+		map->ents[0] = blkent_new(o, b, c);
+		map->nents = 1;
+		return;
+	}
+	lastp = &map->ents[map->nents - 1];
+	last = *lastp;
+	if (last->startoff + last->nblks == o) {
+		/* contiguous with the final entry: extend it in place */
+		blkent_append(lastp, b, c);
+	} else if (last->startoff + last->nblks < o) {
+		/* beyond the final entry: add a fresh entry at the tail */
+		last = blkent_new(o, b, c);
+		blkmap_grow(blkmapp, &map->ents[map->nents], last);
+	} else {
+		/* starts before the end of the final entry: set block by block */
+		for (i = 0; i < c; i++)
+			blkmap_set_blk(blkmapp, o + i, b + i);
+	}
+}
+
+/*
+ * Remove the entry at entp from the map: free it and close up the gap.
+ */
+void
+blkmap_shrink(
+	blkmap_t	*blkmap,
+	blkent_t	**entp)
+{
+	int		i;
+	int		idx;
+
+	free(*entp);
+	idx = (int)(entp - blkmap->ents);
+	for (i = idx + 1; i < blkmap->nents; i++)
+		blkmap->ents[i - 1] = blkmap->ents[i];	/* slide later entries down */
+	blkmap->nents--;
+}
diff --git a/repair/bmap.h b/repair/bmap.h
new file mode 100644
index 000000000..0b184ee6e
--- /dev/null
+++ b/repair/bmap.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Block map entry: block numbers for nblks consecutive file offsets.
+ */
+typedef struct blkent {
+	xfs_dfiloff_t	startoff;	/* file offset that blks[0] maps */
+	xfs_dfilblks_t	nblks;		/* number of elements used in blks[] */
+	xfs_dfsbno_t	blks[1];	/* allocated to nblks elements via BLKENT_SIZE */
+} blkent_t;
+#define	BLKENT_SIZE(n)	\
+	(offsetof(blkent_t, blks) + (sizeof(xfs_dfsbno_t) * (n)))
+
+/*
+ * Block map: an array of block map entry pointers.
+ */
+typedef	struct blkmap {
+	int		naents;		/* slots allocated in ents[] */
+	int		nents;		/* slots of ents[] currently in use */
+	blkent_t	*ents[1];	/* allocated to naents slots via BLKMAP_SIZE */
+} blkmap_t;
+#define	BLKMAP_SIZE(n)	\
+	(offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))
+
+/*
+ * Extent descriptor, as produced by blkmap_getn().
+ */
+typedef struct bmap_ext {
+	xfs_dfiloff_t	startoff;	/* first file offset of the extent */
+	xfs_dfsbno_t	startblock;	/* block mapped at startoff */
+	xfs_dfilblks_t	blockcount;	/* number of consecutive blocks */
+	int		flag;		/* blkmap_getn() always stores 0 */
+} bmap_ext_t;
+
+void		blkent_append(blkent_t **entp, xfs_dfsbno_t b,
+			      xfs_dfilblks_t c);
+blkent_t	*blkent_new(xfs_dfiloff_t o, xfs_dfsbno_t b, xfs_dfilblks_t c);
+void		blkent_prepend(blkent_t **entp, xfs_dfsbno_t b,
+			       xfs_dfilblks_t c);
+blkmap_t	*blkmap_alloc(xfs_extnum_t);
+void		blkmap_free(blkmap_t *blkmap);
+xfs_dfsbno_t	blkmap_get(blkmap_t *blkmap, xfs_dfiloff_t o);
+int		blkmap_getn(blkmap_t *blkmap, xfs_dfiloff_t o,
+			    xfs_dfilblks_t nb, bmap_ext_t **bmpp);
+void		blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+			    blkent_t *newent);
+xfs_dfiloff_t	blkmap_last_off(blkmap_t *blkmap);
+xfs_dfiloff_t	blkmap_next_off(blkmap_t *blkmap, xfs_dfiloff_t o, int *t);
+void		blkmap_set_blk(blkmap_t **blkmapp, xfs_dfiloff_t o,
+			       xfs_dfsbno_t b);
+void		blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
+			       xfs_dfsbno_t b, xfs_dfilblks_t c);
+void		blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c
new file mode 100644
index 000000000..90d2e9f93
--- /dev/null
+++ b/repair/dino_chunks.c
@@ -0,0 +1,1178 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dinode.h"
+#include "versions.h"
+
+/*
+ * counts the good inodes in one candidate inode block.  each dinode
+ * is vetted with verify_uncertain_dinode(), which means only the basic
+ * inode info is checked, no fork checks.  returns 0 on a read failure.
+ */
+
+int
+check_aginode_block(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno,
+			xfs_agblock_t	agbno)
+{
+	xfs_buf_t	*buf;
+	xfs_dinode_t	*dip;
+	int		good = 0;
+	int		slot = 0;
+
+	/*
+	 * it's ok to read these possible inode blocks in one at
+	 * a time because they don't belong to known inodes (if
+	 * they did, we'd know about them courtesy of the incore inode
+	 * tree and we wouldn't be here) and we stale the buffers out
+	 * so no one else will overlap them.
+	 */
+	buf = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
+			XFS_FSB_TO_BB(mp, 1), 0);
+	if (buf == NULL) {
+		do_warn("cannot read agbno (%u/%u), disk block %lld\n", agno,
+			agbno, (xfs_daddr_t)XFS_AGB_TO_DADDR(mp, agno, agbno));
+		return(0);
+	}
+
+	while (slot < mp->m_sb.sb_inopblock)  {
+		dip = XFS_MAKE_IPTR(mp, buf, slot);
+		if (verify_uncertain_dinode(mp, dip, agno,
+				XFS_OFFBNO_TO_AGINO(mp, agbno, slot)) == 0)
+			good++;
+		slot++;
+	}
+
+	libxfs_putbuf(buf);
+	return(good);
+}
+
+int
+check_inode_block(xfs_mount_t		*mp,
+			xfs_ino_t	ino)
+{
+	xfs_agblock_t	agbno = XFS_INO_TO_AGBNO(mp, ino);	/* block holding ino */
+	return(check_aginode_block(mp, XFS_INO_TO_AGNO(mp, ino), agbno));
+}
+
+/*
+ * tries to establish if the inode really exists in a valid
+ * inode chunk.  returns an inode count if things are good
+ * and 0 if bad.  *start_ino is set to the start of the discovered
+ * inode chunk and stays NULLFSINO on the return(0) paths.
+ * routine assumes that ino is a legal inode number
+ * (verified by verify_inum()).  If the inode chunk turns out
+ * to be good, this routine will put the inode chunk into
+ * the good inode chunk tree if required.
+ * the verify_(ag)inode* family of routines are utility
+ * routines called by check_uncertain_aginodes() and
+ * process_uncertain_aginodes().
+ */
+int
+verify_inode_chunk(xfs_mount_t		*mp,
+			xfs_ino_t	ino,
+			xfs_ino_t	*start_ino)
+{
+	xfs_agnumber_t	agno;
+	xfs_agino_t	agino;
+	xfs_agino_t	start_agino;
+	xfs_agblock_t	agbno;
+	xfs_agblock_t	start_agbno = 0;
+	xfs_agblock_t	end_agbno;
+	xfs_agblock_t	max_agbno;
+	xfs_agblock_t	cur_agbno;
+	xfs_agblock_t	chunk_start_agbno;
+	xfs_agblock_t	chunk_stop_agbno;
+	ino_tree_node_t *irec_before_p = NULL;
+	ino_tree_node_t *irec_after_p = NULL;
+	ino_tree_node_t *irec_p;
+	ino_tree_node_t *irec_next_p;
+	int		irec_cnt;
+	int		ino_cnt = 0;
+	int		num_blks;
+	int		i;
+	int		j;
+	int		state;
+
+        agno = XFS_INO_TO_AGNO(mp, ino);
+        agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_INO_TO_AGBNO(mp, ino);
+	*start_ino = NULLFSINO;
+
+	ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+	if (agno == mp->m_sb.sb_agcount - 1)
+		max_agbno = mp->m_sb.sb_dblocks -
+			(xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno;
+	else
+		max_agbno = mp->m_sb.sb_agblocks;
+
+	/*
+	 * is the inode beyond the end of the AG?
+	 */
+	if (agbno >= max_agbno)
+		return(0);
+
+	/*
+	 * check for the easy case, inodes per block >= XFS_INODES_PER_CHUNK
+	 * (multiple chunks per block)
+	 */
+	if (XFS_IALLOC_BLOCKS(mp) == 1)  {
+		if (agbno > max_agbno)	/* NOTE(review): redundant, agbno >= max_agbno returned above */
+			return(0);
+
+		if (check_inode_block(mp, ino) == 0)
+			return(0);
+
+		switch (state = get_agbno_state(mp, agno, agbno))  {
+		case XR_E_INO:
+			do_warn("uncertain inode block %d/%d already known\n",
+				agno, agbno);
+			break;
+		case XR_E_UNKNOWN:
+		case XR_E_FREE1:
+		case XR_E_FREE:
+			set_agbno_state(mp, agno, agbno, XR_E_INO);
+			break;
+		case XR_E_MULT:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_FS_MAP:
+			/*
+			 * if block is already claimed, forget it.
+			 */
+			do_warn(
+			    "inode block %d/%d multiply claimed, (state %d)\n",
+				agno, agbno, state);
+			set_agbno_state(mp, agno, agbno, XR_E_MULT);
+			return(0);
+		default:
+			do_warn("inode block %d/%d bad state, (state %d)\n",
+				agno, agbno, state);
+			set_agbno_state(mp, agno, agbno, XR_E_INO);
+			break;
+		}
+
+		start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
+		*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+		/*
+		 * put new inode record(s) into inode tree
+		 */
+		for (j = 0; j < chunks_pblock; j++)  {
+			if ((irec_p = find_inode_rec(agno, start_agino))
+					== NULL)  {
+				irec_p = set_inode_free_alloc(agno,
+							start_agino);
+				for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+					set_inode_free(irec_p, i);
+			}
+			if (start_agino <= agino && agino <
+					start_agino + XFS_INODES_PER_CHUNK)
+				set_inode_used(irec_p, agino - start_agino);
+
+			start_agino += XFS_INODES_PER_CHUNK;
+			ino_cnt += XFS_INODES_PER_CHUNK;
+		}
+
+		return(ino_cnt);
+	} else if (fs_aligned_inodes)  {
+		/*
+		 * next easy case -- aligned inode filesystem.
+		 * just check out the chunk
+		 */
+		start_agbno = rounddown(XFS_INO_TO_AGBNO(mp, ino),
+					fs_ino_alignment);
+		end_agbno = start_agbno + XFS_IALLOC_BLOCKS(mp);
+
+		/*
+		 * if this fs has aligned inodes but the end of the
+		 * chunk is beyond the end of the ag, this is a bad
+		 * chunk
+		 */
+		if (end_agbno > max_agbno)
+			return(0);
+
+		/*
+		 * check out all blocks in chunk
+		 */
+		ino_cnt = 0;
+		for (cur_agbno = start_agbno; cur_agbno < end_agbno;
+						cur_agbno++)  {
+			ino_cnt += check_aginode_block(mp, agno, cur_agbno);
+		}
+
+		/*
+		 * if we lose either 2 blocks worth of inodes or >25% of
+		 * the chunk, just forget it.
+		 */
+		if (ino_cnt < XFS_INODES_PER_CHUNK - 2 * mp->m_sb.sb_inopblock
+				|| ino_cnt < XFS_INODES_PER_CHUNK - 16)
+			return(0);
+
+		/*
+		 * ok, put the record into the tree.  we know that it's
+		 * not already there since the inode is guaranteed
+		 * not to be in the tree.
+		 */
+		start_agino = XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0);
+		*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+		irec_p = set_inode_free_alloc(agno,
+				XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0));
+
+		for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+			set_inode_free(irec_p, i);
+
+		ASSERT(start_agino <= agino &&
+				start_agino + XFS_INODES_PER_CHUNK > agino);
+
+		set_inode_used(irec_p, agino - start_agino);
+
+		return(XFS_INODES_PER_CHUNK);
+	}
+
+	/*
+	 * hard case -- pre-6.3 filesystem.
+	 * set default start/end agbnos and ensure agbnos are legal.
+	 * we're setting a range [start_agbno, end_agbno) such that
+	 * a discovered inode chunk completely within that range
+	 * would include the inode passed into us.
+	 */
+	if (XFS_IALLOC_BLOCKS(mp) > 1)  {
+		if (agino > XFS_IALLOC_INODES(mp))
+			start_agbno = agbno - XFS_IALLOC_BLOCKS(mp) + 1;
+		else
+			start_agbno = 1;
+	}
+
+	end_agbno = agbno + XFS_IALLOC_BLOCKS(mp);
+
+	if (end_agbno > max_agbno)
+		end_agbno = max_agbno;
+
+	/*
+	 * search tree for known inodes within +/- 1 inode chunk range
+	 */
+	irec_before_p = irec_after_p = NULL;
+
+	find_inode_rec_range(agno, XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0),
+		XFS_OFFBNO_TO_AGINO(mp, end_agbno, mp->m_sb.sb_inopblock - 1),
+		&irec_before_p, &irec_after_p);
+
+	/*
+	 * if we have known inode chunks in our search range, establish
+	 * their start and end-points to tighten our search range.  range
+	 * is [start, end) -- e.g. max/end agbno is one beyond the
+	 * last block to be examined.  the avl routines work this way.
+	 */
+	if (irec_before_p)  {
+		/*
+		 * only one inode record in the range, move one boundary in
+		 */
+		if (irec_before_p == irec_after_p)  {
+			if (irec_before_p->ino_startnum < agino)
+				start_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_before_p->ino_startnum +
+						XFS_INODES_PER_CHUNK);
+			else
+				end_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_before_p->ino_startnum);
+		}
+
+		/*
+		 * find the start of the gap in the search range (which
+		 * should contain our unknown inode).  if the only irec
+		 * within +/- 1 chunks starts after the inode we're
+		 * looking for, skip this stuff since the end_agbno
+		 * of the range has already been trimmed in to not
+		 * include that irec.
+		 */
+		if (irec_before_p->ino_startnum < agino)  {
+			irec_p = irec_before_p;
+			irec_next_p = next_ino_rec(irec_p);
+
+			while(irec_next_p != NULL &&
+				irec_p->ino_startnum + XFS_INODES_PER_CHUNK ==
+					irec_next_p->ino_startnum)  {
+				irec_p = irec_next_p;
+				irec_next_p = next_ino_rec(irec_next_p);
+			}
+
+			start_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_p->ino_startnum) +
+						XFS_IALLOC_BLOCKS(mp);
+
+			/*
+			 * we know that the inode we're trying to verify isn't
+			 * in an inode chunk so the next ino_rec marks the end
+			 * of the gap -- is it within the search range?
+			 */
+			if (irec_next_p != NULL &&
+					agino + XFS_IALLOC_INODES(mp) >=
+						irec_next_p->ino_startnum)
+				end_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_next_p->ino_startnum);
+		}
+
+		ASSERT(start_agbno < end_agbno);
+	}
+
+	/*
+	 * if the gap is too small to contain a chunk, we lose.
+	 * this means that inode chunks known to be good surround
+	 * the inode in question and that the space between them
+	 * is too small for a legal inode chunk
+	 */
+	if (end_agbno - start_agbno < XFS_IALLOC_BLOCKS(mp))
+		return(0);
+
+	/*
+	 * now grunge around the disk, start at the inode block and
+	 * go in each direction until you hit a non-inode block or
+	 * run into a range boundary.  A non-inode block is block
+	 * with *no* good inodes in it.  Unfortunately, we can't
+	 * co-opt bad blocks into inode chunks (which might take
+	 * care of disk blocks that turn into zeroes) because the
+	 * filesystem could very well allocate two inode chunks
+	 * with a one block file in between and we'd zap the file.
+	 * We're better off just losing the rest of the
+	 * inode chunk instead.
+	 */
+	for (cur_agbno = agbno; cur_agbno >= start_agbno; cur_agbno--)  {
+		/*
+		 * if the block has no inodes, it's a bad block so
+		 * break out now without decrementing cur_agbno so
+		 * chunk start blockno will be set to the last good block
+		 */
+		if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
+			break;
+		ino_cnt += irec_cnt;
+	}
+
+	chunk_start_agbno = cur_agbno + 1;
+
+	for (cur_agbno = agbno + 1; cur_agbno < end_agbno; cur_agbno++)   {
+		/*
+		 * if the block has no inodes, it's a bad block so
+		 * break out now without incrementing cur_agbno so
+		 * chunk stop blockno will be set to the block
+		 * immediately after the last good block.
+		 */
+		if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
+			break;
+		ino_cnt += irec_cnt;
+	}
+
+	chunk_stop_agbno = cur_agbno;
+
+	num_blks = chunk_stop_agbno - chunk_start_agbno;
+
+	if (num_blks < XFS_IALLOC_BLOCKS(mp) || ino_cnt == 0)
+		return(0);
+
+	/*
+	 * XXX - later - if the entire range is selected and they're all
+	 * good inodes, keep searching in either direction
+	 * until the range of inodes ends, then split into chunks.
+	 * for now, just take one chunk's worth starting at the lowest
+	 * possible point and hopefully we'll pick the rest up later.
+	 *
+	 * XXX - if we were going to fix up an inode chunk for
+	 * any good inodes in the chunk, this is where we would
+	 * do it.  For now, keep it simple and lose the rest of
+	 * the chunk
+	 */
+
+	if (num_blks % XFS_IALLOC_BLOCKS(mp) != 0)  {
+		num_blks = rounddown(num_blks, XFS_IALLOC_BLOCKS(mp));
+		chunk_stop_agbno = chunk_start_agbno + num_blks;
+	}
+
+	/*
+	 * ok, we've got a candidate inode chunk.  now we have to
+	 * verify that we aren't trying to use blocks that are already
+	 * in use.  If so, mark them as multiply claimed since odds
+	 * are very low that we found this chunk by stumbling across
+	 * user data -- we're probably here as a result of a directory
+	 * entry or an iunlinked pointer
+	 */
+	for (j = 0, cur_agbno = chunk_start_agbno;
+			cur_agbno < chunk_stop_agbno; cur_agbno++)  {
+		switch (state = get_agbno_state(mp, agno, cur_agbno))  {
+		case XR_E_MULT:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_FS_MAP:
+			do_warn(
+			    "inode block %d/%d multiply claimed, (state %d)\n",
+				agno, cur_agbno, state);
+			set_agbno_state(mp, agno, cur_agbno, XR_E_MULT);
+			j = 1;
+			break;
+		case XR_E_INO:
+			do_error(
+		"uncertain inode block overlap, agbno = %d, ino = %llu\n",
+				agbno, ino);
+			break;
+		default:
+			break;
+		}
+
+		if (j)
+			return(0);
+	}
+
+	/*
+	 * ok, chunk is good.  put the record into the tree if required,
+	 * and fill in the bitmap.  All inodes will be marked as "free"
+	 * except for the one that led us to discover the chunk.  That's
+	 * ok because we'll override the free setting later if the
+	 * contents of the inode indicate it's in use.
+	 */
+	start_agino = XFS_OFFBNO_TO_AGINO(mp, chunk_start_agbno, 0);
+	*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+	ASSERT(find_inode_rec(agno, start_agino) == NULL);
+
+	irec_p = set_inode_free_alloc(agno, start_agino);
+	for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+		set_inode_free(irec_p, i);
+
+	ASSERT(start_agino <= agino &&
+			start_agino + XFS_INODES_PER_CHUNK > agino);
+
+	set_inode_used(irec_p, agino - start_agino);
+
+	for (cur_agbno = chunk_start_agbno;
+			cur_agbno < chunk_stop_agbno; cur_agbno++)  {
+		switch (state = get_agbno_state(mp, agno, cur_agbno))  {
+		case XR_E_INO:
+			do_error("uncertain inode block %llu already known\n",
+				XFS_AGB_TO_FSB(mp, agno, cur_agbno));
+			break;
+		case XR_E_UNKNOWN:
+		case XR_E_FREE1:
+		case XR_E_FREE:
+			set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+			break;
+		case XR_E_MULT:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_FS_MAP:
+			do_error(
+			    "inode block %d/%d multiply claimed, (state %d)\n",
+				agno, cur_agbno, state);
+			break;
+		default:
+			do_warn("inode block %d/%d bad state, (state %d)\n",
+				agno, cur_agbno, state);
+			set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+			break;
+		}
+	}
+
+	return(ino_cnt);
+}
+
+/*
+ * verify_inode_chunk() for an AG-relative inode; NULLAGINO start if bad
+ */
+int
+verify_aginode_chunk(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno,
+			xfs_agino_t	agino,
+			xfs_agino_t	*agino_start)
+{
+	xfs_ino_t	chunk_ino;
+	int		found;
+
+	found = verify_inode_chunk(mp, XFS_AGINO_TO_INO(mp, agno, agino),
+					&chunk_ino);
+	if (found == 0) {
+		*agino_start = NULLAGINO;
+		return(0);
+	}
+	*agino_start = XFS_INO_TO_AGINO(mp, chunk_ino);
+	return(found);
+}
+
+/*
+ * verifies the chunk holding agino via verify_aginode_chunk() and returns
+ * its record looked up with find_inode_rec(), or NULL if verification fails
+ */
+ino_tree_node_t *
+verify_aginode_chunk_irec(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno,
+			xfs_agino_t	agino)
+{
+	xfs_agino_t chunk_agino;
+
+	if (verify_aginode_chunk(mp, agno, agino, &chunk_agino) == 0)
+		return(NULL);
+
+	/* chunk accepted: hand back its incore record */
+	return(find_inode_rec(agno, chunk_agino));
+}
+
+
+
+/*
+ * process (and, if ino_discovery, first verify) the XFS_IALLOC_INODES(mp)
+ * inodes in the buffer starting at first_irec; returns 1 if the buffer
+ * can't be read, 0 otherwise.  num_inos is unused.  *bogus is set to 1
+ * if ino_discovery is set and no inode in the buffer passes verification.
+ */
+/* ARGSUSED */
+int
+process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
+			ino_tree_node_t *first_irec, int ino_discovery,
+			int check_dups, int extra_attr_check, int *bogus)
+{
+	xfs_ino_t		parent;
+	ino_tree_node_t		*ino_rec;
+	xfs_buf_t		*bp;
+	xfs_dinode_t		*dino;
+	int			icnt;
+	int			status;
+	int			is_used;
+	int			state;
+	int			done;
+	int			ino_dirty;
+	int			irec_offset;
+	int			ibuf_offset;
+	xfs_agino_t		agino;
+	xfs_agblock_t		agbno;
+	int			dirty = 0;
+	int			cleared = 0;
+	int			isa_dir = 0;
+
+	ASSERT(first_irec != NULL);
+	ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);
+
+	*bogus = 0;
+	ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+	/*
+	 * get all blocks required to read in this chunk (may wind up
+	 * having to process more chunks in a multi-chunk per block fs)
+	 */
+	agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
+
+	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
+			XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
+	if (!bp) {
+		do_warn("cannot read inode %llu, disk block %lld, cnt %d\n",
+			XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
+			XFS_AGB_TO_DADDR(mp, agno, agbno),
+			(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
+		return(1);
+	}
+
+	/*
+	 * set up first irec
+	 */
+	ino_rec = first_irec;
+	/*
+	 * initialize counters: irec_offset indexes into the current
+	 * inode record, ibuf_offset counts inodes within the current
+	 * fs block and icnt counts inodes within the whole buffer
+	 */
+	irec_offset = 0;
+	ibuf_offset = 0;
+	icnt = 0;
+	status = 0;
+	done = 0;
+
+	/*
+	 * verify inode chunk if necessary
+	 */
+	if (ino_discovery)  {
+		while (!done)  {
+			/*
+			 * make inode pointer
+			 */
+			dino = XFS_MAKE_IPTR(mp, bp, icnt);
+			agino = irec_offset + ino_rec->ino_startnum;
+
+			/*
+			 * we always think that the root and realtime
+			 * inodes are verified even though we may have
+			 * to reset them later to keep from losing the
+			 * chunk that they're in
+			 */
+			if (verify_dinode(mp, dino, agno, agino) == 0 ||
+					agno == 0 &&
+					(mp->m_sb.sb_rootino == agino ||
+					 mp->m_sb.sb_rsumino == agino ||
+					 mp->m_sb.sb_rbmino == agino))
+				status++;
+
+			irec_offset++;
+			icnt++;
+
+			if (icnt == XFS_IALLOC_INODES(mp) &&
+					irec_offset == XFS_INODES_PER_CHUNK)  {
+				/*
+				 * done! - finished up irec and block
+				 * simultaneously
+				 */
+				libxfs_putbuf(bp);
+				done = 1;
+				break;
+			} else if (irec_offset == XFS_INODES_PER_CHUNK)  {
+				/*
+				 * get new irec (multiple chunks per block fs)
+				 */
+				ino_rec = next_ino_rec(ino_rec);
+				ASSERT(ino_rec->ino_startnum == agino + 1);
+				irec_offset = 0;
+			}
+		}
+
+		/*
+		 * if chunk/block is bad (status counted no verified
+		 * inodes), blow it off.  the inode records
+		 * will be deleted by the caller if appropriate.
+		 */
+		if (!status)  {
+			*bogus = 1;
+			if (!done) /* already free'd */
+			  libxfs_putbuf(bp);
+			return(0);
+		}
+
+		/*
+		 * reset irec and counters
+		 */
+		ino_rec = first_irec;
+
+		irec_offset = 0;
+		ibuf_offset = 0;
+		icnt = 0;
+		status = 0;
+		done = 0;
+
+		/* nathans TODO ... memory leak here?  (loop above did the putbuf) */
+
+		/*
+		 * get first block
+		 */
+		bp = libxfs_readbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, agno, agbno),
+				XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
+		if (!bp) {
+			do_warn("can't read inode %llu, disk block %lld, "
+				"cnt %d\n", XFS_AGINO_TO_INO(mp, agno, agino),
+				XFS_AGB_TO_DADDR(mp, agno, agbno),
+				(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
+			return(1);
+		}
+	}
+
+	/*
+	 * mark block as an inode block in the incore bitmap
+	 */
+	switch (state = get_agbno_state(mp, agno, agbno))  {
+	case XR_E_INO:	/* already marked */
+		break;
+	case XR_E_UNKNOWN:
+	case XR_E_FREE:
+	case XR_E_FREE1:
+		set_agbno_state(mp, agno, agbno, XR_E_INO);
+		break;
+	case XR_E_BAD_STATE:
+		do_error("bad state in block map %d\n", state);
+		break;
+	default:
+		set_agbno_state(mp, agno, agbno, XR_E_MULT);
+		do_warn("inode block %llu multiply claimed, state was %d\n",
+			XFS_AGB_TO_FSB(mp, agno, agbno), state);
+		break;
+	}
+
+	while (!done)  {
+		/*
+		 * make inode pointer
+		 */
+		dino = XFS_MAKE_IPTR(mp, bp, icnt);
+		agino = irec_offset + ino_rec->ino_startnum;
+
+		is_used = 3;	/* sentinel, see the ASSERT after process_dinode() */
+		ino_dirty = 0;
+		parent = 0;
+
+		status = process_dinode(mp, dino, agno, agino,
+				is_inode_free(ino_rec, irec_offset),
+				&ino_dirty, &cleared, &is_used,
+				ino_discovery, check_dups,
+				extra_attr_check, &isa_dir, &parent);
+
+		ASSERT(is_used != 3);
+		if (ino_dirty)
+			dirty = 1;
+		/*
+		 * XXX - if we want to try and keep
+		 * track of whether we need to bang on
+		 * the inode maps (instead of just
+		 * blindly reconstructing them like
+		 * we do now), this is where to start.
+		 */
+		if (is_used)  {
+			if (is_inode_free(ino_rec, irec_offset))  {
+				if (verbose || no_modify ||
+				    XFS_AGINO_TO_INO(mp, agno, agino) !=
+							old_orphanage_ino)  {
+					do_warn("imap claims in-use inode %llu"
+						" is free, ",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+
+				if (verbose || (!no_modify &&
+				    XFS_AGINO_TO_INO(mp, agno, agino) !=
+						old_orphanage_ino))
+					do_warn("correcting imap\n");
+				else
+					do_warn("would correct imap\n");
+			}
+			set_inode_used(ino_rec, irec_offset);
+		} else  {
+			set_inode_free(ino_rec, irec_offset);
+		}
+
+		/*
+		 * if we lose the root inode, or it turns into
+		 * a non-directory, that allows us to double-check
+		 * later whether or not we need to reinitialize it.
+		 */
+		if (isa_dir)  {
+			set_inode_isadir(ino_rec, irec_offset);
+			/*
+			 * we always set the parent but
+			 * we may as well wait until
+			 * phase 4 (no inode discovery)
+			 * because the parent info will
+			 * be solid then.
+			 */
+			if (!ino_discovery)  {
+				ASSERT(parent != 0);
+				set_inode_parent(ino_rec, irec_offset, parent);
+				ASSERT(parent ==
+					get_inode_parent(ino_rec, irec_offset));
+			}
+		} else  {
+			clear_inode_isadir(ino_rec, irec_offset);
+		}
+
+		if (status)  {
+			if (mp->m_sb.sb_rootino ==
+					XFS_AGINO_TO_INO(mp, agno, agino))  {
+				need_root_inode = 1;
+
+				if (!no_modify)  {
+					do_warn("cleared root inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				} else  {
+					do_warn("would clear root inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+			} else if (mp->m_sb.sb_rbmino ==
+					XFS_AGINO_TO_INO(mp, agno, agino))  {
+				need_rbmino = 1;
+
+				if (!no_modify)  {
+					do_warn("cleared realtime bitmap "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				} else  {
+					do_warn("would clear realtime bitmap "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+			} else if (mp->m_sb.sb_rsumino ==
+					XFS_AGINO_TO_INO(mp, agno, agino))  {
+				need_rsumino = 1;
+
+				if (!no_modify)  {
+					do_warn("cleared realtime summary "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				} else  {
+					do_warn("would clear realtime summary "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+			} else if (!no_modify)  {
+				do_warn("cleared inode %llu\n",
+					XFS_AGINO_TO_INO(mp, agno, agino));
+			} else  {
+				do_warn("would have cleared inode %llu\n",
+					XFS_AGINO_TO_INO(mp, agno, agino));
+			}
+		}
+
+		irec_offset++;
+		ibuf_offset++;
+		icnt++;
+
+		if (icnt == XFS_IALLOC_INODES(mp) &&
+				irec_offset == XFS_INODES_PER_CHUNK)  {
+			/*
+			 * done! - finished up irec and block simultaneously.
+			 * the buffer is only written back if an inode was
+			 * dirtied and modifications are allowed.
+			 */
+			if (dirty && !no_modify)
+				libxfs_writebuf(bp, 0);
+			else
+				libxfs_putbuf(bp);
+
+			done = 1;
+			break;
+		} else if (ibuf_offset == mp->m_sb.sb_inopblock)  {
+			/*
+			 * mark block as an inode block in the incore bitmap
+			 * and reset inode buffer offset counter
+			 */
+			ibuf_offset = 0;
+			agbno++;
+
+			switch (state = get_agbno_state(mp, agno, agbno))  {
+			case XR_E_INO:	/* already marked */
+				break;
+			case XR_E_UNKNOWN:
+			case XR_E_FREE:
+			case XR_E_FREE1:
+				set_agbno_state(mp, agno, agbno, XR_E_INO);
+				break;
+			case XR_E_BAD_STATE:
+				do_error( "bad state in block map %d\n",
+					state);
+				break;
+			default:
+				set_agbno_state(mp, agno, agbno, XR_E_MULT);
+				do_warn("inode block %llu multiply claimed, "
+					"state was %d\n",
+					XFS_AGB_TO_FSB(mp, agno, agbno), state);
+				break;
+			}
+
+		} else if (irec_offset == XFS_INODES_PER_CHUNK)  {
+			/*
+			 * get new irec (multiple chunks per block fs)
+			 */
+			ino_rec = next_ino_rec(ino_rec);
+			ASSERT(ino_rec->ino_startnum == agino + 1);
+			irec_offset = 0;
+		}
+	}
+	return(0);
+}
+
+/*
+ * walk every record in the ag's incore inode tree and run
+ * process_inode_chunk() on each inode allocation block.  the
+ * tree may be incomplete; if so, the missing inodes will
+ * (hopefully) be caught when the directory tree is traversed.
+ * ino_discovery, check_dups and extra_attr_check are handed to
+ * process_inode_chunk() unchanged.  per the original notes,
+ * discovery is only requested in phase 3 and check_dups only in
+ * phase 4, once the block bitmap has been set once.  records
+ * whose inodes are all bogus are removed and freed here, and an
+ * i/o error from process_inode_chunk() aborts the program.
+ */
+void
+process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
+		int ino_discovery, int check_dups, int extra_attr_check)
+{
+	ino_tree_node_t	*head;		/* first record of current block */
+	ino_tree_node_t	*cur;		/* record cursor */
+	ino_tree_node_t	*doomed;	/* record being thrown away */
+	int		covered;	/* inodes spanned so far */
+	int		bogus;
+
+	head = cur = findfirst_inode_rec(agno);
+
+	while (cur != NULL)  {
+		/*
+		 * a big-block filesystem can fit more than one inode
+		 * chunk in a block, so advance the cursor one record
+		 * at a time until a full allocation's worth of inodes
+		 * has been spanned.  chunks are always aligned and
+		 * sized correctly.
+		 */
+		covered = XFS_INODES_PER_CHUNK;
+		while (covered < XFS_IALLOC_INODES(mp) && cur != NULL)  {
+			ASSERT(cur != NULL);
+			cur = next_ino_rec(cur);
+			if (cur != NULL)
+				covered += XFS_INODES_PER_CHUNK;
+		}
+
+		ASSERT(covered == XFS_IALLOC_INODES(mp));
+
+		if (process_inode_chunk(mp, agno, covered, head, ino_discovery,
+				check_dups, extra_attr_check, &bogus))  {
+			/* XXX - i/o error, we've got a problem */
+			abort();
+		}
+
+		if (!bogus)  {
+			/* good block, move on to the following record */
+			head = cur = next_ino_rec(cur);
+			continue;
+		}
+
+		/*
+		 * every inode this block points at is bogus, so drop
+		 * its record(s) from the tree.  the inode block(s) get
+		 * reclaimed in phase 4 when the block map is rebuilt
+		 * after inodes claiming duplicate blocks are deleted.
+		 */
+		covered = 0;
+		cur = head;
+		while (covered < XFS_IALLOC_INODES(mp) && cur != NULL)  {
+			doomed = cur;
+
+			/* step past the record before it is released */
+			cur = next_ino_rec(cur);
+			if (cur != NULL)
+				covered += XFS_INODES_PER_CHUNK;
+
+			get_inode_rec(agno, doomed);
+			free_inode_rec(agno, doomed);
+		}
+
+		head = cur;
+	}
+}
+
+/*
+ * verify the uncertain inode list for an ag.  each confirmed inode
+ * that has a valid number and is not already covered by a record in
+ * the good inode tree has its chunk checked by verify_aginode_chunk().
+ * every uncertain record is released once it has been looked at, so
+ * the ag's uncertain inode tree is torn down as a side-effect.
+ * nothing is returned.
+ */
+void
+check_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	ino_tree_node_t		*urec;		/* uncertain record */
+	ino_tree_node_t		*good = NULL;	/* last good-tree hit */
+	xfs_agino_t		chunk_start;
+	xfs_agino_t		slot;
+	xfs_agino_t		agino;
+	int			got_some = 0;
+
+	clear_uncertain_ino_cache(agno);
+
+	urec = findfirst_uncertain_inode_rec(agno);
+	if (urec == NULL)
+		return;
+
+	/*
+	 * the trick here is to find a contiguous range of inodes,
+	 * make sure that it doesn't overlap a chunk that is already
+	 * known to exist, and then make sure it covers whole chunks.
+	 * the on-disk contents are checked as a final cross-check.
+	 */
+	do_warn("found inodes not in the inode allocation tree\n");
+
+	while (urec != NULL)  {
+		/*
+		 * only look at confirmed inodes, i.e. the ones we
+		 * really suspect to be inodes
+		 */
+		for (slot = 0; slot < XFS_INODES_PER_CHUNK; slot++)  {
+			if (!is_inode_confirmed(urec, slot))
+				continue;
+
+			agino = slot + urec->ino_startnum;
+
+			/* bad inode number */
+			if (verify_aginum(mp, agno, agino))
+				continue;
+
+			/* inside the good record found last time around */
+			if (good != NULL && good->ino_startnum <= agino &&
+					agino < good->ino_startnum +
+					XFS_INODES_PER_CHUNK)
+				continue;
+
+			good = find_inode_rec(agno, agino);
+			if (good != NULL)
+				continue;
+
+			if (!verify_aginum(mp, agno, agino) &&
+			    verify_aginode_chunk(mp, agno, agino,
+						&chunk_start))
+				got_some = 1;
+		}
+
+		/*
+		 * done with this record, release it and pick up
+		 * whatever is now first in the uncertain tree
+		 */
+		get_uncertain_inode_rec(agno, urec);
+		free_inode_rec(agno, urec);
+
+		urec = findfirst_uncertain_inode_rec(agno);
+	}
+
+	if (got_some)
+		do_warn("found inodes not in the inode allocation tree\n");
+}
+
+/*
+ * verify and process the uncertain inodes for an ag.
+ * this is different from check_uncertain_aginodes() in that we
+ * can't just move the good inodes into the good inode tree and let
+ * process_aginodes() deal with them because this gets called
+ * after process_aginodes() has been run on the ag inode tree.
+ * So we have to process the inodes as well as verify since
+ * we don't want to rerun process_aginodes() on a tree that has
+ * mostly been processed.
+ *
+ * Note that if this routine does process some inodes, it can
+ * add uncertain inodes to any ag which would require that
+ * the routine be called again to process those newly-added
+ * uncertain inodes.
+ *
+ * returns 0 if the ag had no uncertain inode records and 1
+ * otherwise (in which case it is possible that new uncertain
+ * inodes were discovered).
+ *
+ * as a side-effect, this routine tears down the uncertain
+ * inode tree for the ag.
+ */
+int
+process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	ino_tree_node_t		*irec;
+	ino_tree_node_t		*nrec;
+	xfs_agino_t		agino;
+	int			i;
+	int			bogus;
+	int			cnt;
+	int			got_some;
+
+#ifdef XR_INODE_TRACE
+	fprintf(stderr, "in process_uncertain_aginodes, agno = %d\n", agno);
+#endif
+
+	got_some = 0;
+
+	clear_uncertain_ino_cache(agno);
+
+	if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
+		return(0);
+
+	nrec = NULL;
+
+	do  {
+		/*
+		 * check every confirmed inode; cnt counts them
+		 */
+		for (cnt = i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+			if (!is_inode_confirmed(irec, i))
+				continue;
+			cnt++;
+			agino = i + irec->ino_startnum;
+#ifdef XR_INODE_TRACE
+	fprintf(stderr, "ag inode = %d (0x%x)\n", agino, agino);
+#endif
+			/*
+			 * skip over inodes already processed (in the
+			 * good tree), bad inode numbers, and inode numbers
+			 * pointing to bogus inodes
+			 */
+			if (verify_aginum(mp, agno, agino))
+				continue;
+
+			if (nrec != NULL && nrec->ino_startnum <= agino &&
+					agino < nrec->ino_startnum +
+					XFS_INODES_PER_CHUNK)
+				continue;
+
+			if ((nrec = find_inode_rec(agno, agino)) != NULL)
+				continue;
+
+			/*
+			 * verify the chunk.  if good, it will be
+			 * added to the good inode tree.
+			 */
+			if ((nrec = verify_aginode_chunk_irec(mp,
+						agno, agino)) == NULL)
+				continue;
+
+			got_some = 1;
+
+			/*
+			 * process the inode record we just added
+			 * to the good inode tree.  The inode
+			 * processing may add more records to the
+			 * uncertain inode lists.
+			 */
+			if (process_inode_chunk(mp, agno, XFS_IALLOC_INODES(mp),
+						nrec, 1, 0, 0, &bogus))  {
+				/* XXX - i/o error, we've got a problem */
+				abort();
+			}
+		}
+
+		ASSERT(cnt != 0);
+		/*
+		 * now return the uncertain inode record to the free pool
+		 * and pull another one off the list for processing
+		 */
+		get_uncertain_inode_rec(agno, irec);
+		free_inode_rec(agno, irec);
+
+		irec = findfirst_uncertain_inode_rec(agno);
+	} while (irec != NULL);
+
+	if (got_some)
+		do_warn("found inodes not in the inode allocation tree\n");
+
+	return(1);
+}
diff --git a/repair/dinode.c b/repair/dinode.c
new file mode 100644
index 000000000..2dcd98298
--- /dev/null
+++ b/repair/dinode.c
@@ -0,0 +1,2914 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dir2.h"
+#include "dinode.h"
+#include "scan.h"
+#include "versions.h"
+#include "attr_repair.h"
+#include "bmap.h"
+
+/*
+ * inode clearing routines
+ */
+
+/*
+ * return the offset into the inode where the attribute fork starts:
+ * the offset of di_u plus the data fork size implied by di_format
+ */
+/* ARGSUSED */
+int
+calc_attr_offset(xfs_mount_t *mp, xfs_dinode_t *dino)
+{
+	xfs_dinode_core_t	*core = &dino->di_core;
+	int			fork_off = ((__psint_t) &dino->di_u) -
+						(__psint_t)dino;
+
+	/* data fork is already 8-byte aligned, so no alignment fixups */
+	switch (core->di_format)  {
+	case XFS_DINODE_FMT_LOCAL:
+		fork_off += INT_GET(core->di_size, ARCH_CONVERT);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		fork_off += INT_GET(core->di_nextents, ARCH_CONVERT) *
+				sizeof(xfs_bmbt_rec_32_t);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		fork_off += INT_GET(dino->di_u.di_bmbt.bb_numrecs,
+				ARCH_CONVERT) * sizeof(xfs_bmbt_rec_32_t);
+		break;
+	case XFS_DINODE_FMT_DEV:
+		fork_off += sizeof(dev_t);
+		break;
+	case XFS_DINODE_FMT_UUID:
+		fork_off += sizeof(uuid_t);
+		break;
+	default:
+		do_error("Unknown inode format.\n");
+		abort();
+		break;
+	}
+
+	return(fork_off);
+}
+
+/* ARGSUSED */
+int
+clear_dinode_attr(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num)
+{
+	xfs_dinode_core_t	*core = &dino->di_core;
+	xfs_attr_shortform_t	*sf;
+
+	ASSERT(core->di_forkoff != 0);
+
+	/*
+	 * in no-modify mode only report what would happen; the inode
+	 * is left untouched.  the result is 1 in either mode.
+	 */
+	if (no_modify)  {
+		fprintf(stderr, "would have cleared inode %llu attributes\n",
+			ino_num);
+		return(1);
+	}
+
+	fprintf(stderr, "clearing inode %llu attributes \n", ino_num);
+
+	if (INT_GET(core->di_anextents, ARCH_CONVERT) != 0)  {
+		INT_ZERO(core->di_anextents, ARCH_CONVERT);
+	}
+
+	if (core->di_aformat != XFS_DINODE_FMT_EXTENTS)  {
+		core->di_aformat = XFS_DINODE_FMT_EXTENTS;
+	}
+
+	/*
+	 * get rid of the fork by clearing forkoff.
+	 *
+	 * when the attr repair code was first added the fork was
+	 * cleared by making it an empty shortform fork, which meant
+	 * resetting hdr.totsize/count and switching aformat to LOCAL.
+	 * later fixes changed aformat and forkoff so that no attribute
+	 * fork shows at all.  resetting totsize/count may no longer be
+	 * needed but stays in to be safe.  forkoff is zeroed last,
+	 * after the header has been located through
+	 * XFS_DFORK_APTR_ARCH().
+	 */
+	sf = (xfs_attr_shortform_t *)
+			XFS_DFORK_APTR_ARCH(dino, ARCH_CONVERT);
+	INT_SET(sf->hdr.totsize, ARCH_CONVERT,
+		sizeof(xfs_attr_sf_hdr_t));
+	INT_SET(sf->hdr.count, ARCH_CONVERT, 0);
+	core->di_forkoff = 0;
+
+	/* the fork is gone now, which always counts as a change */
+	return(1);
+}
+
+/*
+ * put the core of an on-disk inode into its cleared state: good
+ * magic number, a version consistent with fs_inode_nlink, extents
+ * format for both forks, and zero for mode, flags, dmevmask,
+ * forkoff, size, nblocks, onlink, nextents, anextents and (for
+ * inodes newer than version 1) nlink.  ino_num is unused.
+ *
+ * returns 1 if at least one field was not already in its cleared
+ * state, 0 if the core needed no changes.
+ *
+ * when no_modify is set nothing is written: the routine returns 1
+ * at the first field that would have to change, so the remaining
+ * fields are not examined.
+ */
+/* ARGSUSED */
+int
+clear_dinode_core(xfs_dinode_core_t *dinoc, xfs_ino_t ino_num)
+{
+	int dirty = 0;
+
+	/* magic number */
+	if (INT_GET(dinoc->di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)  {
+		if (no_modify)
+			return(1);
+		INT_SET(dinoc->di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+		dirty = 1;
+	}
+
+	/* version: must be a known one and allowed by fs_inode_nlink */
+	if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) ||
+	    (!fs_inode_nlink && dinoc->di_version > XFS_DINODE_VERSION_1))  {
+		if (no_modify)
+			return(1);
+		dinoc->di_version = (fs_inode_nlink) ? XFS_DINODE_VERSION_2
+						: XFS_DINODE_VERSION_1;
+		dirty = 1;
+	}
+
+	/* mode */
+	if (INT_GET(dinoc->di_mode, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_mode, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* flags */
+	if (INT_GET(dinoc->di_flags, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_flags, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* dmevmask */
+	if (INT_GET(dinoc->di_dmevmask, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_dmevmask, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* fork offset (single byte field, accessed directly) */
+	if (dinoc->di_forkoff != 0)  {
+		if (no_modify)
+			return(1);
+		dinoc->di_forkoff = 0;
+		dirty = 1;
+	}
+
+	/* data fork format */
+	if (dinoc->di_format != XFS_DINODE_FMT_EXTENTS)  {
+		if (no_modify)
+			return(1);
+		dinoc->di_format = XFS_DINODE_FMT_EXTENTS;
+		dirty = 1;
+	}
+
+	/* attribute fork format */
+	if (dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS)  {
+		if (no_modify)
+			return(1);
+		dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+		dirty = 1;
+	}
+
+	/* size */
+	if (INT_GET(dinoc->di_size, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_size, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* block count */
+	if (INT_GET(dinoc->di_nblocks, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_nblocks, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* di_onlink */
+	if (INT_GET(dinoc->di_onlink, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_onlink, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* data fork extent count */
+	if (INT_GET(dinoc->di_nextents, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_nextents, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* attribute fork extent count */
+	if (INT_GET(dinoc->di_anextents, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_anextents, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	/* di_nlink is only checked on inodes newer than version 1 */
+	if (dinoc->di_version > XFS_DINODE_VERSION_1 &&
+			INT_GET(dinoc->di_nlink, ARCH_CONVERT) != 0)  {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_nlink, ARCH_CONVERT);
+		dirty = 1;
+	}
+
+	return(dirty);
+}
+
+/* ARGSUSED */
+int
+clear_dinode_unlinked(xfs_mount_t *mp, xfs_dinode_t *dino)
+{
+	/* nothing to do (return 0) if next_unlinked is already null */
+	if (dino->di_next_unlinked == NULLAGINO)
+		return(0);
+
+	/* otherwise null it out, unless modifications are disallowed */
+	if (!no_modify)
+		dino->di_next_unlinked = NULLAGINO;
+	return(1);
+}
+
+/*
+ * clear an inode: core first, then the unlinked pointer, and if
+ * either needed changing (and we may modify) zero the fork area.
+ * because the unlinked pointer is reset this must not be called until
+ * the agi unlinked lists have been walked in phase 3.  returns > 0 if
+ * the inode was (or, with no_modify, would have been) altered.
+ */
+int
+clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num)
+{
+	int	core_dirty = clear_dinode_core(&dino->di_core, ino_num);
+	int	unlinked_dirty = clear_dinode_unlinked(mp, dino);
+	int	changed = core_dirty + unlinked_dirty;
+
+	if (changed == 0 || no_modify)
+		return(changed);
+
+	bzero(&dino->di_u, XFS_LITINO(mp));
+	return(changed);
+}
+
+
+/*
+ * misc. inode-related utility routines
+ */
+
+/*
+ * returns 0 if inode number is valid, 1 if bogus.  bogus means 0,
+ * NULLFSINO, a number that doesn't round-trip through its ag number
+ * and ag inode number, or one whose ag/ag block is out of range (the
+ * last ag may be shorter than sb_agblocks).
+ */
+int
+verify_inum(xfs_mount_t		*mp,
+		xfs_ino_t	ino)
+{
+	xfs_agnumber_t	agno;
+	xfs_agino_t	agino;
+	xfs_agblock_t	agbno;
+	xfs_sb_t	*sbp = &mp->m_sb;
+
+	if (ino == 0 || ino == NULLFSINO)
+		return(1);
+
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+	if (ino != XFS_AGINO_TO_INO(mp, agno, agino))
+		return(1);
+
+	if (agno >= sbp->sb_agcount || agbno == 0 ||
+		agbno >= sbp->sb_agblocks ||
+		(agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+				(xfs_drfsbno_t)(sbp->sb_agcount - 1) *
+				sbp->sb_agblocks))
+		return(1);
+	return(0);
+}
+
+/*
+ * validate an ag number/ag inode number pair directly, so that we
+ * don't accidentally lose illegally set bits in the agino by turning
+ * it into an FSINO first.  returns 0 if valid, 1 if bogus.
+ */
+int
+verify_aginum(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	agino)
+{
+	xfs_agblock_t	agbno;
+	xfs_sb_t	*sbp = &mp->m_sb;
+
+	/* range check ag #, ag block.  range-checking offset is pointless */
+
+	if (agino == 0 || agino == NULLAGINO)
+		return(1);
+
+	/*
+	 * agino's can't be too close to NULLAGINO because the min blocksize
+	 * is 9 bits and at most 1 bit of that gets used for the inode offset
+	 * so if the agino gets shifted by the # of offset bits and compared
+	 * to the legal agbno values, a bogus agino will be too large.  there
+	 * will be extra bits set at the top that shouldn't be set.
+	 */
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+	if (agno >= sbp->sb_agcount || agbno == 0 ||
+		agbno >= sbp->sb_agblocks ||
+		(agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+				(xfs_drfsbno_t)(sbp->sb_agcount - 1) *
+				sbp->sb_agblocks))
+		return(1);
+
+	return(0);
+}
+
+/*
+ * return 1 if block number is good, 0 if out of range
+ */
+int
+verify_dfsbno(xfs_mount_t	*mp,
+		xfs_dfsbno_t	fsbno)
+{
+	xfs_agnumber_t	agno;
+	xfs_agblock_t	agbno;
+	xfs_sb_t	*sbp = &mp->m_sb;
+
+	/* range check ag #, ag block; the last ag may be a short one */
+
+	agno = XFS_FSB_TO_AGNO(mp, fsbno);
+	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+	if (agno >= sbp->sb_agcount || agbno >= sbp->sb_agblocks ||
+		(agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+				(xfs_drfsbno_t)(sbp->sb_agcount - 1) *
+				sbp->sb_agblocks))
+		return(0);
+
+	return(1);
+}
+
+int
+verify_agbno(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		xfs_agblock_t	agbno)
+{
+	xfs_sb_t	*sbp = &mp->m_sb;
+
+	/* return 1 if agbno is inside ag agno (last ag may be short), else 0 */
+
+	if (agno >= sbp->sb_agcount || agbno >= sbp->sb_agblocks ||
+		(agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+				(xfs_drfsbno_t)(sbp->sb_agcount - 1) *
+				sbp->sb_agblocks))
+		return(0);
+
+	return(1);
+}
+
+void
+convert_extent(
+	xfs_bmbt_rec_32_t	*rp,
+	xfs_dfiloff_t		*op,	/* starting offset (blockno in file) */
+	xfs_dfsbno_t		*sp,	/* starting block (fs blockno) */
+	xfs_dfilblks_t		*cp,	/* blockcount */
+	int			*fp)	/* extent flag */
+{
+	xfs_bmbt_irec_t	ext;
+
+	/*
+	 * unpack the on-disk record with the extent parsing routine
+	 * from the kernel
+	 */
+	libxfs_bmbt_get_all((xfs_bmbt_rec_t *)rp, &ext);
+
+	/*
+	 * the flag is only reported when fs_has_extflgbit is set and
+	 * the extent state is XFS_EXT_UNWRITTEN
+	 */
+	*fp = (fs_has_extflgbit && ext.br_state == XFS_EXT_UNWRITTEN) ? 1 : 0;
+	*op = ext.br_startoff;
+	*sp = ext.br_startblock;
+	*cp = ext.br_blockcount;
+}
+
+/*
+ * return the fs block backing file block fblock if one of the numrecs
+ * extent records at rp covers it, NULLDFSBNO otherwise.  mp is unused.
+ */
+/* ARGSUSED */
+xfs_dfsbno_t
+get_bmbt_reclist(
+	xfs_mount_t		*mp,
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	xfs_dfiloff_t		fblock)
+{
+	int			i;
+	xfs_dfilblks_t		cnt;
+	xfs_dfiloff_t		off_bno;
+	xfs_dfsbno_t		start;
+	int			flag;
+
+	for (i = 0; i < numrecs; i++, rp++) {
+		convert_extent(rp, &off_bno, &start, &cnt, &flag);
+		if (off_bno <= fblock && fblock - off_bno < cnt)
+			return(start + fblock - off_bno);
+	}
+
+	return(NULLDFSBNO);
+}
+
+/*
+ * return 1 if inode should be cleared, 0 otherwise
+ * if check_dups should be set to 1, that implies that
+ * the primary purpose of this call is to see if the
+ * file overlaps with any duplicate extents (in the
+ * duplicate extent list).
+ */
+/* ARGSUSED */
+int
+process_bmbt_reclist_int(
+	xfs_mount_t		*mp,
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	int			type,
+	xfs_ino_t		ino,
+	xfs_drfsbno_t		*tot,
+	blkmap_t		**blkmapp,
+	xfs_dfiloff_t		*first_key,
+	xfs_dfiloff_t		*last_key,
+	int			check_dups,
+	int			whichfork)
+{
+	xfs_dfsbno_t		b;
+	xfs_drtbno_t		ext;
+	xfs_dfilblks_t		c;		/* count */
+	xfs_dfilblks_t		cp = 0;		/* prev count */
+	xfs_dfsbno_t		s;		/* start */
+	xfs_dfsbno_t		sp = 0;		/* prev start */
+	xfs_dfiloff_t		o = 0;		/* offset */
+	xfs_dfiloff_t		op = 0;		/* prev offset */
+	char			*ftype;
+	char			*forkname;
+	int			i;
+	int			state;
+	int			flag;		/* extent flag */
+
+	if (whichfork == XFS_DATA_FORK)
+		forkname = "data";
+	else
+		forkname = "attr";
+
+	if (type == XR_INO_RTDATA)
+		ftype = "real-time";
+	else
+		ftype = "regular";
+
+	for (i = 0; i < numrecs; i++, rp++) {
+		convert_extent(rp, &o, &s, &c, &flag);
+		if (i == 0)
+			*last_key = *first_key = o;
+		else
+			*last_key = o;
+		if (i > 0 && op + cp > o)  {
+			do_warn(
+"bmap rec out of order, inode %llu entry %d [o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n",
+				ino, i, o, s, c, i-1, op, sp, cp);
+			return(1);
+		}
+		op = o;
+		cp = c;
+		sp = s;
+
+		/*
+		 * check numeric validity of the extent
+		 */
+		if (c == 0)  {
+			do_warn(
+		"zero length extent (off = %llu, fsbno = %llu) in ino %llu\n",
+				o, s, ino);
+			return(1);
+		}
+		if (type == XR_INO_RTDATA) {
+			if (s >= mp->m_sb.sb_rblocks)  {
+				do_warn(
+"inode %llu - bad rt extent starting block number %llu, offset %llu\n",
+					ino, s, o);
+				return(1);
+			}
+			if (s + c - 1 >= mp->m_sb.sb_rblocks)  {
+				do_warn(
+"inode %llu - bad rt extent last block number %llu, offset %llu\n",
+					ino, s + c - 1, o);
+				return(1);
+			}
+			if (s + c - 1 < s)  {
+				do_warn(
+"inode %llu - bad rt extent overflows - start %llu, end %llu, offset %llu\n",
+					ino, s, s + c - 1, o);
+				return(1);
+			}
+		} else  {
+			if (!verify_dfsbno(mp, s))  {
+				do_warn(
+"inode %llu - bad extent starting block number %llu, offset %llu\n",
+					ino, s, o);
+				return(1);
+			}
+			if (!verify_dfsbno(mp, s + c - 1))  {
+				do_warn(
+"inode %llu - bad extent last block number %llu, offset %llu\n",
+					ino, s + c - 1, o);
+				return(1);
+			}
+			if (s + c - 1 < s)  {
+				do_warn(
+"inode %llu - bad extent overflows - start %llu, end %llu, offset %llu\n",
+					ino, s, s + c - 1, o);
+				return(1);
+			}
+			if (o >= fs_max_file_offset)  {
+				do_warn(
+"inode %llu - extent offset too large - start %llu, count %llu, offset %llu\n",
+					ino, s, c, o);
+				return(1);
+			}
+		}
+
+		/*
+		 * realtime file data fork
+		 */
+		if (type == XR_INO_RTDATA && whichfork == XFS_DATA_FORK)  {
+			/*
+			 * XXX - verify that the blocks listed in the record
+			 * are multiples of an extent
+			 */
+			if (s % mp->m_sb.sb_rextsize != 0 ||
+					c % mp->m_sb.sb_rextsize != 0)  {
+				do_warn(
+"malformed rt inode extent [%llu %llu] (fs rtext size = %u)\n",
+					s, c, mp->m_sb.sb_rextsize);
+				return(1);
+			}
+
+			/*
+			 * XXX - set the appropriate number of extents
+			 */
+			for (b = s; b < s + c; b += mp->m_sb.sb_rextsize)  {
+				ext = (xfs_drtbno_t) b / mp->m_sb.sb_rextsize;
+
+				if (check_dups == 1)  {
+					if (search_rt_dup_extent(mp, ext))  {
+						do_warn(
+"data fork in rt ino %llu claims dup rt extent, off - %llu, start - %llu, count %llu\n",
+							ino, o, s, c);
+						return(1);
+					}
+					continue;
+				}
+
+				state = get_rtbno_state(mp, ext);
+
+				switch (state)  {
+				case XR_E_FREE:
+/* XXX - turn this back on after we
+	run process_rtbitmap() in phase2
+					do_warn(
+			"%s fork in rt ino %llu claims free rt block %llu\n",
+						forkname, ino, ext);
+*/
+					/* fall through ... */
+				case XR_E_UNKNOWN:
+					set_rtbno_state(mp, ext, XR_E_INUSE);
+					break;
+				case XR_E_BAD_STATE:
+					do_error(
+				"bad state in rt block map %llu\n", ext);
+					abort();
+					break;
+				case XR_E_FS_MAP:
+				case XR_E_INO:
+				case XR_E_INUSE_FS:
+					do_error(
+	"%s fork in rt inode %llu found metadata block %llu in %s bmap\n",
+						forkname, ino, ext, ftype);
+				case XR_E_INUSE:
+				case XR_E_MULT:
+					set_rtbno_state(mp, ext, XR_E_MULT);
+					do_warn(
+			"%s fork in rt inode %llu claims used rt block %llu\n",
+						forkname, ino, ext);
+					return(1);
+				case XR_E_FREE1:
+				default:
+					do_error(
+				"illegal state %d in %s block map %llu\n",
+						state, ftype, b);
+				}
+			}
+
+			/*
+			 * bump up the block counter
+			 */
+			*tot += c;
+
+			/*
+			 * skip rest of loop processing since that's
+			 * all for regular file forks and attr forks
+			 */
+			continue;
+		}
+
+	
+		/*
+		 * regular file data fork or attribute fork
+		 */
+		if (blkmapp && *blkmapp)
+			blkmap_set_ext(blkmapp, o, s, c);
+		for (b = s; b < s + c; b++)  {
+			if (check_dups == 1)  {
+				/*
+				 * if we're just checking the bmap for dups,
+				 * return if we find one, otherwise, continue
+				 * checking each entry without setting the
+				 * block bitmap
+				 */
+				if (search_dup_extent(mp,
+						    XFS_FSB_TO_AGNO(mp, b),
+						    XFS_FSB_TO_AGBNO(mp, b)))  {
+					do_warn(
+"%s fork in ino %llu claims dup extent, off - %llu, start - %llu, cnt %llu\n",
+						forkname, ino, o, s, c);
+					return(1);
+				}
+				continue;
+			}
+
+			/* FIX FOR BUG 653709 -- EKN 
+			 * realtime attribute fork, should be valid block number
+	 		 * in regular data space, not realtime partion.
+			 */
+		        if (type == XR_INO_RTDATA && whichfork == XFS_ATTR_FORK) {
+			  if (mp->m_sb.sb_agcount < XFS_FSB_TO_AGNO(mp, b))
+				return(1);
+			}	
+		
+			state = get_fsbno_state(mp, b);
+			switch (state)  {
+			case XR_E_FREE:
+			case XR_E_FREE1:
+				do_warn(
+				"%s fork in ino %llu claims free block %llu\n",
+					forkname, ino, (__uint64_t) b);
+				/* fall through ... */
+			case XR_E_UNKNOWN:
+				set_fsbno_state(mp, b, XR_E_INUSE);
+				break;
+			case XR_E_BAD_STATE:
+				do_error("bad state in block map %llu\n", b);
+				abort();
+				break;
+			case XR_E_FS_MAP:
+			case XR_E_INO:
+			case XR_E_INUSE_FS:
+				do_warn(
+				"%s fork in inode %llu claims metadata block %llu\n",
+					forkname, ino, (__uint64_t) b);
+				return(1);
+			case XR_E_INUSE:
+			case XR_E_MULT:
+				set_fsbno_state(mp, b, XR_E_MULT);
+				do_warn(
+				"%s fork in %s inode %llu claims used block %llu\n",
+					forkname, ftype, ino, (__uint64_t) b);
+				return(1);
+			default:
+				do_error("illegal state %d in block map %llu\n",
+					state, b);
+				abort();
+			}
+		}
+		*tot += c;
+	}
+
+	return(0);
+}
+
+/*
+ * full pass over an extent record list: check_dups is 0, so the block
+ * bitmap gets set.  return 1 if inode should be cleared, 0 otherwise
+ */
+int
+process_bmbt_reclist(
+	xfs_mount_t		*mp,
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	int			type,
+	xfs_ino_t		ino,
+	xfs_drfsbno_t		*tot,
+	blkmap_t		**blkmapp,
+	xfs_dfiloff_t		*first_key,
+	xfs_dfiloff_t		*last_key,
+	int			whichfork)
+{
+	const int	dups_only = 0;	/* 0 == not just a duplicate scan */
+	return process_bmbt_reclist_int(mp, rp, numrecs, type, ino, tot, blkmapp,
+				first_key, last_key, dups_only, whichfork);
+}
+
+/*
+ * duplicate-scan pass over an extent record list: check_dups is 1, so
+ * no block bitmap is set.  return 1 if inode should be cleared, else 0
+ */
+int
+scan_bmbt_reclist(
+	xfs_mount_t		*mp,
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	int			type,
+	xfs_ino_t		ino,
+	xfs_drfsbno_t		*tot,
+	int			whichfork)
+{
+	xfs_dfiloff_t		lo_key = 0;	/* scratch, not reported back */
+	xfs_dfiloff_t		hi_key = 0;	/* scratch, not reported back */
+
+	return process_bmbt_reclist_int(mp, rp, numrecs, type, ino, tot,
+					NULL, &lo_key, &hi_key, 1,
+					whichfork);
+}
+
+/*
+ * Read the buffer holding the inode chunk that contains (agno, agino)
+ * and point *dipp at that inode inside it.  Meant for callers that work
+ * on inodes one at a time in any order (like walking the unlinked
+ * lists).  Returns NULL when the inode has no tree record or the read
+ * fails; otherwise the caller must write/release the returned buffer.
+ */
+xfs_buf_t *
+get_agino_buf(xfs_mount_t	 *mp,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	agino,
+		xfs_dinode_t	**dipp)
+{
+	ino_tree_node_t	*chunk;
+	xfs_buf_t	*buf;
+	int		nbbs;
+
+	chunk = find_inode_rec(agno, agino);
+	if (chunk == NULL)
+		return NULL;
+
+	/* read length: blocks per inode chunk, with a floor of one block */
+	nbbs = XFS_FSB_TO_BB(mp, MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block));
+	buf = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno,
+		XFS_AGINO_TO_AGBNO(mp, chunk->ino_startnum)), nbbs, 0);
+	if (buf == NULL) {
+		do_warn("cannot read inode (%u/%u), disk block %lld\n",
+			agno, chunk->ino_startnum, XFS_AGB_TO_DADDR(mp, agno,
+				XFS_AGINO_TO_AGBNO(mp, chunk->ino_startnum)));
+		return NULL;
+	}
+
+	/* index = agino minus the inode number at offset 0 of that block */
+	*dipp = XFS_MAKE_IPTR(mp, buf, agino - XFS_OFFBNO_TO_AGINO(mp,
+			XFS_AGINO_TO_AGBNO(mp, chunk->ino_startnum), 0));
+	return buf;
+}
+
+/*
+ * The getfunc_*() routines map file block "bno" of one inode fork to
+ * the filesystem block backing it; they are the helpers behind
+ * get_bmapi().
+ *
+ * getfunc_extlist() handles an extent-format fork: it scans the extent
+ * records stored in the fork and returns NULLDFSBNO if none covers bno.
+ */
+/* ARGSUSED */
+xfs_dfsbno_t
+getfunc_extlist(xfs_mount_t		*mp,
+		xfs_ino_t		ino,
+		xfs_dinode_t		*dip,
+		xfs_dfiloff_t		bno,
+		int			whichfork)
+{
+	xfs_bmbt_rec_32_t	*recs;
+	xfs_extnum_t		nrecs;
+	int			idx;
+	xfs_dfiloff_t		ext_off;
+	xfs_dfsbno_t		ext_start;
+	xfs_dfilblks_t		ext_len;
+	int			ext_flag;
+
+	recs = (xfs_bmbt_rec_32_t *)
+			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+	nrecs = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+
+	for (idx = 0; idx < nrecs; idx++)  {
+		convert_extent(recs + idx, &ext_off, &ext_start, &ext_len,
+				&ext_flag);
+		if (bno >= ext_off && bno < ext_off + ext_len)
+			return ext_start + (bno - ext_off);
+	}
+
+	return NULLDFSBNO;
+}
+
+/*
+ * btree-format fork lookup: choose the root pointer whose key range
+ * covers "bno", walk down the interior blocks the same way, then scan
+ * the leaf's extent records.  Returns NULLDFSBNO if bno is unmapped, a
+ * block holds more records than the max, or a block can't be read.
+ */
+xfs_dfsbno_t
+getfunc_btree(xfs_mount_t		*mp,
+		xfs_ino_t		ino,
+		xfs_dinode_t		*dip,
+		xfs_dfiloff_t		bno,
+		int			whichfork)
+{
+	int			i;
+	int			prev_level;
+	int			flag;
+	int			found;
+	xfs_bmbt_rec_32_t	*rec;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_key_t		*key;
+	xfs_bmdr_key_t		*rkey;
+	xfs_bmdr_ptr_t		*rp;
+	xfs_dfiloff_t		fbno;
+	xfs_dfsbno_t		fsbno;
+	xfs_dfilblks_t		bcnt;
+	xfs_buf_t		*bp;
+	xfs_dfsbno_t		final_fsbno = NULLDFSBNO;
+	xfs_bmbt_block_t	*block;
+	xfs_bmdr_block_t	*rootblock = (xfs_bmdr_block_t *)
+			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+
+	ASSERT(INT_GET(rootblock->bb_level, ARCH_CONVERT) != 0);
+	/*
+	 * deal with root block, it's got a slightly different header
+	 * than interior nodes.  A btree has at least 2 levels (else it
+	 * would be an extent list), hence the assert.  The root lives
+	 * in the on-disk inode, so its fields go through INT_GET().
+	 */
+	rkey = XFS_BTREE_KEY_ADDR(
+			XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+			xfs_bmdr, rootblock, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip,
+						mp, whichfork, ARCH_CONVERT),
+			xfs_bmdr, 1));
+	rp = XFS_BTREE_PTR_ADDR(
+			XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+			xfs_bmdr, rootblock, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip,
+						mp, whichfork, ARCH_CONVERT),
+			xfs_bmdr, 1));
+	for (found = -1, i = 0;
+	     i < INT_GET(rootblock->bb_numrecs, ARCH_CONVERT) - 1; i++)  {
+		if (INT_GET(rkey[i].br_startoff, ARCH_CONVERT) <= bno &&
+		    bno < INT_GET(rkey[i+1].br_startoff, ARCH_CONVERT))  {
+			found = i;
+			break;
+		}
+	}
+	if (i == INT_GET(rootblock->bb_numrecs, ARCH_CONVERT) - 1 &&
+		bno >= INT_GET(rkey[i].br_startoff, ARCH_CONVERT))
+		found = i;
+
+	ASSERT(found != -1);
+
+	fsbno = INT_GET(rp[found], ARCH_CONVERT);
+
+	ASSERT(verify_dfsbno(mp, fsbno));
+
+	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+	if (!bp) {
+		do_error("cannot read bmap block %llu\n", fsbno);
+		return(NULLDFSBNO);
+	}
+	block = XFS_BUF_TO_BMBT_BLOCK(bp);
+
+	/* ok, now traverse any interior btree nodes */
+	prev_level = INT_GET(rootblock->bb_level, ARCH_CONVERT);
+
+	while (INT_GET(block->bb_level, ARCH_CONVERT) > 0)  {
+		ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) < prev_level);
+
+		prev_level = INT_GET(block->bb_level, ARCH_CONVERT);
+
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >
+						mp->m_bmap_dmxr[1]) {
+			do_warn("# of bmap records in inode %llu exceeds max "
+				"(%u, max - %u)\n",
+				ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+				mp->m_bmap_dmxr[1]);
+			libxfs_putbuf(bp);
+			return(NULLDFSBNO);
+		}
+		if (verbose && INT_GET(block->bb_numrecs, ARCH_CONVERT) <
+						mp->m_bmap_dmnr[1]) {
+			do_warn("- # of bmap records in inode %llu < than min "
+				"(%u, min - %u), proceeding ...\n",
+				ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+				mp->m_bmap_dmnr[1]);
+		}
+		key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize,
+			xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
+			xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		for (found = -1, i = 0;
+		     i < INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1; i++) {
+			if (INT_GET(key[i].br_startoff, ARCH_CONVERT) <= bno &&
+			    bno < INT_GET(key[i+1].br_startoff, ARCH_CONVERT)) {
+				found = i;
+				break;
+			}
+		}
+		if (i == INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1 &&
+			bno >= INT_GET(key[i].br_startoff, ARCH_CONVERT))
+			found = i;
+
+		ASSERT(found != -1);
+		fsbno = INT_GET(pp[found], ARCH_CONVERT);
+
+		ASSERT(verify_dfsbno(mp, fsbno));
+
+		/* release this block and read the next one to traverse */
+		libxfs_putbuf(bp);
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+					XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_error("cannot read bmap block %llu\n", fsbno);
+			return(NULLDFSBNO);
+		}
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+	}
+
+	/* current block must be a leaf block */
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0);
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0]) {
+		do_warn("# of bmap records in inode %llu greater than max "
+			"(%u, max - %u)\n",
+			ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+			mp->m_bmap_dmxr[0]);
+		libxfs_putbuf(bp);
+		return(NULLDFSBNO);
+	}
+	if (verbose && INT_GET(block->bb_numrecs, ARCH_CONVERT) <
+					mp->m_bmap_dmnr[0])
+		do_warn("- # of bmap records in inode %llu < min "
+			"(%u, min - %u), continuing...\n",
+			ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+			mp->m_bmap_dmnr[0]);
+
+	rec = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
+		xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)  {
+		convert_extent(rec + i, &fbno, &fsbno, &bcnt, &flag);
+
+		if (fbno <= bno && bno < fbno + bcnt)  {
+			final_fsbno = bno - fbno + fsbno;
+			break;
+		}
+	}
+	libxfs_putbuf(bp);
+
+	if (final_fsbno == NULLDFSBNO)
+		do_warn("could not map block %llu\n", bno);
+
+	return(final_fsbno);
+}
+
+/*
+ * Map file block "bno" of inode ino_num's fork (whichfork) to a
+ * filesystem block, dispatching on the fork's on-disk format.
+ * Returns NULLDFSBNO when no mapping is produced.
+ *
+ * this could be smarter.  maybe we should have an open inode
+ * routine that hands back an inode handle.  the bet for now is
+ * that only the directory and attribute checking code calls
+ * this, and that the avl tree find and buffer cache search are
+ * cheap; if not, add an inode handle to the da btree cursor.
+ *
+ * caller is responsible for checking doubly referenced blocks
+ * and references to holes
+ */
+xfs_dfsbno_t
+get_bmapi(xfs_mount_t *mp, xfs_dinode_t *dino_p,
+		xfs_ino_t ino_num, xfs_dfiloff_t bno, int whichfork)
+{
+	int	fork_fmt;
+
+	fork_fmt = XFS_DFORK_FORMAT_ARCH(dino_p, whichfork, ARCH_CONVERT);
+
+	if (fork_fmt == XFS_DINODE_FMT_EXTENTS)
+		return getfunc_extlist(mp, ino_num, dino_p, bno, whichfork);
+
+	if (fork_fmt == XFS_DINODE_FMT_BTREE)
+		return getfunc_btree(mp, ino_num, dino_p, bno, whichfork);
+
+	/*
+	 * Neither remaining case can be mapped: complain and hand
+	 * back NULLDFSBNO.  A format other than local shouldn't
+	 * happen at all.
+	 */
+	if (fork_fmt == XFS_DINODE_FMT_LOCAL)
+		do_error("get_bmapi() called for local inode %llu\n", ino_num);
+	else
+		do_error("bad inode format for inode %llu\n", ino_num);
+
+	return NULLDFSBNO;
+}
+
+/*
+ * higher level inode processing stuff starts here:
+ * first, one utility routine for each type of inode
+ */
+
+/*
+ * Process a btree-format fork: check the root block stored in the
+ * inode, scan each subtree under it and, when not just checking dups,
+ * compare every root key with the first key seen in its child.
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+/* ARGSUSED */
+int
+process_btinode(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		ino,
+	xfs_dinode_t		*dip,
+	int			type,
+	int			*dirty,
+	xfs_drfsbno_t		*tot,
+	__uint64_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork,
+	int			check_dups)
+{
+	xfs_bmdr_block_t	*dib;
+	xfs_dfiloff_t		last_key;
+	xfs_ino_t		lino;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_key_t		*pkey;
+	char			*forkname;
+	int			i;
+	bmap_cursor_t		cursor;
+
+	dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+	lino = XFS_AGINO_TO_INO(mp, agno, ino);
+	*tot = 0;
+	*nex = 0;
+
+	if (whichfork == XFS_DATA_FORK)
+		forkname = "data";
+	else
+		forkname = "attr";
+
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) {
+		/*
+		 * This should never happen since a btree inode
+		 * has to have at least one other block in the
+		 * bmap in addition to the root block in the
+		 * inode's data fork.
+		 *
+		 * XXX - if we were going to fix up the inode,
+		 * we'd try to treat the fork as an interior
+		 * node and see if we could get an accurate
+		 * level value from one of the blocks pointed
+		 * to by the pointers in the fork.  For now
+		 * though, we just bail (and blow out the inode).
+		 */
+		do_warn("bad level 0 in inode %llu bmap btree root block\n",
+			XFS_AGINO_TO_INO(mp, agno, ino));
+		return(1);
+	}
+	init_bm_cursor(&cursor, INT_GET(dib->bb_level, ARCH_CONVERT) + 1);
+
+	/* the root sits in the inode, so bound it by that fork's size */
+	if (XFS_BMDR_SPACE_CALC(INT_GET(dib->bb_numrecs, ARCH_CONVERT)) >
+			((whichfork == XFS_DATA_FORK) ?
+			XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT) :
+			XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT)))  {
+		do_warn(
+"indicated size of %s btree root (%d bytes) > space in inode %llu %s fork\n",
+			forkname, XFS_BMDR_SPACE_CALC(INT_GET(dib->bb_numrecs, ARCH_CONVERT)),
+			lino, forkname);
+		return(1);
+	}
+
+	pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+		xfs_bmdr, dib, 1,
+		XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+		xfs_bmdr, 0));
+	pkey = XFS_BTREE_KEY_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+		xfs_bmdr, dib, 1,
+		XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+		xfs_bmdr, 0));
+
+	last_key = NULLDFILOFF;
+
+	for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++)  {
+		/*
+		 * XXX - if we were going to do more to fix up the inode
+		 * btree, we'd do it right here.  For now, if there's a
+		 * problem, we'll bail out and presumably clear the inode.
+		 */
+		if (!verify_dfsbno(mp, INT_GET(pp[i], ARCH_CONVERT)))  {
+			do_warn("bad bmap btree ptr 0x%llx in ino %llu\n",
+				INT_GET(pp[i], ARCH_CONVERT), lino);
+			return(1);
+		}
+
+		if (scan_lbtree((xfs_dfsbno_t)INT_GET(pp[i], ARCH_CONVERT), INT_GET(dib->bb_level, ARCH_CONVERT),
+				    scanfunc_bmap, type, whichfork,
+				    lino, tot, nex, blkmapp, &cursor,
+				    1, check_dups))
+			return(1);
+		/*
+		 * fix key (offset) mismatches between the keys in root
+		 * block records and the first key of each child block.
+		 * fixes cases where entries have been shifted between
+		 * blocks but the parent hasn't been updated
+		 */
+		if (check_dups == 0 &&
+				cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key !=
+					INT_GET(pkey[i].br_startoff, ARCH_CONVERT))  {
+			if (!no_modify)  {
+				do_warn(
+"correcting key in bmbt root (was %llu, now %llu) in inode %llu %s fork\n",
+					INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+					cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key,
+					XFS_AGINO_TO_INO(mp, agno, ino),
+					forkname);
+				*dirty = 1;
+				INT_SET(pkey[i].br_startoff, ARCH_CONVERT, cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key);
+			} else  {
+				do_warn(
+"bad key in bmbt root (is %llu, would reset to %llu) in inode %llu %s fork\n",
+					INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+					cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key,
+					XFS_AGINO_TO_INO(mp, agno, ino),
+					forkname);
+			}
+		}
+		/*
+		 * make sure that keys are in ascending order.  blow out
+		 * inode if the ordering doesn't hold
+		 */
+		if (check_dups == 0)  {
+			if (last_key != NULLDFILOFF && last_key >=
+			    cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key)  {
+				do_warn(
+		"out of order bmbt root key %llu in inode %llu %s fork\n",
+					cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key,
+					XFS_AGINO_TO_INO(mp, agno, ino),
+					forkname);
+				return(1);
+			}
+			last_key = cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key;
+		}
+	}
+	/*
+	 * Check that the last child block's forward sibling pointer
+	 * is NULL.
+	 */
+	if (check_dups == 0 &&
+		cursor.level[0].right_fsbno != NULLDFSBNO)  {
+		do_warn(
+	"bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n",
+			cursor.level[0].right_fsbno);
+		do_warn(
+		"\tin inode %llu (%s fork) bmap btree block %llu\n",
+			lino, forkname,
+			cursor.level[0].fsbno);
+		return(1);
+	}
+	
+	return(0);
+}
+
+/*
+ * Extent-format fork: run the fork's in-inode extent records through
+ * the record-list checker.  return 1 if inode should be cleared, else 0
+ */
+/* ARGSUSED */
+int
+process_exinode(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		ino,
+	xfs_dinode_t		*dip,
+	int			type,
+	int			*dirty,
+	xfs_drfsbno_t		*tot,
+	__uint64_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork,
+	int			check_dups)
+{
+	xfs_bmbt_rec_32_t	*recs;
+	xfs_dfiloff_t		key_lo;
+	xfs_dfiloff_t		key_hi;
+	xfs_ino_t		inum = XFS_AGINO_TO_INO(mp, agno, ino);
+
+	*tot = 0;
+	*nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+	recs = (xfs_bmbt_rec_32_t *)
+			XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+
+	/*
+	 * XXX - if we were going to fix up the extent records, we'd do
+	 * it right here.  For now a problem just means "clear the inode".
+	 */
+	if (check_dups != 0)
+		return scan_bmbt_reclist(mp, recs, *nex, type, inum, tot,
+					whichfork);
+
+	return process_bmbt_reclist(mp, recs, *nex, type, inum, tot, blkmapp,
+					&key_lo, &key_hi, whichfork);
+}
+
+/*
+ * local-format fork, size checks only: di_size (data fork) or shortform
+ * totsize (attr fork).  return 1 if inode should be cleared, 0 otherwise
+ */
+/* ARGSUSED */
+int
+process_lclinode(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		ino,
+	xfs_dinode_t		*dip,
+	int			type,
+	int			*dirty,
+	xfs_drfsbno_t		*tot,
+	__uint64_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork,
+	int			check_dups)
+{
+	xfs_attr_shortform_t	*asf;
+	xfs_dinode_core_t	*dic;
+	xfs_ino_t		lino;
+
+	*tot = 0;
+	*nex = 0;	/* local inodes have 0 extents */
+	dic = &dip->di_core;
+	lino = XFS_AGINO_TO_INO(mp, agno, ino);
+	if (whichfork == XFS_DATA_FORK &&
+	    INT_GET(dic->di_size, ARCH_CONVERT) > XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT)) {
+		do_warn(
+	"local inode %llu data fork is too large (size = %lld, max = %d)\n",
+			lino, INT_GET(dic->di_size, ARCH_CONVERT), XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT));
+		return(1);
+	} else if (whichfork == XFS_ATTR_FORK) {
+		asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+		if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) > XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT)) {
+			do_warn(
+		"local inode %llu attr fork too large (size %d, max = %d)\n",
+					lino, INT_GET(asf->hdr.totsize, ARCH_CONVERT),
+					XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+			return(1);
+		}
+		if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) < sizeof(xfs_attr_sf_hdr_t)) {
+			do_warn(
+		"local inode %llu attr too small (size = %d, min size = %d)\n",
+					lino, INT_GET(asf->hdr.totsize, ARCH_CONVERT),
+					(int) sizeof(xfs_attr_sf_hdr_t)); /* %d takes an int */
+			return(1);
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * Check the extent list of a symlink's data fork: format must agree
+ * with size, extents must be contiguous from offset 0 and total no
+ * more than max_symlink_blocks.  return 1 if bad, 0 if ok
+ */
+int
+process_symlink_extlist(xfs_mount_t *mp, xfs_ino_t lino, xfs_dinode_t *dino)
+{
+	xfs_dfsbno_t		start;		/* start */
+	xfs_dfilblks_t		cnt;		/* count */
+	xfs_dfiloff_t		offset;		/* offset */
+	xfs_dfiloff_t		expected_offset = 0;
+	xfs_bmbt_rec_32_t	*rp;
+	int			numrecs;
+	int			i;
+	int			max_blocks = max_symlink_blocks;
+	int			whichfork = XFS_DATA_FORK;
+	int			flag;
+
+	if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_SIZE_ARCH(dino, mp, whichfork, ARCH_CONVERT))  {
+		if (dino->di_core.di_format == XFS_DINODE_FMT_LOCAL)  {
+			return(0);
+		} else  {
+			do_warn(
+"mismatch between format (%d) and size (%lld) in symlink ino %llu\n",
+				dino->di_core.di_format,
+				INT_GET(dino->di_core.di_size, ARCH_CONVERT),
+				lino);
+			return(1);
+		}
+	} else if (dino->di_core.di_format == XFS_DINODE_FMT_LOCAL)  {
+		do_warn(
+"mismatch between format (%d) and size (%lld) in symlink inode %llu\n",
+				dino->di_core.di_format,
+				INT_GET(dino->di_core.di_size, ARCH_CONVERT),
+				lino);
+		return(1);
+	}
+
+	rp = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dino, whichfork, ARCH_CONVERT);
+	numrecs = XFS_DFORK_NEXTENTS_ARCH(dino, whichfork, ARCH_CONVERT);
+
+	/* the max # of extents in a symlink inode is equal to the
+	 * max # of blocks required to store the symlink */
+	if (numrecs > max_symlink_blocks)  {
+		do_warn(
+		"bad number of extents (%d) in symlink %llu data fork\n",
+			numrecs, lino);
+		return(1);
+	}
+
+	for (i = 0; i < numrecs; i++)  {
+		/* decode record i (not the first record on every pass) */
+		convert_extent(rp + i, &offset, &start, &cnt, &flag);
+		if (offset != expected_offset)  {
+			do_warn(
+		"bad extent #%d offset (%llu) in symlink %llu data fork\n",
+				i, offset, lino);
+			return(1);
+		}
+		if (cnt == 0 || cnt > max_blocks)  {
+			do_warn(
+		"bad extent #%d count (%llu) in symlink %llu data fork\n",
+				i, cnt, lino);
+			return(1);
+		}
+
+		max_blocks -= cnt;
+		expected_offset += cnt;
+	}
+
+	return(0);
+}
+
+/*
+ * scans the first "length" bytes of name; returns 1 as soon as
+ * a \0 byte is seen, returns 0 if there is none
+ */
+int
+null_check(char *name, int length)
+{
+	char	*cur = name;
+	int	left;
+
+	ASSERT(length < MAXPATHLEN);
+
+	for (left = length; left > 0; left--, cur++)
+		if (*cur == '\0')
+			return 1;
+
+	return 0;
+}
+
+/*
+ * validate the target of a symlink inode: length, no embedded
+ * nulls, no over-long path component.  like usual, returns 0 if
+ * everything's ok and 1 if something's bogus
+ */
+int
+process_symlink(xfs_mount_t *mp, xfs_ino_t lino, xfs_dinode_t *dino,
+		blkmap_t *blkmap)
+{
+	xfs_dinode_core_t	*core = &dino->di_core;
+	long long		linklen = INT_GET(core->di_size, ARCH_CONVERT);
+	xfs_dfsbno_t		blk;
+	xfs_buf_t		*buf;
+	char			*comp, *slash, *dst;
+	int			fileblk, chunk, copied, blkbytes;
+	char			target[MAXPATHLEN];
+
+	/*
+	 * check size against kernel symlink limits.  we know
+	 * size is consistent with inode storage format -- e.g.
+	 * the inode is structurally ok so we don't have to check
+	 * for that
+	 */
+	if (linklen >= MAXPATHLEN)  {
+		do_warn("symlink in inode %llu too long (%lld chars)\n",
+			lino, linklen);
+		return 1;
+	}
+
+	/*
+	 * gather the link text into target[] so it can be checked
+	 * component by component
+	 */
+	if (linklen <= XFS_DFORK_DSIZE_ARCH(dino, mp, ARCH_CONVERT))  {
+		/* local symlink: the text sits in the inode's data fork */
+		bcopy((char *)XFS_DFORK_DPTR_ARCH(dino, ARCH_CONVERT),
+			target, linklen);
+	} else {
+		/*
+		 * stored in a meta-data file, have to bmap one block
+		 * at a time and copy the symlink into the data area
+		 */
+		blkbytes = (int)XFS_FSB_TO_BB(mp, 1)*BBSIZE;
+		dst = target;
+		copied = 0;
+
+		for (fileblk = 0; copied < linklen; fileblk++)  {
+			blk = blkmap_get(blkmap, fileblk);
+			buf = NULL;
+			if (blk != NULLDFSBNO)
+				buf = libxfs_readbuf(mp->m_dev,
+						XFS_FSB_TO_DADDR(mp, blk),
+						XFS_FSB_TO_BB(mp, 1), 0);
+			if (buf == NULL)  {
+				do_warn("cannot read inode %llu, file block %d,"
+					" disk block %llu\n", lino, fileblk, blk);
+				return 1;
+			}
+
+			chunk = MIN(linklen - copied, blkbytes);
+			bcopy((char *)XFS_BUF_PTR(buf), dst, chunk);
+			dst += chunk;
+			copied += chunk;
+			libxfs_putbuf(buf);
+		}
+	}
+	target[linklen] = '\0';
+
+	/*
+	 * check for nulls
+	 */
+	if (null_check(target, (int) linklen))  {
+		do_warn("found illegal null character in symlink inode %llu\n",
+			lino);
+		return 1;
+	}
+
+	/*
+	 * a link shorter than MAXNAMELEN can't hold an over-long
+	 * component, so there is nothing more to check
+	 */
+	if (linklen < MAXNAMELEN)
+		return 0;
+
+	/*
+	 * check for any component being too long
+	 */
+	comp = target;
+	while ((slash = strchr(comp, '/')) != NULL)  {
+		if (slash - comp >= MAXNAMELEN)  {
+			do_warn(
+			"component of symlink in inode %llu too long\n",
+				lino);
+			return 1;
+		}
+		comp = slash + 1;
+	}
+
+	/* whatever follows the last slash is a component too */
+	if (strlen(comp) >= MAXNAMELEN)  {
+		do_warn("component of symlink in inode %llu too long\n",
+			lino);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * checks for the special inode types that carry no data of their
+ * own (fifos, sockets, devices): returns 1 for a mountpoint inode
+ * or a non-zero size, 0 otherwise
+ */
+/* ARGSUSED */
+int
+process_misc_ino_types(xfs_mount_t	*mp,
+			xfs_dinode_t	*dino,
+			xfs_ino_t	lino,
+			int		type)
+{
+	long long	nbytes;
+
+	/*
+	 * disallow mountpoint inodes until such time as the
+	 * kernel actually allows them to be created (will
+	 * probably require a superblock version rev, sigh).
+	 */
+	if (type == XR_INO_MOUNTPOINT)  {
+		do_warn("inode %llu has bad inode type (IFMNT)\n", lino);
+		return 1;
+	}
+
+	/*
+	 * must also have a zero size; nothing else to check if so
+	 */
+	nbytes = INT_GET(dino->di_core.di_size, ARCH_CONVERT);
+	if (nbytes == 0)
+		return 0;
+
+	/* name the offending inode type in the complaint */
+	if (type == XR_INO_CHRDEV)  {
+		do_warn("size of character device inode %llu != 0 "
+			"(%lld bytes)\n",
+			lino, nbytes);
+	} else if (type == XR_INO_BLKDEV)  {
+		do_warn("size of block device inode %llu != 0 "
+			"(%lld bytes)\n",
+			lino, nbytes);
+	} else if (type == XR_INO_SOCK)  {
+		do_warn("size of socket inode %llu != 0 "
+			"(%lld bytes)\n",
+			lino, nbytes);
+	} else if (type == XR_INO_FIFO)  {
+		do_warn("size of fifo inode %llu != 0 "
+			"(%lld bytes)\n",
+			lino, nbytes);
+	} else  {
+		/* any other type here is treated as an internal error */
+		do_warn("Internal error - process_misc_ino_types, "
+			"illegal type %d\n", type);
+		abort();
+	}
+
+	return 1;
+}
+
+/*
+ * returns 1 (after a warning) when one of the special inode types
+ * (chr/blk device, socket, fifo) is found with totblocks != 0;
+ * returns 0 in every other case
+ */
+int
+process_misc_ino_types_blocks(xfs_drfsbno_t totblocks, xfs_ino_t lino, int type)
+{
+	/*
+	 * di_nblocks can't be used to enforce "no data fork blocks" for
+	 * the misc types, because the attribute blocks (atotblocks) are
+	 * counted in nblocks too.  That check has to wait until
+	 * atotblocks is known (or use a simple anExtents == 0 test), so
+	 * nblocks checking for misc types is done later in
+	 * process_dinode_int.  Here we only guarantee totblocks is 0.
+	 */
+
+	if (totblocks == 0)
+		return 0;
+
+	if (type == XR_INO_CHRDEV)
+		do_warn(
+		"size of character device inode %llu != 0 (%llu blocks)\n",
+			lino, totblocks);
+	else if (type == XR_INO_BLKDEV)
+		do_warn(
+		"size of block device inode %llu != 0 (%llu blocks)\n",
+			lino, totblocks);
+	else if (type == XR_INO_SOCK)
+		do_warn(
+		"size of socket inode %llu != 0 (%llu blocks)\n",
+			lino, totblocks);
+	else if (type == XR_INO_FIFO)
+		do_warn(
+		"size of fifo inode %llu != 0 (%llu blocks)\n",
+			lino, totblocks);
+	else
+		return 0;
+
+	return 1;
+}
+
+/*
+ * returns 0 if the inode is ok, 1 if the inode is corrupt
+ * check_dups can be set to 1 *only* when called by the
+ * first pass of the duplicate block checking of phase 4.
+ * *dirty is set > 0 if the dinode has been altered and
+ * needs to be written out.
+ *
+ * for detailed, info, look at process_dinode() comments.
+ */
+/* ARGSUSED */
+int
+process_dinode_int(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino,
+		int was_free,		/* 1 if inode is currently free */
+		int *dirty,		/* out == > 0 if inode is now dirty */
+		int *cleared,		/* out == 1 if inode was cleared */
+		int *used,		/* out == 1 if inode is in use */
+		int verify_mode,	/* 1 == verify but don't modify inode */
+		int uncertain,		/* 1 == inode is uncertain */
+		int ino_discovery,	/* 1 == check dirs for unknown inodes */
+		int check_dups,		/* 1 == check if inode claims
+					 * duplicate blocks		*/
+		int extra_attr_check, /* 1 == do attribute format and value checks */
+		int *isa_dir,		/* out == 1 if inode is a directory */
+		xfs_ino_t *parent)	/* out -- parent if ino is a dir */
+{
+	xfs_drfsbno_t		totblocks = 0;
+	xfs_drfsbno_t		atotblocks = 0;
+	xfs_dinode_core_t	*dinoc;
+	char			*rstring;
+	int			type;
+	int			rtype;
+	int			do_rt;
+	int			err;
+	int			retval = 0;
+	__uint64_t		nextents;
+	__uint64_t		anextents;
+	xfs_ino_t		lino;
+	const int		is_free = 0;
+	const int		is_used = 1;
+	int			repair = 0;
+	blkmap_t		*ablkmap = NULL;
+	blkmap_t		*dblkmap = NULL;
+	static char		okfmts[] = {
+		0,				/* free inode */
+		1 << XFS_DINODE_FMT_DEV,	/* FIFO */
+		1 << XFS_DINODE_FMT_DEV,	/* CHR */
+		0,				/* type 3 unused */
+		(1 << XFS_DINODE_FMT_LOCAL) |
+		(1 << XFS_DINODE_FMT_EXTENTS) |
+		(1 << XFS_DINODE_FMT_BTREE),	/* DIR */
+		0,				/* type 5 unused */
+		1 << XFS_DINODE_FMT_DEV,	/* BLK */
+		0,				/* type 7 unused */
+		(1 << XFS_DINODE_FMT_EXTENTS) |
+		(1 << XFS_DINODE_FMT_BTREE),	/* REG */
+		0,				/* type 9 unused */
+		(1 << XFS_DINODE_FMT_LOCAL) |
+		(1 << XFS_DINODE_FMT_EXTENTS),	/* LNK */
+		0,				/* type 11 unused */
+		1 << XFS_DINODE_FMT_DEV,	/* SOCK */
+		0,				/* type 13 unused */
+		1 << XFS_DINODE_FMT_UUID,	/* MNT */
+		0				/* type 15 unused */
+	};
+
+	retval = 0;
+	totblocks = atotblocks = 0;
+	*dirty = *isa_dir = *cleared = 0;
+	*used = is_used;
+	type = rtype = XR_INO_UNKNOWN;
+	rstring = NULL;
+	do_rt = 0;
+
+	dinoc = &dino->di_core;
+	lino = XFS_AGINO_TO_INO(mp, agno, ino);
+
+	/*
+	 * if in verify mode, don't modify the inode.
+	 *
+	 * if correcting, reset stuff that has known values
+	 *
+	 * if in uncertain mode, be silent on errors since we're
+	 * trying to find out if these are inodes as opposed
+	 * to assuming that they are.  Just return the appropriate
+	 * return code in that case.
+	 */
+
+	if (INT_GET(dinoc->di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)  {
+		retval++;
+		if (!verify_mode)  {
+			do_warn("bad magic number 0x%x on inode %llu, ", 
+				INT_GET(dinoc->di_magic, ARCH_CONVERT), lino);
+			if (!no_modify)  {
+				do_warn("resetting magic number\n");
+				*dirty = 1;
+				INT_SET(dinoc->di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+			} else  {
+				do_warn("would reset magic number\n");
+			}
+		} else if (!uncertain) {
+			do_warn("bad magic number 0x%x on inode %llu\n", 
+				INT_GET(dinoc->di_magic, ARCH_CONVERT), lino);
+		}
+	}
+
+	if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) ||
+	    (!fs_inode_nlink && dinoc->di_version > XFS_DINODE_VERSION_1))  {
+		retval++;
+		if (!verify_mode)  {
+			do_warn("bad version number 0x%x on inode %llu, ", 
+				dinoc->di_version, lino);
+			if (!no_modify)  {
+				do_warn("resetting version number\n");
+				*dirty = 1;
+				dinoc->di_version = (fs_inode_nlink) ?
+					XFS_DINODE_VERSION_2 :
+					XFS_DINODE_VERSION_1;
+			} else  {
+				do_warn("would reset version number\n");
+			}
+		} else  if (!uncertain) {
+			do_warn("bad version number 0x%x on inode %llu\n", 
+				dinoc->di_version, lino);
+		}
+	}
+
+	/*
+	 * blow out of here if the inode size is < 0
+	 */
+	if (INT_GET(dinoc->di_size, ARCH_CONVERT) < 0)  {
+		retval++;
+		if (!verify_mode)  {
+			do_warn("bad (negative) size %lld on inode %llu\n",
+				INT_GET(dinoc->di_size, ARCH_CONVERT), lino);
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				*cleared = 1;
+			} else  {
+				*dirty = 1;
+				*cleared = 1;
+			}
+			*used = is_free;
+		} else if (!uncertain)  {
+			do_warn("bad (negative) size %lld on inode %llu\n",
+				INT_GET(dinoc->di_size, ARCH_CONVERT), lino);
+		}
+
+		return(1);
+	}
+
+	/*
+	 * was_free value is not meaningful if we're in verify mode
+	 */
+	if (!verify_mode && INT_GET(dinoc->di_mode, ARCH_CONVERT) == 0 && was_free == 1)  {
+		/*
+		 * easy case, inode free -- inode and map agree, clear
+		 * it just in case to ensure that format, etc. are
+		 * set correctly
+		 */
+		if (!no_modify)  {
+			err =  clear_dinode(mp, dino, lino);
+			if (err)  {
+				*dirty = 1;
+				*cleared = 1;
+			}
+		}
+		*used = is_free;
+		return(0);
+	} else if (!verify_mode && INT_GET(dinoc->di_mode, ARCH_CONVERT) == 0 && was_free == 0)  {
+		/*
+		 * the inode looks free but the map says it's in use.
+		 * clear the inode just to be safe and mark the inode
+		 * free.
+		 */
+		do_warn("imap claims a free inode %llu is in use, ", lino);
+
+		if (!no_modify)  {
+			do_warn("correcting imap and clearing inode\n");
+
+			err =  clear_dinode(mp, dino, lino);
+			if (err)  {
+				retval++;
+				*dirty = 1;
+				*cleared = 1;
+			}
+		} else  {
+			do_warn("would correct imap and clear inode\n");
+
+			*dirty = 1;
+			*cleared = 1;
+		}
+
+		*used = is_free;
+
+		return(retval > 0 ? 1 : 0);
+	}
+
+	/*
+	 * because of the lack of any write ordering guarantee, it's
+	 * possible that the core got updated but the forks didn't.
+	 * so rather than be ambitious (and probably incorrect),
+	 * if there's an inconsistency, we get conservative and 
+	 * just pitch the file.  blow off checking formats of
+	 * free inodes since technically any format is legal
+	 * as we reset the inode when we re-use it.
+	 */
+	if (INT_GET(dinoc->di_mode, ARCH_CONVERT) != 0 &&
+		((((INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) >> 12) > 15) ||
+		dinoc->di_format < XFS_DINODE_FMT_DEV ||
+		dinoc->di_format > XFS_DINODE_FMT_UUID ||
+			(!(okfmts[(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) >> 12] &
+			  (1 << dinoc->di_format))))) {
+		/* bad inode format */
+		retval++;
+		if (!uncertain)
+			do_warn("bad inode format in inode %llu\n", lino);
+		if (!verify_mode)  {
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+		}
+		*cleared = 1;
+		*used = is_free;
+
+		return(retval > 0 ? 1 : 0);
+	}
+
+	if (verify_mode)
+		return(retval > 0 ? 1 : 0);
+
+	/*
+	 * clear the next unlinked field if necessary on a good
+	 * inode only during phase 4 -- when checking for inodes
+	 * referencing duplicate blocks.  then it's safe because
+	 * we've done the inode discovery and have found all the inodes
+	 * we're going to find.  check_dups is set to 1 only during
+	 * phase 4.  Ugly.
+	 */
+	if (check_dups && !no_modify)
+		*dirty += clear_dinode_unlinked(mp, dino);
+
+	/* set type and map type info */
+
+	switch (INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) {
+	case IFDIR:
+		type = XR_INO_DIR;
+		*isa_dir = 1;
+		break;
+	case IFREG:
+		if (INT_GET(dinoc->di_flags, ARCH_CONVERT) & XFS_DIFLAG_REALTIME)
+			type = XR_INO_RTDATA;
+		else if (lino == mp->m_sb.sb_rbmino)
+			type = XR_INO_RTBITMAP;
+		else if (lino == mp->m_sb.sb_rsumino)
+			type = XR_INO_RTSUM;
+		else
+			type = XR_INO_DATA;
+		break;
+	case IFLNK:
+		type = XR_INO_SYMLINK;
+		break;
+	case IFCHR:
+		type = XR_INO_CHRDEV;
+		break;
+	case IFBLK:
+		type = XR_INO_BLKDEV;
+		break;
+	case IFSOCK:
+		type = XR_INO_SOCK;
+		break;
+	case IFIFO:
+		type = XR_INO_FIFO;
+		break;
+	case IFMNT:
+		type = XR_INO_MOUNTPOINT;
+		break;
+	default:
+		type = XR_INO_UNKNOWN;
+		do_warn("Unexpected inode type %#o inode %llu\n",
+			(int) (INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT), lino);
+		abort();
+		break;
+	}
+
+	/*
+	 * type checks for root, realtime inodes, and quota inodes
+	 */
+	if (lino == mp->m_sb.sb_rootino && type != XR_INO_DIR)  {
+		do_warn("bad inode type for root inode %llu, ", lino);
+		type = XR_INO_DIR;
+
+		if (!no_modify)  {
+			do_warn("resetting to directory\n");
+			INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, &= ~(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT));
+			INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, |= INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFDIR);
+		} else  {
+			do_warn("would reset to directory\n");
+		}
+	} else if (lino == mp->m_sb.sb_rsumino)  {
+		do_rt = 1;
+		rstring = "summary";
+		rtype = XR_INO_RTSUM;
+	} else if (lino == mp->m_sb.sb_rbmino)  {
+		do_rt = 1;
+		rstring = "bitmap";
+		rtype = XR_INO_RTBITMAP;
+	} else if (lino == mp->m_sb.sb_uquotino)  {
+		if (type != XR_INO_DATA)  {
+			do_warn("user quota inode has bad type 0x%x\n",
+				INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			mp->m_sb.sb_uquotino = NULLFSINO;
+
+			return(1);
+		}
+	} else if (lino == mp->m_sb.sb_pquotino)  {
+		if (type != XR_INO_DATA)  {
+			do_warn("project quota inode has bad type 0x%x\n",
+				INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			mp->m_sb.sb_pquotino = NULLFSINO;
+
+			return(1);
+		}
+	}
+
+	if (do_rt && type != rtype)  {
+		type = XR_INO_DATA;
+
+		do_warn("bad inode type for realtime %s inode %llu, ",
+			rstring, lino);
+
+		if (!no_modify)  {
+			do_warn("resetting to regular file\n");
+			INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, &= ~(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT));
+			INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, |= INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFREG);
+		} else  {
+			do_warn("would reset to regular file\n");
+		}
+	}
+
+	/*
+	 * only realtime inodes should have extsize set
+	 */
+	if (type != XR_INO_RTDATA && INT_GET(dinoc->di_extsize, ARCH_CONVERT) != 0)  {
+		do_warn(
+"bad non-zero extent size value %u for non-realtime inode %llu,",
+			INT_GET(dinoc->di_extsize, ARCH_CONVERT), lino);
+
+		if (!no_modify)  {
+			do_warn("resetting to zero\n");
+			INT_ZERO(dinoc->di_extsize, ARCH_CONVERT);
+			*dirty = 1;
+		} else  {
+			do_warn("would reset to zero\n");
+		}
+	}
+
+	/*
+	 * for realtime inodes, check sizes to see that
+	 * they are consistent with the # of realtime blocks.
+	 * also, verify that they contain only one extent and
+	 * are extent format files.  If anything's wrong, clear
+	 * the inode -- we'll recreate it in phase 6.
+	 */
+	if (do_rt && INT_GET(dinoc->di_size, ARCH_CONVERT)
+			!= mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize)  {
+		do_warn("bad size %llu for realtime %s inode %llu\n",
+			INT_GET(dinoc->di_size, ARCH_CONVERT), rstring, lino);
+
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+
+		return(1);
+	}
+
+	if (do_rt && mp->m_sb.sb_rblocks == 0 && INT_GET(dinoc->di_nextents, ARCH_CONVERT) != 0)  {
+		do_warn("bad # of extents (%u) for realtime %s inode %llu\n",
+			INT_GET(dinoc->di_nextents, ARCH_CONVERT), rstring, lino);
+
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+
+		return(1);
+	}
+
+	/*
+	 * Setup nextents and anextents for blkmap_alloc calls.
+	 */
+	nextents = INT_GET(dinoc->di_nextents, ARCH_CONVERT);
+	if (nextents > INT_GET(dinoc->di_nblocks, ARCH_CONVERT) || nextents > XFS_MAX_INCORE_EXTENTS)
+		nextents = 1;
+	anextents = INT_GET(dinoc->di_anextents, ARCH_CONVERT);
+	if (anextents > INT_GET(dinoc->di_nblocks, ARCH_CONVERT) || anextents > XFS_MAX_INCORE_EXTENTS)
+		anextents = 1;
+
+	/*
+	 * general size/consistency checks:
+	 *
+	 * if the size <= size of the data fork, directories  must be
+	 * local inodes unlike regular files which would be extent inodes.
+	 * all the other mentioned types have to have a zero size value.
+	 *
+	 * if the size and format don't match, get out now rather than
+	 * risk trying to process a non-existent extents or btree
+	 * type data fork.
+	 */
+	switch (type)  {
+	case XR_INO_DIR:
+		if (INT_GET(dinoc->di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dino, mp, ARCH_CONVERT)
+				&& dinoc->di_format != XFS_DINODE_FMT_LOCAL)  {
+			do_warn(
+"mismatch between format (%d) and size (%lld) in directory ino %llu\n",
+				dinoc->di_format,
+				INT_GET(dinoc->di_size, ARCH_CONVERT),
+				lino);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp,
+						dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			return(1);
+		}
+		if (dinoc->di_format != XFS_DINODE_FMT_LOCAL)
+			dblkmap = blkmap_alloc(nextents);
+		break;
+	case XR_INO_SYMLINK:
+		if (process_symlink_extlist(mp, lino, dino))  {
+			do_warn("bad data fork in symlink %llu\n", lino);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp,
+						dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			return(1);
+		}
+		if (dinoc->di_format != XFS_DINODE_FMT_LOCAL)
+			dblkmap = blkmap_alloc(nextents);
+		break;
+	case XR_INO_CHRDEV:	/* fall through to FIFO case ... */
+	case XR_INO_BLKDEV:	/* fall through to FIFO case ... */
+	case XR_INO_SOCK:	/* fall through to FIFO case ... */
+	case XR_INO_MOUNTPOINT:	/* fall through to FIFO case ... */
+	case XR_INO_FIFO:
+		if (process_misc_ino_types(mp, dino, lino, type))  {
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			return(1);
+		}
+		break;
+	case XR_INO_RTDATA:
+		/*
+		 * if we have no realtime blocks, any inode claiming
+		 * to be a real-time file is bogus
+		 */
+		if (mp->m_sb.sb_rblocks == 0)  {
+			do_warn(
+			"found inode %llu claiming to be a real-time file\n",
+				lino);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			return(1);
+		}
+		break;
+	case XR_INO_RTBITMAP:
+		if (INT_GET(dinoc->di_size, ARCH_CONVERT) != (__int64_t) mp->m_sb.sb_rbmblocks *
+				mp->m_sb.sb_blocksize)  {
+			do_warn(
+	"realtime bitmap inode %llu has bad size %lld (should be %lld)\n",
+				lino, INT_GET(dinoc->di_size, ARCH_CONVERT),
+				(__int64_t) mp->m_sb.sb_rbmblocks *
+				mp->m_sb.sb_blocksize);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			return(1);
+		}
+		dblkmap = blkmap_alloc(nextents);
+		break;
+	case XR_INO_RTSUM:
+		if (INT_GET(dinoc->di_size, ARCH_CONVERT) != mp->m_rsumsize)  {
+			do_warn(
+	"realtime summary inode %llu has bad size %lld (should be %d)\n",
+				lino, INT_GET(dinoc->di_size, ARCH_CONVERT), mp->m_rsumsize);
+
+			if (!no_modify)  {
+				*dirty += clear_dinode(mp, dino, lino);
+				ASSERT(*dirty > 0);
+			}
+
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+
+			return(1);
+		}
+		dblkmap = blkmap_alloc(nextents);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * check for illegal values of forkoff
+	 */
+	err = 0;
+	if (dinoc->di_forkoff != 0)  {
+		switch (dinoc->di_format)  {
+		case XFS_DINODE_FMT_DEV:
+			if (dinoc->di_forkoff !=
+					(roundup(sizeof(dev_t), 8) >> 3))  {
+				do_warn(
+		"bad attr fork offset %d in dev inode %llu, should be %d\n",
+					(int) dinoc->di_forkoff,
+					lino,
+					(int) (roundup(sizeof(dev_t), 8) >> 3));
+				err = 1;
+			}
+			break;
+		case XFS_DINODE_FMT_UUID:
+			if (dinoc->di_forkoff !=
+					(roundup(sizeof(uuid_t), 8) >> 3))  {
+				do_warn(
+		"bad attr fork offset %d in uuid inode %llu, should be %d\n",
+					(int) dinoc->di_forkoff,
+					lino,
+					(int)(roundup(sizeof(uuid_t), 8) >> 3));
+				err = 1;
+			}
+			break;
+		case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
+		case XFS_DINODE_FMT_EXTENTS:	/* fall through ... */
+		case XFS_DINODE_FMT_BTREE:
+			if (dinoc->di_forkoff != mp->m_attroffset >> 3)  {
+				do_warn(
+		"bad attr fork offset %d in inode %llu, should be %d\n",
+					(int) dinoc->di_forkoff,
+					lino,
+					(int) (mp->m_attroffset >> 3));
+				err = 1;
+			}
+			break;
+		default:
+			do_error("unexpected inode format %d\n",
+				(int) dinoc->di_format);
+			break;
+		}
+	}
+
+	if (err)  {
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+		blkmap_free(dblkmap);
+		return(1);
+	}
+
+	/*
+	 * check data fork -- if it's bad, clear the inode
+	 */
+	nextents = 0;
+	switch (dinoc->di_format) {
+	case XFS_DINODE_FMT_LOCAL:
+		err = process_lclinode(mp, agno, ino, dino, type,
+			dirty, &totblocks, &nextents, &dblkmap,
+			XFS_DATA_FORK, check_dups);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		err = process_exinode(mp, agno, ino, dino, type,
+			dirty, &totblocks, &nextents, &dblkmap,
+			XFS_DATA_FORK, check_dups);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		err = process_btinode(mp, agno, ino, dino, type,
+			dirty, &totblocks, &nextents, &dblkmap,
+			XFS_DATA_FORK, check_dups);
+		break;
+	case XFS_DINODE_FMT_DEV:	/* fall through */
+	case XFS_DINODE_FMT_UUID:
+		err = 0;
+		break;
+	default:
+		do_error("unknown format %d, ino %llu (mode = %d)\n",
+				dinoc->di_format, lino, INT_GET(dinoc->di_mode, ARCH_CONVERT));
+	}
+
+	if (err)  {
+		/*
+		 * problem in the data fork, clear out the inode
+		 * and get out
+		 */
+		do_warn("bad data fork in inode %llu\n", lino);
+
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+		blkmap_free(dblkmap);
+
+		return(1);
+	}
+
+	if (check_dups)  {
+		/*
+		 * if check_dups was non-zero, we have to
+		 * re-process data fork to set bitmap since the
+		 * bitmap wasn't set the first time through
+		 */
+		switch (dinoc->di_format) {
+		case XFS_DINODE_FMT_LOCAL:
+			err = process_lclinode(mp, agno, ino, dino, type,
+				dirty, &totblocks, &nextents, &dblkmap,
+				XFS_DATA_FORK, 0);
+			break;
+		case XFS_DINODE_FMT_EXTENTS:
+			err = process_exinode(mp, agno, ino, dino, type,
+				dirty, &totblocks, &nextents, &dblkmap,
+				XFS_DATA_FORK, 0);
+			break;
+		case XFS_DINODE_FMT_BTREE:
+			err = process_btinode(mp, agno, ino, dino, type,
+				dirty, &totblocks, &nextents, &dblkmap,
+				XFS_DATA_FORK, 0);
+			break;
+		case XFS_DINODE_FMT_DEV:	/* fall through */
+		case XFS_DINODE_FMT_UUID:
+			err = 0;
+			break;
+		default:
+			do_error("unknown format %d, ino %llu (mode = %d)\n",
+					dinoc->di_format, lino, INT_GET(dinoc->di_mode, ARCH_CONVERT));
+		}
+
+		if (no_modify && err != 0)  {
+			*cleared = 1;
+			*used = is_free;
+			*isa_dir = 0;
+			blkmap_free(dblkmap);
+
+			return(1);
+		}
+
+		ASSERT(err == 0);
+	}
+
+	/*
+	 * check attribute fork if necessary.  attributes are
+	 * always stored in the regular filesystem.
+	 */
+
+	if (!XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT) && dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS) {
+		do_warn("bad attribute format %d in inode %llu, ",
+			dinoc->di_aformat, lino);
+		if (!no_modify) {
+			do_warn("resetting value\n");
+			dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+			*dirty = 1;
+		} else
+			do_warn("would reset value\n");
+		anextents = 0;
+	} else if (XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT)) {
+		switch (dinoc->di_aformat) {
+		case XFS_DINODE_FMT_LOCAL:
+			anextents = 0;
+			err = process_lclinode(mp, agno, ino, dino,
+				type, dirty, &atotblocks, &anextents, &ablkmap,
+				XFS_ATTR_FORK, check_dups);
+			break;
+		case XFS_DINODE_FMT_EXTENTS:
+			ablkmap = blkmap_alloc(anextents);
+			anextents = 0;
+			err = process_exinode(mp, agno, ino, dino,
+				type, dirty, &atotblocks, &anextents, &ablkmap,
+				XFS_ATTR_FORK, check_dups);
+			break;
+		case XFS_DINODE_FMT_BTREE:
+			ablkmap = blkmap_alloc(anextents);
+			anextents = 0;
+			err = process_btinode(mp, agno, ino, dino,
+				type, dirty, &atotblocks, &anextents, &ablkmap,
+				XFS_ATTR_FORK, check_dups);
+			break;
+		default:
+			anextents = 0;
+			do_warn("illegal attribute format %d, ino %llu\n",
+					dinoc->di_aformat, lino);
+			err = 1;
+			break;
+		}
+
+		if (err)  {
+			/*
+			 * clear the attribute fork if necessary.  we can't
+			 * clear the inode because we've already put the
+			 * inode space info into the blockmap.
+			 *
+			 * XXX - put the inode onto the "move it" list and
+			 *	log the attribute scrubbing
+			 */
+			do_warn("bad attribute fork in inode %llu", lino);
+
+			if (!no_modify)  {
+				if (delete_attr_ok)  {
+					do_warn(", clearing attr fork\n");
+					*dirty += clear_dinode_attr(mp,
+							dino, lino);
+				} else  {
+					do_warn("\n");
+					*dirty += clear_dinode(mp,
+							dino, lino);
+				}
+				ASSERT(*dirty > 0);
+			} else  {
+				do_warn(", would clear attr fork\n");
+			}
+
+			atotblocks = 0;
+			anextents = 0;
+
+			if (delete_attr_ok)  {
+				if (!no_modify)
+					dinoc->di_aformat = XFS_DINODE_FMT_LOCAL;
+			} else  {
+				*cleared = 1;
+				*used = is_free;
+				*isa_dir = 0;
+				blkmap_free(dblkmap);
+				blkmap_free(ablkmap);
+			}
+			return(1);
+			
+		} else if (check_dups)  {
+			switch (dinoc->di_aformat) {
+			case XFS_DINODE_FMT_LOCAL:
+				err = process_lclinode(mp, agno, ino, dino,
+					type, dirty, &atotblocks, &anextents,
+					&ablkmap, XFS_ATTR_FORK, 0);
+				break;
+			case XFS_DINODE_FMT_EXTENTS:
+				err = process_exinode(mp, agno, ino, dino,
+					type, dirty, &atotblocks, &anextents,
+					&ablkmap, XFS_ATTR_FORK, 0);
+				break;
+			case XFS_DINODE_FMT_BTREE:
+				err = process_btinode(mp, agno, ino, dino,
+					type, dirty, &atotblocks, &anextents,
+					&ablkmap, XFS_ATTR_FORK, 0);
+				break;
+			default:
+				do_error("illegal attribute fmt %d, ino %llu\n",
+						dinoc->di_aformat, lino);
+			}
+
+			if (no_modify && err != 0)  {
+				*cleared = 1;
+				*used = is_free;
+				*isa_dir = 0;
+				blkmap_free(dblkmap);
+				blkmap_free(ablkmap);
+
+				return(1);
+			}
+
+			ASSERT(err == 0);
+		}
+
+		/*
+		 * do attribute semantic-based consistency checks now
+		 */
+
+		/* get this only in phase 3, not in both phase 3 and 4 */
+		if (extra_attr_check) {
+		    if ((err = process_attributes(mp, lino, dino, ablkmap,
+				    &repair))) {
+			    do_warn("problem with attribute contents in inode %llu\n",lino);
+			    if(!repair) {
+				    /* clear attributes if not done already */
+				    if (!no_modify)  {
+					    *dirty += clear_dinode_attr(
+							mp, dino, lino);
+					    dinoc->di_aformat =
+						XFS_DINODE_FMT_LOCAL;
+				    } else  {
+					    do_warn("would clear attr fork\n");
+				    }
+				    atotblocks = 0;
+				    anextents = 0; 
+			    }
+			    else {
+				    *dirty = 1; /* it's been repaired */
+			     }
+		    }
+		}
+		blkmap_free(ablkmap);
+
+	} else
+		anextents = 0;
+
+	/* 
+	* enforce totblocks is 0 for misc types 
+	*/
+	if (process_misc_ino_types_blocks(totblocks, lino, type)) {
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+		blkmap_free(dblkmap);
+
+		return(1);
+	}
+
+	/*
+	 * correct space counters if required
+	 */
+	if (totblocks + atotblocks != INT_GET(dinoc->di_nblocks, ARCH_CONVERT))  {
+		if (!no_modify)  {
+	do_warn("correcting nblocks for inode %llu, was %llu - counted %llu\n",
+				lino, INT_GET(dinoc->di_nblocks, ARCH_CONVERT),
+				totblocks + atotblocks);
+			*dirty = 1;
+			INT_SET(dinoc->di_nblocks, ARCH_CONVERT, totblocks + atotblocks);
+		} else  {
+		do_warn(
+	"bad nblocks %llu for inode %llu, would reset to %llu\n",
+				INT_GET(dinoc->di_nblocks, ARCH_CONVERT), lino,
+				totblocks + atotblocks);
+		}
+	}
+
+	if (nextents > MAXEXTNUM)  {
+		do_warn("too many data fork extents (%llu) in inode %llu\n",
+			nextents, lino);
+
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+		blkmap_free(dblkmap);
+
+		return(1);
+	}
+	if (nextents != INT_GET(dinoc->di_nextents, ARCH_CONVERT))  {
+		if (!no_modify)  {
+	do_warn("correcting nextents for inode %llu, was %d - counted %llu\n",
+				lino, INT_GET(dinoc->di_nextents, ARCH_CONVERT), nextents);
+			*dirty = 1;
+			INT_SET(dinoc->di_nextents, ARCH_CONVERT, (xfs_extnum_t) nextents);
+		} else  {
+			do_warn(
+		"bad nextents %d for inode %llu, would reset to %llu\n",
+				INT_GET(dinoc->di_nextents, ARCH_CONVERT), lino, nextents);
+		}
+	}
+
+	if (anextents > MAXAEXTNUM)  {
+		do_warn("too many attr fork extents (%llu) in inode %llu\n",
+			anextents, lino);
+
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+		blkmap_free(dblkmap);
+
+		return(1);
+	}
+	if (anextents != INT_GET(dinoc->di_anextents, ARCH_CONVERT))  {
+		if (!no_modify)  {
+	do_warn("correcting anextents for inode %llu, was %d - counted %llu\n",
+				lino, INT_GET(dinoc->di_anextents, ARCH_CONVERT), anextents);
+			*dirty = 1;
+			INT_SET(dinoc->di_anextents, ARCH_CONVERT, (xfs_aextnum_t) anextents);
+		} else  {
+			do_warn(
+		"bad anextents %d for inode %llu, would reset to %llu\n",
+				INT_GET(dinoc->di_anextents, ARCH_CONVERT), lino, anextents);
+		}
+	}
+
+	/*
+	 * do any semantic type-based checking here
+	 */
+	switch (type)  {
+	case XR_INO_DIR:
+		if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+			err = process_dir2(mp, lino, dino, ino_discovery,
+					dirty, "", parent, dblkmap);
+		else
+			err = process_dir(mp, lino, dino, ino_discovery,
+					dirty, "", parent, dblkmap);
+		if (err)
+			do_warn(
+			"problem with directory contents in inode %llu\n",
+				lino);
+		break;
+	case XR_INO_RTBITMAP:
+		/* process_rtbitmap XXX */
+		err = 0;
+		break;
+	case XR_INO_RTSUM:
+		/* process_rtsummary XXX */
+		err = 0;
+		break;
+	case XR_INO_SYMLINK:
+		if ((err = process_symlink(mp, lino, dino, dblkmap)))
+			do_warn("problem with symbolic link in inode %llu\n",
+				lino);
+		break;
+	case XR_INO_DATA:	/* fall through to FIFO case ... */
+	case XR_INO_RTDATA:	/* fall through to FIFO case ... */
+	case XR_INO_CHRDEV:	/* fall through to FIFO case ... */
+	case XR_INO_BLKDEV:	/* fall through to FIFO case ... */
+	case XR_INO_SOCK:	/* fall through to FIFO case ... */
+	case XR_INO_FIFO:
+		err = 0;
+		break;
+	default:
+		printf("Unexpected inode type\n");
+		abort();
+	}
+
+	blkmap_free(dblkmap);
+
+	if (err)  {
+		/*
+		 * problem in the inode type-specific semantic
+		 * checking, clear out the inode and get out
+		 */
+		if (!no_modify)  {
+			*dirty += clear_dinode(mp, dino, lino);
+			ASSERT(*dirty > 0);
+		}
+		*cleared = 1;
+		*used = is_free;
+		*isa_dir = 0;
+
+		return(1);
+	}
+
+	/*
+	 * check nlinks feature, if it's a version 1 inode,
+	 * just leave nlinks alone.  even if it's set wrong,
+	 * it'll be reset when read in.
+	 */
+	if (dinoc->di_version > XFS_DINODE_VERSION_1 && !fs_inode_nlink)  {
+		/*
+		 * do we have a fs/inode version mismatch with a valid
+		 * version 2 inode here that has to stay version 2 or
+		 * lose links?
+		 */
+		if (INT_GET(dinoc->di_nlink, ARCH_CONVERT) > XFS_MAXLINK_1)  {
+			/*
+			 * yes.  are nlink inodes allowed?
+			 */
+			if (fs_inode_nlink_allowed)  {
+				/*
+				 * yes, update status variable which will
+				 * cause sb to be updated later.
+				 */
+				fs_inode_nlink = 1;
+				do_warn(
+				"version 2 inode %llu claims > %u links,",
+					lino, XFS_MAXLINK_1);
+				if (!no_modify)  {
+					do_warn(
+			"updating superblock version number\n");
+				} else  {
+					do_warn(
+			"would update superblock version number\n");
+				}
+			} else  {
+				/*
+				 * no, have to convert back to onlinks
+				 * even if we lose some links
+				 */
+				do_warn(
+			"WARNING:  version 2 inode %llu claims > %u links,",
+					lino, XFS_MAXLINK_1);
+				if (!no_modify)  {
+					do_warn(
+	"converting back to version 1,\n\tthis may destroy %d links\n",
+						INT_GET(dinoc->di_nlink, ARCH_CONVERT)
+						- XFS_MAXLINK_1);
+
+					dinoc->di_version =
+						XFS_DINODE_VERSION_1;
+					INT_SET(dinoc->di_nlink, ARCH_CONVERT, XFS_MAXLINK_1);
+					INT_SET(dinoc->di_onlink, ARCH_CONVERT, XFS_MAXLINK_1);
+
+					*dirty = 1;
+				} else  {
+					do_warn(
+	"would convert back to version 1,\n\tthis might destroy %d links\n",
+						INT_GET(dinoc->di_nlink, ARCH_CONVERT)
+						- XFS_MAXLINK_1);
+				}
+			}
+		} else  {
+			/*
+			 * do we have a v2 inode that we could convert back
+			 * to v1 without losing any links?  if we do and
+			 * we have a mismatch between superblock bits and the
+			 * version bit, alter the version bit in this case.
+			 *
+			 * the case where we lost links was handled above.
+			 */
+			do_warn("found version 2 inode %llu, ", lino);
+			if (!no_modify)  {
+				do_warn("converting back to version 1\n");
+
+				dinoc->di_version =
+					XFS_DINODE_VERSION_1;
+				INT_SET(dinoc->di_onlink, ARCH_CONVERT, INT_GET(dinoc->di_nlink, ARCH_CONVERT));
+
+				*dirty = 1;
+			} else  {
+				do_warn("would convert back to version 1\n");
+			}
+		}
+	}
+
+	/*
+	 * ok, if it's still a version 2 inode, it's going
+	 * to stay a version 2 inode.  it should have a zero
+	 * onlink field, so clear it.
+	 */
+	if (dinoc->di_version > XFS_DINODE_VERSION_1 &&
+			INT_GET(dinoc->di_onlink, ARCH_CONVERT) > 0 && fs_inode_nlink > 0)  {
+		if (!no_modify)  {
+			do_warn(
+"clearing obsolete nlink field in version 2 inode %llu, was %d, now 0\n",
+				lino, INT_GET(dinoc->di_onlink, ARCH_CONVERT));
+			INT_ZERO(dinoc->di_onlink, ARCH_CONVERT);
+			*dirty = 1;
+		} else  {
+			do_warn(
+"would clear obsolete nlink field in version 2 inode %llu, currently %d\n",
+				lino, INT_GET(dinoc->di_onlink, ARCH_CONVERT));
+			*dirty = 1;
+		}
+	}
+
+	return(retval > 0 ? 1 : 0);
+}
+
+/*
+ * processes one on-disk inode (core and forks), salvaging what it can.
+ * in-use/free status comes back in *used, not in the return value.
+ * note that we leave the generation count alone because nothing we
+ * could set it to would be guaranteed to be correct so the best guess
+ * for the correct value is just to leave it alone.
+ *
+ * The trick is detecting empty files.  For those, the core and the
+ * forks should all be in the "empty" or zero-length state -- a zero or
+ * possibly minimum length (in the case of dirs) extent list -- although
+ * inline directories and symlinks might be handled differently.  So it
+ * should be possible to sanity check them against each other.
+ *
+ * If the forks are an empty extent list though, then forget it.
+ * The file is toast anyway since we can't recover its storage.
+ *
+ * Parameters:
+ *	Ins:
+ *		mp -- mount structure
+ *		dino -- pointer to on-disk inode structure
+ *		agno/ino -- inode numbers
+ *		was_free -- whether the map thinks the inode is free (1 == free)
+ *		ino_discovery -- whether we should examine directory
+ *				contents to discover new inodes
+ *		check_dups -- whether we should check to see if the inode
+ *				references duplicate blocks.  if so, we compare
+ *				the inode's claimed blocks against the duplicate
+ *				extent list but we don't set the bitmap.  If not,
+ *				we set the bitmap and try and detect multiply
+ *				claimed blocks using the bitmap.
+ *		extra_attr_check -- whether to also run the attribute content
+ *				checks (process_attributes) on the attr fork
+ *	Outs:
+ *		dirty -- whether we changed the inode (1 == yes)
+ *		cleared -- whether we cleared the inode (1 == yes).  In
+ *				no modify mode, if we would have cleared it
+ *		used -- 1 if the inode is used, 0 if free.  In no modify
+ *			mode, whether the inode should be used or free
+ *		isa_dir -- 1 if the inode is a directory, 0 if not.  In
+ *			no modify mode, if the inode would be a dir or not.
+ *		parent -- passed on to process_dir/process_dir2 for directories
+ *			(presumably filled in there -- not visible from here)
+ *
+ *	Return value -- 0 if the inode is good, 1 if it is/was corrupt
+ */
+
+int
+process_dinode(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino,
+		int was_free,
+		int *dirty,
+		int *cleared,
+		int *used,
+		int ino_discovery,
+		int check_dups,
+		int extra_attr_check,
+		int *isa_dir,
+		xfs_ino_t *parent)
+{
+	const int verify_mode = 0;	/* full check, not just the core */
+	const int uncertain = 0;	/* problems get reported */
+
+#ifdef XR_INODE_TRACE
+	fprintf(stderr, "processing inode %u/%u\n",
+		(unsigned int) agno, (unsigned int) ino);
+#endif
+	return(process_dinode_int(mp, dino, agno, ino, was_free, dirty,
+			cleared, used, verify_mode, uncertain, ino_discovery,
+			check_dups, extra_attr_check, isa_dir, parent));
+}
+
+/*
+ * Cursory sanity check of an on-disk inode: runs process_dinode_int()
+ * in verify mode, so only the inode core is examined and the forks are
+ * *NOT* checked.  Problems are reported but, being in verify mode, the
+ * inode is left unmodified.  Returns 0 if the core is sane, 1 otherwise.
+ */
+int
+verify_dinode(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino)
+{
+	/* scratch out-parameters; none of them is handed back */
+	xfs_ino_t scratch_parent;
+	int scratch_dirty = 0;
+	int scratch_cleared = 0;
+	int scratch_used = 0;
+	int scratch_isa_dir = 0;
+
+	return(process_dinode_int(mp, dino, agno, ino,
+				0 /* was_free */, &scratch_dirty,
+				&scratch_cleared, &scratch_used,
+				1 /* verify_mode */, 0 /* uncertain */,
+				0 /* ino_discovery */, 0 /* check_dups */,
+				0 /* extra_attr_check */,
+				&scratch_isa_dir, &scratch_parent));
+}
+
+/*
+ * cursory core-only check for an inode from the uncertain list.  runs
+ * process_dinode_int() in verify mode with the uncertain flag set, which
+ * suppresses the warnings.  returns 0 if the inode passes, 1 otherwise.
+ */
+int
+verify_uncertain_dinode(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino)
+{
+	/* throwaway out-parameters, nothing here is handed back */
+	xfs_ino_t ignored_parent;
+	int ignored_dirty = 0;
+	int ignored_cleared = 0;
+	int ignored_used = 0;
+	int ignored_isa_dir = 0;
+
+	return(process_dinode_int(mp, dino, agno, ino,
+				0 /* was_free */, &ignored_dirty,
+				&ignored_cleared, &ignored_used,
+				1 /* verify_mode */, 1 /* uncertain */,
+				0 /* ino_discovery */, 0 /* check_dups */,
+				0 /* extra_attr_check */,
+				&ignored_isa_dir, &ignored_parent));
+}
diff --git a/repair/dinode.h b/repair/dinode.h
new file mode 100644
index 000000000..196068af2
--- /dev/null
+++ b/repair/dinode.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _XR_DINODE_H
+#define _XR_DINODE_H
+
+struct blkmap;
+
+int
+verify_agbno(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		xfs_agblock_t	agbno);
+
+int
+verify_dfsbno(xfs_mount_t	*mp,
+		xfs_dfsbno_t	fsbno);
+
+void
+convert_extent(
+	xfs_bmbt_rec_32_t	*rp,
+	xfs_dfiloff_t		*op,	/* starting offset (blockno in file) */
+	xfs_dfsbno_t		*sp,	/* starting block (fs blockno) */
+	xfs_dfilblks_t		*cp,	/* blockcount */
+	int			*fp);	/* extent flag */
+
+int	
+process_bmbt_reclist(xfs_mount_t	*mp,
+		xfs_bmbt_rec_32_t	*rp,
+		int			numrecs,
+		int			type,
+		xfs_ino_t		ino,
+		xfs_drfsbno_t		*tot,
+		struct blkmap		**blkmapp,
+		__uint64_t		*first_key,
+		__uint64_t		*last_key,
+		int			whichfork);
+
+int
+scan_bmbt_reclist(
+	xfs_mount_t		*mp,
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	int			type,
+	xfs_ino_t		ino,
+	xfs_drfsbno_t		*tot,
+	int			whichfork);
+
+int
+verify_inode_chunk(xfs_mount_t		*mp,
+			xfs_ino_t	ino,
+			xfs_ino_t	*start_ino);
+
+int	verify_aginode_chunk(xfs_mount_t	*mp,
+				xfs_agnumber_t	agno,
+				xfs_agino_t	agino,
+				xfs_agino_t	*agino_start);
+
+int
+clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num);
+
+void
+update_rootino(xfs_mount_t *mp);
+
+int
+process_dinode(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino,
+		int was_free,
+		int *dirty,
+		int *tossit,
+		int *used,
+		int check_dirs,
+		int check_dups,
+		int extra_attr_check,
+		int *isa_dir,
+		xfs_ino_t *parent);
+
+/*
+ * cursory check of one on-disk inode (agno/ino) via process_dinode_int()
+ * in verify mode.  per the comment on its definition: returns 0 if the
+ * inode passes the cursory sanity check, 1 otherwise.
+ */
+int
+verify_dinode(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino);
+
+/*
+ * like verify_dinode() but for an inode on the uncertain list: the
+ * uncertain flag is passed to process_dinode_int().  per the comment
+ * on its definition: returns 0 if the inode passes the cursory sanity
+ * check, 1 otherwise.
+ */
+int
+verify_uncertain_dinode(xfs_mount_t *mp,
+		xfs_dinode_t *dino,
+		xfs_agnumber_t agno,
+		xfs_agino_t ino);
+
+int
+verify_inum(xfs_mount_t		*mp,
+		xfs_ino_t	ino);
+
+int
+verify_aginum(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	agino);
+
+int
+process_uncertain_aginodes(xfs_mount_t		*mp,
+				xfs_agnumber_t	agno);
+void
+process_aginodes(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		int		check_dirs,
+		int		check_dups,
+		int		extra_attr_check);
+
+void
+check_uncertain_aginodes(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno);
+
+xfs_buf_t *
+get_agino_buf(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	agino,
+		xfs_dinode_t	**dipp);
+
+xfs_dfsbno_t
+get_bmapi(xfs_mount_t		*mp,
+		xfs_dinode_t	*dip,
+		xfs_ino_t	ino_num,
+		xfs_dfiloff_t	bno,
+	        int             whichfork );
+
+#endif /* _XR_DINODE_H */
diff --git a/repair/dir.c b/repair/dir.c
new file mode 100644
index 000000000..4854b54e5
--- /dev/null
+++ b/repair/dir.c
@@ -0,0 +1,3033 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "bmap.h"
+
+/*
+ * XR_DA_LEAF_MAPSIZE is the larger of the directory and attribute
+ * leaf map sizes, so one hole map type can be used for either.
+ */
+#if XFS_DIR_LEAF_MAPSIZE >= XFS_ATTR_LEAF_MAPSIZE
+#define XR_DA_LEAF_MAPSIZE	XFS_DIR_LEAF_MAPSIZE
+#else
+#define XR_DA_LEAF_MAPSIZE	XFS_ATTR_LEAF_MAPSIZE
+#endif
+
+
+
+/*
+ * summary of the holes (runs of free bytes) in a da block, as filled
+ * in by process_da_freemap():
+ *
+ *	lost_holes	- number of holes found beyond the
+ *			  XR_DA_LEAF_MAPSIZE that fit in hentries[]
+ *			  (0 if they all fit)
+ *	num_holes	- total number of holes found
+ *	hentries[]	- the holes that were kept; base is the byte
+ *			  offset of the hole within the block, size is
+ *			  its length in bytes.  size == 0 marks an
+ *			  unused slot (verify_da_freemap() skips those).
+ */
+typedef struct da_hole_map  {
+	int	lost_holes;
+	int	num_holes;
+	struct {
+		int	base;
+		int	size;
+	} hentries[XR_DA_LEAF_MAPSIZE];
+} da_hole_map_t;
+
+/*
+ * scan the first 'length' bytes of 'name' (the name need not be
+ * null-terminated) for characters that may not appear in a directory
+ * entry name.  returns 1 if a '/' or a \0 is found, 0 otherwise.
+ * length is asserted to be less than MAXNAMELEN.
+ */
+int
+namecheck(char *name, int length)
+{
+	int	pos = 0;
+
+	ASSERT(length < MAXNAMELEN);
+
+	while (pos < length)  {
+		char	ch = name[pos++];
+
+		if (ch == '\0' || ch == '/')
+			return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * this routine performs inode discovery on a shortform directory and
+ * tries to fix things in place.  available redundancy -- inode data
+ * size should match used directory space in inode.
+ *
+ * each entry is checked in turn.  an entry is junked (or, in no_modify
+ * mode, only reported) if it points back at the directory itself, has
+ * an invalid inode number, references the realtime bitmap, realtime
+ * summary, user quota or project quota inode, references a free or
+ * non-existent inode once inode discovery is over, or has a name
+ * containing a '/' or a \0.  during inode discovery, valid but unknown
+ * inode numbers are added to the uncertain inode list instead.
+ * processing stops at the first entry whose length cannot be trusted
+ * unless it is the last entry, in which case its length is rebuilt
+ * from the directory size.  afterwards the entry count, the directory
+ * size and the parent (..) inode number are checked and corrected.
+ *
+ * outputs:
+ *	*dino_dirty	- set to 1 if the inode buffer was modified
+ *	*repair		- 0 on entry to the checks, set to 1 if an entry
+ *			  was junked or the count, size or .. entry
+ *			  was corrected
+ *	*parent		- the .. inode number, or NULLFSINO if that
+ *			  number is bogus or points back at a non-root
+ *			  directory itself
+ *
+ * dirname is not used.  the routine always returns 0.
+ */
+/* ARGSUSED */
+int
+process_shortform_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,	/* out - 1 if dinode buffer dirty? */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry doesn't exist */
+	char		*dirname,	/* directory pathname */
+	int		*repair)	/* out - 1 if dir was fixed up */
+{
+	xfs_dir_shortform_t	*sf;
+	xfs_dir_sf_entry_t	*sf_entry, *next_sfe, *tmp_sfe;
+	xfs_ino_t		lino;
+	int			max_size;
+	__int64_t		ino_dir_size;
+	int			num_entries;
+	int			ino_off;
+	int			namelen;
+	int			i;
+	int			junkit;
+	int			tmp_len;
+	int			tmp_elen;
+	int			bad_sfnamelen;
+	ino_tree_node_t		*irec_p;
+	char			name[MAXNAMELEN + 1];
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_shortform_dir - inode %llu\n", ino);
+#endif
+
+	sf = &dip->di_u.di_dirsf;
+
+	max_size = XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT);
+	num_entries = INT_GET(sf->hdr.count, ARCH_CONVERT);
+	ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+	*repair = 0;
+
+	ASSERT(ino_dir_size <= max_size);
+
+	/*
+	 * check for bad entry count.  if the count can't be right,
+	 * use 0xFF so that the loop below is bounded by the directory
+	 * size rather than by the count.
+	 */
+	if (num_entries * sizeof(xfs_dir_sf_entry_t) + sizeof(xfs_dir_sf_hdr_t)
+			> max_size || num_entries == 0)
+		num_entries = 0xFF;
+
+	/*
+	 * run through entries, stop at first bad entry, don't need
+	 * to check for .. since that's encoded in its own field
+	 */
+	sf_entry = next_sfe = &sf->list[0];
+	for (i = 0; i < num_entries && ino_dir_size >
+				(__psint_t)next_sfe - (__psint_t)sf; i++)  {
+		tmp_sfe = NULL;
+		sf_entry = next_sfe;
+		junkit = 0;
+		bad_sfnamelen = 0;
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+
+		/*
+		 * if entry points to self, junk it since only '.' or '..'
+		 * should do that and shortform dirs don't contain either
+		 * entry.  if inode number is invalid, trash entry.
+		 * if entry points to special inodes, trash it.
+		 * if inode is unknown but number is valid,
+		 * add it to the list of uncertain inodes.  don't
+		 * have to worry about an entry pointing to a
+		 * deleted lost+found inode because the entry was
+		 * deleted at the same time that the inode was cleared.
+		 */
+		if (lino == ino)  {
+			junkit = 1;
+		} else if (verify_inum(mp, lino))  {
+			/*
+			 * junk the entry, mark lino as NULL since it's bad
+			 */
+			do_warn("invalid inode number %llu in directory %llu\n",
+				lino, ino);
+			lino = NULLFSINO;
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_rbmino)  {
+			do_warn(
+	"entry in shorform dir %llu references realtime bitmap inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_rsumino)  {
+			do_warn(
+	"entry in shorform dir %llu references realtime summary inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_uquotino)  {
+			do_warn(
+	"entry in shorform dir %llu references user quota inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_pquotino)  {
+			do_warn(
+	"entry in shorform dir %llu references proj quota inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if ((irec_p = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+					XFS_INO_TO_AGINO(mp, lino))) != NULL)  {
+			/*
+			 * if inode is marked free and we're in inode
+			 * discovery mode, leave the entry alone for now.
+			 * if the inode turns out to be used, we'll figure
+			 * that out when we scan it.  If the inode really
+			 * is free, we'll hit this code again in phase 4
+			 * after we've finished inode discovery and blow
+			 * out the entry then.
+			 */
+			ino_off = XFS_INO_TO_AGINO(mp, lino) -
+				irec_p->ino_startnum;
+			ASSERT(is_inode_confirmed(irec_p, ino_off));
+
+			if (!ino_discovery && is_inode_free(irec_p, ino_off))  {
+				do_warn(
+	"entry references free inode %llu in shortform directory %llu\n",
+					lino, ino);
+				junkit = 1;
+			}
+		} else if (ino_discovery) {
+			/*
+			 * put the inode on the uncertain list.  we'll
+			 * pull the inode off the list and check it later.
+			 * if the inode turns out be bogus, we'll delete
+			 * this entry in phase 6.
+			 */
+			add_inode_uncertain(mp, lino, 0);
+		} else  {
+			/*
+			 * blow the entry out.  we know about all
+			 * undiscovered entries now (past inode discovery
+			 * phase) so this is clearly a bogus entry.
+			 */
+			do_warn(
+	"entry references non-existent inode %llu in shortform dir %llu\n",
+					lino, ino);
+			junkit = 1;
+		}
+
+		namelen = sf_entry->namelen;
+
+		if (namelen == 0)  {
+			/*
+			 * if we're really lucky, this is
+			 * the last entry in which case we
+			 * can use the dir size to set the
+			 * namelen value.  otherwise, forget
+			 * it because we're not going to be
+			 * able to find the next entry.
+			 */
+			bad_sfnamelen = 1;
+
+			if (i == num_entries - 1)  {
+				/*
+				 * NOTE(review): nothing here guarantees
+				 * that the directory size reaches past the
+				 * start of the name, so the derived length
+				 * is not obviously positive -- confirm.
+				 */
+				namelen = ino_dir_size -
+					((__psint_t) &sf_entry->name[0] -
+					 (__psint_t) sf);
+				if (!no_modify)  {
+					do_warn(
+		"zero length entry in shortform dir %llu, resetting to %d\n",
+						ino, namelen);
+					sf_entry->namelen = namelen;
+					/*
+					 * the inode was changed, so it must
+					 * be flagged for writing just like
+					 * the overflow fixup below does
+					 */
+					*dino_dirty = 1;
+				} else  {
+					do_warn(
+		"zero length entry in shortform dir %llu, would set to %d\n",
+						ino, namelen);
+				}
+			} else  {
+				do_warn(
+	"zero length entry in shortform dir %llu",
+					ino);
+				if (!no_modify)
+					do_warn(", junking %d entries\n",
+						num_entries - i);
+				else
+					do_warn(", would junk %d entries\n",
+						num_entries - i);
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		} else if ((__psint_t) sf_entry - (__psint_t) sf
+				+ XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+				> ino_dir_size)  {
+			bad_sfnamelen = 1;
+
+			if (i == num_entries - 1)  {
+				namelen = ino_dir_size -
+					((__psint_t) &sf_entry->name[0] -
+					 (__psint_t) sf);
+				do_warn(
+	"size of last entry overflows space left in in shortform dir %llu, ",
+					ino);
+				if (!no_modify)  {
+					do_warn("resetting to %d\n",
+						namelen);
+					sf_entry->namelen = namelen;
+					*dino_dirty = 1;
+				} else  {
+					do_warn("would reset to %d\n",
+						namelen);
+				}
+			} else  {
+				/*
+				 * not the last entry, so this entry and
+				 * everything after it is lost
+				 */
+				do_warn(
+	"size of entry #%d overflows space left in in shortform dir %llu\n",
+					i, ino);
+				if (!no_modify)
+					do_warn(
+					"junking %d entries\n",
+						num_entries - i);
+				else
+					do_warn(
+					"would junk %d entries\n",
+						num_entries - i);
+
+				break;
+			}
+		}
+
+		/*
+		 * check for illegal chars in name.
+		 * no need to check for bad length because
+		 * the length value is stored in a byte
+		 * so it can't be too big, it can only wrap
+		 */
+		if (namecheck((char *)&sf_entry->name[0], namelen))  {
+			/*
+			 * junk entry
+			 */
+			do_warn(
+		"entry contains illegal character in shortform dir %llu\n",
+				ino);
+			junkit = 1;
+		}
+
+		/*
+		 * junk the entry by copying up the rest of the
+		 * fork over the current entry and decrementing
+		 * the entry count.  if we're in no_modify mode,
+		 * just issue the warning instead.  then continue
+		 * the loop with the next_sfe pointer set to the
+		 * correct place in the fork and other counters
+		 * properly set to reflect the deletion if it
+		 * happened.
+		 */
+		if (junkit)  {
+			bcopy(sf_entry->name, name, namelen);
+			name[namelen] = '\0';
+
+			if (!no_modify)  {
+				tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+				INT_MOD(dip->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+				ino_dir_size -= tmp_elen;
+
+				tmp_sfe = (xfs_dir_sf_entry_t *)
+					((__psint_t) sf_entry + tmp_elen);
+				tmp_len = max_size - ((__psint_t) tmp_sfe
+							- (__psint_t) sf);
+
+				memmove(sf_entry, tmp_sfe, tmp_len);
+
+				INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+				num_entries--;
+				bzero((void *) ((__psint_t) sf_entry + tmp_len),
+					tmp_elen);
+
+				/*
+				 * reset the tmp value to the current
+				 * pointer so we'll process the entry
+				 * we just moved up
+				 */
+				tmp_sfe = sf_entry;
+
+				/*
+				 * WARNING:  drop the index i by one
+				 * so it matches the decremented count
+				 * for accurate comparisons later
+				 */
+				i--;
+
+				*dino_dirty = 1;
+				*repair = 1;
+
+				do_warn(
+			"junking entry \"%s\" in directory inode %llu\n",
+					name, ino);
+			} else  {
+				do_warn(
+		"would have junked entry \"%s\" in directory inode %llu\n",
+					name, ino);
+			}
+		}
+
+		/*
+		 * go onto next entry unless we've just junked an
+		 * entry in which the current entry pointer points
+		 * to an unprocessed entry.  have to take into zero-len
+		 * entries into account in no modify mode since we
+		 * calculate size based on next_sfe.
+		 */
+		next_sfe = (tmp_sfe == NULL)
+			? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry
+				+ ((!bad_sfnamelen)
+					? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+					: sizeof(xfs_dir_sf_entry_t) - 1
+						+ namelen))
+			: tmp_sfe;
+	}
+
+	/* sync up sizes and entry counts */
+
+	if (INT_GET(sf->hdr.count, ARCH_CONVERT) != i)  {
+		if (no_modify)  {
+do_warn("would have corrected entry count in directory %llu from %d to %d\n",
+			ino, INT_GET(sf->hdr.count, ARCH_CONVERT), i);
+		} else  {
+do_warn("corrected entry count in directory %llu, was %d, now %d\n",
+			ino, INT_GET(sf->hdr.count, ARCH_CONVERT), i);
+			INT_SET(sf->hdr.count, ARCH_CONVERT, i);
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+
+	if ((__psint_t) next_sfe - (__psint_t) sf != ino_dir_size)  {
+		if (no_modify)  {
+			do_warn(
+		"would have corrected directory %llu size from %lld to %lld\n",
+				ino, (__int64_t) ino_dir_size,
+			(__int64_t)((__psint_t) next_sfe - (__psint_t) sf));
+		} else  {
+			do_warn(
+			"corrected directory %llu size, was %lld, now %lld\n",
+				ino, (__int64_t) ino_dir_size,
+			(__int64_t)((__psint_t) next_sfe - (__psint_t) sf));
+
+			INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)
+					((__psint_t) next_sfe - (__psint_t) sf));
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+	/*
+	 * check parent (..) entry
+	 */
+	XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, parent, ARCH_CONVERT);
+
+	/*
+	 * if parent entry is bogus, null it out.  we'll fix it later .
+	 */
+	if (verify_inum(mp, *parent))  {
+		/*
+		 * report the bogus number before it is overwritten,
+		 * otherwise the warning would only ever show NULLFSINO
+		 */
+		do_warn(
+	"bogus .. inode number (%llu) in directory inode %llu,",
+				*parent, ino);
+		*parent = NULLFSINO;
+
+		if (!no_modify)  {
+			do_warn("clearing inode number\n");
+
+			XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else  {
+			do_warn("would clear inode number\n");
+		}
+	} else if (ino == mp->m_sb.sb_rootino && ino != *parent) {
+		/*
+		 * root directories must have .. == .
+		 */
+		if (!no_modify)  {
+			do_warn(
+	"corrected root directory %llu .. entry, was %llu, now %llu\n",
+				ino, *parent, ino);
+			*parent = ino;
+			XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else  {
+			do_warn(
+	"would have corrected root directory %llu .. entry from %llu to %llu\n",
+				ino, *parent, ino);
+		}
+	} else if (ino == *parent && ino != mp->m_sb.sb_rootino)  {
+		/*
+		 * likewise, non-root directories can't have .. pointing
+		 * to .
+		 */
+		*parent = NULLFSINO;
+		do_warn("bad .. entry in dir ino %llu, points to self,",
+			ino);
+		if (!no_modify)  {
+			do_warn(" clearing inode number\n");
+
+			XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else  {
+			do_warn(" would clear inode number\n");
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * freespace map for directory leaf blocks (1 bit per byte)
+ * 1 == used, 0 == free
+ *
+ * byte i of the block is tracked by bit (i % NBBY) of element
+ * (i / NBBY), see set_da_freemap().  the map is cleared with
+ * init_da_freemap().
+ */
+static da_freemap_t dir_freemap[DA_BMAP_SIZE];
+
+#if 0
+/*
+ * compiled out (#if 0).  allocates a freemap buffer for one block and
+ * returns it, or returns NULL if malloc fails.
+ *
+ * NOTE(review): sb_blocksize bytes are allocated but only
+ * sb_blocksize/NBBY bytes are zeroed, so the tail of the buffer is
+ * left uninitialized -- sort out which size is intended before
+ * re-enabling this.
+ */
+unsigned char *
+alloc_da_freemap(xfs_mount_t *mp)
+{
+	unsigned char *freemap;
+
+	if ((freemap = malloc(mp->m_sb.sb_blocksize)) == NULL)
+		return(NULL);
+
+	bzero(freemap, mp->m_sb.sb_blocksize/NBBY);
+
+	return(freemap);
+}
+#endif
+
+/*
+ * mark every byte free in a da freemap by zeroing all DA_BMAP_SIZE
+ * elements of the array passed in.  (the parameter has the same name
+ * as the file-level dir_freemap array and hides it in here.)
+ */
+void
+init_da_freemap(da_freemap_t *dir_freemap)
+{
+	memset(dir_freemap, 0, DA_BMAP_SIZE * sizeof(*dir_freemap));
+}
+
+/*
+ * sets directory freemap, returns 1 if there is a conflict
+ * returns 0 if everything's good.  the range [start, stop) is set,
+ * i.e. the bytes start .. stop - 1 are marked used.
+ * right now, we just use the static array since only one directory
+ * block will be processed at once even though the interface allows
+ * you to pass in arbitrary da_freemap_t array's.
+ *
+ * a conflict is reported (with a warning) if the range is malformed
+ * (start negative or start > stop), if stop is past the filesystem
+ * block size, or if any byte of the range is already marked used.
+ * bytes in front of a multiply claimed byte stay marked.
+ *
+ * Within a char, the lowest bit of the char represents the byte with
+ * the smallest address
+ */
+int
+set_da_freemap(xfs_mount_t *mp, da_freemap_t *map, int start, int stop)
+{
+	const da_freemap_t mask = 0x1;
+	int i;
+
+	if (start < 0 || start > stop)  {
+		/*
+		 * a negative start would index in front of the map.
+		 * the == relation is allowed, [x, x) is an empty
+		 * range and claims nothing.
+		 */
+		do_warn("bad range claimed [%d, %d) in da block\n",
+			start, stop);
+		return(1);
+	}
+
+	if (stop > mp->m_sb.sb_blocksize)  {
+		do_warn(
+		"byte range end [%d %d) in da block larger than blocksize %d\n",
+			start, stop, mp->m_sb.sb_blocksize);
+		return(1);
+	}
+
+	for (i = start; i < stop; i ++)  {
+		if (map[i / NBBY] & (mask << (i % NBBY)))  {
+			do_warn("multiply claimed byte %d in da block\n", i);
+			return(1);
+		}
+		map[i / NBBY] |= (mask << (i % NBBY));
+	}
+
+	return(0);
+}
+
+/*
+ * check the hole map of a directory block against the freemap built
+ * for that block.  every non-empty entry (size != 0) in the first
+ * XFS_DIR_LEAF_MAPSIZE slots of the hole map has to lie inside the
+ * block and may only cover bytes that the freemap shows as free.
+ *
+ * returns 0 if holemap is consistent with reality (as expressed by
+ * the da_freemap_t).  returns 1, after a warning naming the block
+ * (da_bno) and directory inode (ino), at the first conflict.
+ */
+int
+verify_da_freemap(xfs_mount_t *mp, da_freemap_t *map, da_hole_map_t *holes,
+			xfs_ino_t ino, xfs_dablk_t da_bno)
+{
+	const da_freemap_t	bit = 0x1;
+	int			slot;
+	int			byte;
+	int			base;
+	int			size;
+	int			end;
+
+	for (slot = 0; slot < XFS_DIR_LEAF_MAPSIZE; slot++)  {
+		size = holes->hentries[slot].size;
+
+		/* unused slot */
+		if (size == 0)
+			continue;
+
+		base = holes->hentries[slot].base;
+		end = base + size;
+
+		if (base >= mp->m_sb.sb_blocksize ||
+				end > mp->m_sb.sb_blocksize)  {
+			do_warn(
+	"hole (start %d, len %d) out of range, block %d, dir ino %llu\n",
+				base, size, da_bno, ino);
+			return(1);
+		}
+
+		for (byte = base; byte < end; byte++)  {
+			if (map[byte / NBBY] & (bit << (byte % NBBY)))  {
+				/*
+				 * bad news -- hole claims a used byte is free
+				 */
+				do_warn(
+		"hole claims used byte %d, block %d, dir ino %llu\n",
+					byte, da_bno, ino);
+				return(1);
+			}
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * offer the hole [start, start + length) to the hole map.  the
+ * smallest entry in the map is located (the first one if several are
+ * equally small) and is replaced by the new hole if the new hole is
+ * larger.
+ */
+static void
+record_da_hole(da_hole_map_t *holes, int start, int length)
+{
+	int j, smallest;
+
+	for (smallest = j = 0; j < XR_DA_LEAF_MAPSIZE; j++)  {
+		if (holes->hentries[j].size <
+				holes->hentries[smallest].size)
+			smallest = j;
+	}
+	if (length > holes->hentries[smallest].size)  {
+		holes->hentries[smallest].base = start;
+		holes->hentries[smallest].size = length;
+	}
+}
+
+/*
+ * derive the hole map of a block from its freemap.  the freemap is
+ * scanned one byte of the block at a time (sb_blocksize bytes in all)
+ * and every run of free bytes is counted as a hole and offered to the
+ * hole map, which retains the XR_DA_LEAF_MAPSIZE largest.
+ *
+ * the entries already in *holes take part in the comparison and are
+ * not reset here.  on return holes->num_holes is the number of holes
+ * found and holes->lost_holes the number that exceeded
+ * XR_DA_LEAF_MAPSIZE (0 if none).
+ */
+void
+process_da_freemap(xfs_mount_t *mp, da_freemap_t *map, da_hole_map_t *holes)
+{
+	int i, in_hole, start, length, num_holes;
+	const da_freemap_t mask = 0x1;
+
+	num_holes = in_hole = start = length = 0;
+
+	for (i = 0; i < mp->m_sb.sb_blocksize; i++)  {
+		if ((map[i / NBBY] & (mask << (i % NBBY))) == 0)  {
+			/*
+			 * byte is free (unused)
+			 */
+			if (in_hole == 1)
+				continue;
+			/*
+			 * start of a new hole
+			 */
+			in_hole = 1;
+			start = i;
+		} else  {
+			/*
+			 * byte is used
+			 */
+			if (in_hole == 0)
+				continue;
+			/*
+			 * end of a hole
+			 */
+			in_hole = 0;
+			/*
+			 * if the hole disappears, throw it away
+			 */
+			length = i - start;
+
+			if (length <= 0)
+				continue;
+
+			num_holes++;
+			record_da_hole(holes, start, length);
+		}
+	}
+
+	/*
+	 * see if we have a big hole at the end
+	 */
+	if (in_hole == 1)  {
+		length = i - start;
+
+		if (length > 0)  {
+			num_holes++;
+			record_da_hole(holes, start, length);
+		}
+	}
+
+	holes->lost_holes = MAX(num_holes - XR_DA_LEAF_MAPSIZE, 0);
+	holes->num_holes = num_holes;
+
+	return;
+}
+
+/*
+ * compare the hole map derived from the freemap (holemap) with the
+ * hole map taken from the block itself (block_hmap), looking at the
+ * first 'entries' slots of each.
+ *
+ * returns 1 if the hole info doesn't match, 0 if it does.  when the
+ * verbose flag is off the routine returns at the first mismatch;
+ * when it is on, every mismatch is reported with a warning naming
+ * the block (da_bno) and directory inode (ino) before returning 1.
+ */
+/* ARGSUSED */
+int
+compare_da_freemaps(xfs_mount_t *mp, da_hole_map_t *holemap,
+			da_hole_map_t *block_hmap, int entries,
+			xfs_ino_t ino, xfs_dablk_t da_bno)
+{
+	int	derived, ondisk, matched;
+	int	mismatch = 0;
+	int	lost;
+
+	/*
+	 * the filesystem value is two-valued (1 or 0), so reduce
+	 * the derived lost_holes count to 1 or 0 before comparing
+	 */
+	lost = (holemap->lost_holes > 0) ? 1 : 0;
+
+	if (lost != block_hmap->lost_holes)  {
+		if (!verbose)
+			return(1);
+
+		do_warn(
+		"- derived hole value %d, saw %d, block %d, dir ino %llu\n",
+			holemap->lost_holes, block_hmap->lost_holes,
+			da_bno, ino);
+		mismatch = 1;
+	}
+
+	/*
+	 * each derived hole must show up, with the same base and
+	 * size, somewhere in the block's own map
+	 */
+	for (derived = 0; derived < entries; derived++)  {
+		matched = 0;
+
+		for (ondisk = 0; ondisk < entries; ondisk++)  {
+			if (holemap->hentries[derived].base ==
+					block_hmap->hentries[ondisk].base
+					&& holemap->hentries[derived].size ==
+					block_hmap->hentries[ondisk].size)
+				matched = 1;
+		}
+
+		if (matched)
+			continue;
+
+		if (!verbose)
+			return(1);
+
+		do_warn(
+"- derived hole (base %d, size %d) in block %d, dir inode %llu not found\n",
+			holemap->hentries[derived].base,
+			holemap->hentries[derived].size,
+			da_bno, ino);
+		mismatch = 1;
+	}
+
+	return(mismatch);
+}
+
+#if 0
+/*
+ * compiled out (#if 0).  scratch driver for the freemap routines:
+ * clears dir_freemap and a local hole map, marks a handful of byte
+ * ranges used and then derives the hole map with
+ * process_da_freemap().  nothing is checked or printed.
+ */
+void
+test(xfs_mount_t *mp)
+{
+	int i = 0;
+	da_hole_map_t	holemap;
+
+	init_da_freemap(dir_freemap);
+	bzero(&holemap, sizeof(da_hole_map_t));
+
+	set_da_freemap(mp, dir_freemap, 0, 50);
+	set_da_freemap(mp, dir_freemap, 100, 126);
+	set_da_freemap(mp, dir_freemap, 126, 129);
+	set_da_freemap(mp, dir_freemap, 130, 131);
+	set_da_freemap(mp, dir_freemap, 150, 160);
+	process_da_freemap(mp, dir_freemap, &holemap);
+
+	return;
+}
+#endif
+
+
+/*
+ * walk tree from root to the left-most leaf block reading in
+ * blocks and setting up cursor.  passes back file block number of the
+ * left-most leaf block if successful (*rbno).  returns 1 if successful,
+ * 0 if unsuccessful.
+ *
+ * file block 0 is taken as the root.  each interior block read must
+ * carry the da node magic number, a sane entry count and a level
+ * exactly one below its parent's.  on success da_cursor->active is
+ * the level of the root and the buffers of the interior blocks stay
+ * attached to da_cursor->level[1 .. active]; level[0].bno is set to
+ * the leaf's block number.  on failure a warning is issued (except
+ * when the block can't be mapped) and the buffers attached so far are
+ * released again.  whichfork only selects the wording of the warnings.
+ */
+int
+traverse_int_dablock(xfs_mount_t	*mp,
+		da_bt_cursor_t		*da_cursor,
+		xfs_dablk_t		*rbno,
+		int 			whichfork)
+{
+	xfs_dablk_t		bno;
+	int			i;
+	xfs_da_intnode_t	*node;
+	xfs_dfsbno_t		fsbno;
+	xfs_buf_t		*bp;
+
+	/*
+	 * traverse down left-side of tree until we hit the
+	 * left-most leaf block setting up the btree cursor along
+	 * the way.
+	 */
+	bno = 0;
+	i = -1;
+	node = NULL;
+	da_cursor->active = 0;
+
+	do {
+		/*
+		 * read in each block along the way and set up cursor
+		 */
+		fsbno = blkmap_get(da_cursor->blkmap, bno);
+
+		if (fsbno == NULLDFSBNO)
+			goto error_out;
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			if (whichfork == XFS_DATA_FORK)
+				do_warn("can't read block %u (fsbno %llu) for "
+					"directory inode %llu\n",
+					bno, fsbno, da_cursor->ino);
+			else
+				do_warn("can't read block %u (fsbno %llu) for "
+					"attrbute fork of inode %llu\n",
+					bno, fsbno, da_cursor->ino);
+			goto error_out;
+		}
+
+		node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+
+		if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+			do_warn("bad dir/attr magic number in inode %llu, file "
+				"bno = %u, fsbno = %llu\n", da_cursor->ino, bno, fsbno);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+		if (INT_GET(node->hdr.count, ARCH_CONVERT) > XFS_DA_NODE_ENTRIES(mp))  {
+			do_warn("bad record count in inode %llu, count = %d, max = %d\n",
+				da_cursor->ino, INT_GET(node->hdr.count, ARCH_CONVERT),
+				XFS_DA_NODE_ENTRIES(mp));
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		/*
+		 * maintain level counter
+		 */
+		if (i == -1)  {
+			i = da_cursor->active = INT_GET(node->hdr.level, ARCH_CONVERT);
+
+			/*
+			 * the level comes straight off the disk and is
+			 * used to index the cursor below, so it has to
+			 * fit the cursor.  an interior block can't be
+			 * at level 0 (the leaf level) either.
+			 */
+			if (i < 1 || i >= XFS_DA_NODE_MAXDEPTH)  {
+				if (whichfork == XFS_DATA_FORK)
+					do_warn("bad header depth for directory "
+						"inode %llu\n", da_cursor->ino);
+				else
+					do_warn("bad header depth for attribute "
+						"fork of inode %llu\n",
+						da_cursor->ino);
+				libxfs_putbuf(bp);
+				/*
+				 * nothing is attached to the cursor yet
+				 */
+				da_cursor->active = 0;
+				i = -1;
+				goto error_out;
+			}
+		} else  {
+			if (INT_GET(node->hdr.level, ARCH_CONVERT) == i - 1)  {
+				i--;
+			} else  {
+				if (whichfork == XFS_DATA_FORK) 
+					do_warn("bad directory btree for directory "
+						"inode %llu\n", da_cursor->ino);
+				else
+					do_warn("bad attribute fork btree for "
+						"inode %llu\n", da_cursor->ino);
+				libxfs_putbuf(bp);
+				goto error_out;
+			}
+		}
+
+		da_cursor->level[i].hashval =
+				INT_GET(node->btree[0].hashval, ARCH_CONVERT);
+		da_cursor->level[i].bp = bp;
+		da_cursor->level[i].bno = bno;
+		da_cursor->level[i].index = 0;
+#ifdef XR_DIR_TRACE
+		da_cursor->level[i].n = XFS_BUF_TO_DA_INTNODE(bp);
+#endif
+
+		/*
+		 * set up new bno for next level down
+		 */
+		bno = INT_GET(node->btree[0].before, ARCH_CONVERT);
+	} while(node != NULL && i > 1);
+
+	/*
+	 * now return block number and get out
+	 */
+	*rbno = da_cursor->level[0].bno = bno;
+	return(1);
+
+error_out:
+	/*
+	 * i is the lowest level that got a buffer attached (or -1 if
+	 * none did), release from there back up to the root and don't
+	 * leave stale pointers behind in the cursor
+	 */
+	while (i > 1 && i <= da_cursor->active)  {
+		libxfs_putbuf(da_cursor->level[i].bp);
+		da_cursor->level[i].bp = NULL;
+		i++;
+	}
+
+	return(0);
+}
+
+/*
+ * release the buffers held in the cursor for every level from
+ * prev_level + 1 up to cursor->active (the level prev_level + 1 is
+ * always looked at, even if it is above cursor->active).  released
+ * slots are reset to NULL.
+ *
+ * if error == 0, we are not expecting to encounter any unreleased
+ * buffers: finding one draws a warning and trips the ASSERT.  if
+ * error == 1, we're in an error-handling case so unreleased buffers
+ * may exist and are released silently.
+ */
+void
+release_da_cursor_int(xfs_mount_t	*mp,
+			da_bt_cursor_t	*cursor,
+			int		prev_level,
+			int		error)
+{
+	int	lvl;
+
+	for (lvl = prev_level + 1; ; lvl++)  {
+		if (cursor->level[lvl].bp != NULL)  {
+			if (!error)  {
+				do_warn("release_da_cursor_int got unexpected non-null bp, "
+					"dabno = %u\n", cursor->level[lvl].bno);
+			}
+			ASSERT(error != 0);
+
+			libxfs_putbuf(cursor->level[lvl].bp);
+			cursor->level[lvl].bp = NULL;
+		}
+
+		/*
+		 * done once the top of the tree has been handled
+		 */
+		if (lvl >= cursor->active)
+			break;
+	}
+}
+
+/*
+ * normal-path cursor release: calls release_da_cursor_int() with
+ * error == 0, so any buffer still attached above prev_level is
+ * treated as a mistake.
+ */
+void
+release_da_cursor(xfs_mount_t	*mp,
+		da_bt_cursor_t	*cursor,
+		int		prev_level)
+{
+	const int	in_error_path = 0;
+
+	release_da_cursor_int(mp, cursor, prev_level, in_error_path);
+}
+
+/*
+ * error-path cursor release: calls release_da_cursor_int() with
+ * error == 1, so buffers still attached above prev_level are
+ * released without complaint.
+ */
+void
+err_release_da_cursor(xfs_mount_t	*mp,
+			da_bt_cursor_t	*cursor,
+			int		prev_level)
+{
+	const int	in_error_path = 1;
+
+	release_da_cursor_int(mp, cursor, prev_level, in_error_path);
+}
+
+/*
+ * like traverse_int_dablock only it does far less checking
+ * and doesn't maintain the cursor.  Just gets you to the
+ * leftmost block in the directory.  returns the fsbno
+ * of that block if successful, NULLDFSBNO if not.
+ *
+ * file block 0 of the data fork is mapped first.  if the directory
+ * size is no more than XFS_LBSIZE(mp), that block is the answer.
+ * otherwise the first "before" pointer is followed down from block 0,
+ * once per level as given by the level field of block 0, and the
+ * magic number is the only thing checked in each block on the way.
+ * every buffer is released as soon as it has been looked at.
+ */
+xfs_dfsbno_t
+get_first_dblock_fsbno(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_dinode_t	*dino)
+{
+	xfs_da_intnode_t	*node;
+	xfs_buf_t		*bp;
+	xfs_dfsbno_t		fsbno;
+	xfs_dablk_t		bno = 0;
+	int			levels_left = -1;	/* -1 until root is read */
+
+	fsbno = get_bmapi(mp, dino, ino, bno, XFS_DATA_FORK);
+	if (fsbno == NULLDFSBNO)  {
+		do_warn("bmap of block #%u of inode %llu failed\n",
+			bno, ino);
+		return(NULLDFSBNO);
+	}
+
+	/*
+	 * nothing to walk if the directory is no bigger than
+	 * XFS_LBSIZE(mp)
+	 */
+	if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) <= XFS_LBSIZE(mp))
+		return(fsbno);
+
+	for (;;)  {
+		/*
+		 * walk down left side of btree, release buffers as you
+		 * go.
+		 */
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+		if (bp == NULL)  {
+			do_warn("can't read block %u (fsbno %llu) for directory "
+				"inode %llu\n", bno, fsbno, ino);
+			return(NULLDFSBNO);
+		}
+
+		node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+
+		if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+			do_warn("bad dir/attr magic number in inode %llu, file "
+				"bno = %u, fsbno = %llu\n", ino, bno, fsbno);
+			libxfs_putbuf(bp);
+			return(NULLDFSBNO);
+		}
+
+		/*
+		 * the root tells us how many levels there are to descend
+		 */
+		if (levels_left == -1)
+			levels_left = INT_GET(node->hdr.level, ARCH_CONVERT);
+
+		/*
+		 * pick up the leftmost child, then drop the buffer
+		 */
+		bno = INT_GET(node->btree[0].before, ARCH_CONVERT);
+		libxfs_putbuf(bp);
+
+		fsbno = get_bmapi(mp, dino, ino, bno, XFS_DATA_FORK);
+		if (fsbno == NULLDFSBNO)  {
+			do_warn("bmap of block #%u of inode %llu failed\n", bno, ino);
+			return(NULLDFSBNO);
+		}
+
+		if (--levels_left <= 0)
+			break;
+	}
+
+	return(fsbno);
+}
+
+/*
+ * make sure that all entries in all blocks along the right side of
+ * the tree are used and hashvals are consistent.  p_level is the level
+ * of the descendant block; the block at p_level + 1 is checked and the
+ * routine recurses upward to the root.  returns 0 if good (even if it
+ * had to be fixed up), and 1 if bad.  the right edge of the tree is a
+ * block boundary: use this routine there instead of verify_da_path().
+ */
+int
+verify_final_da_path(xfs_mount_t	*mp,
+		da_bt_cursor_t		*cursor,
+		const int		p_level)
+{
+	xfs_da_intnode_t	*node;
+	xfs_dahash_t		hashval;
+	int			bad = 0;
+	int			entry;
+	int			this_level = p_level + 1;
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "in verify_final_da_path, this_level = %d\n",
+		this_level);
+#endif
+	/*
+	 * the index should point to the next "unprocessed" entry
+	 * in the block which should be the final (rightmost) entry
+	 */
+	entry = cursor->level[this_level].index;
+	node = (xfs_da_intnode_t *)XFS_BUF_PTR(cursor->level[this_level].bp);
+	/*
+	 * check internal block consistency on this level -- ensure
+	 * that all entries are used, encountered and expected hashvals
+	 * match, etc.
+	 */
+	if (entry != INT_GET(node->hdr.count, ARCH_CONVERT) - 1)  {
+		do_warn("directory/attribute block used/count inconsistency - %d/%hu\n",
+			entry, INT_GET(node->hdr.count, ARCH_CONVERT));
+		bad++;
+	}
+	/*
+	 * the last hashval must exceed the one recorded in the cursor
+	 */
+	if (cursor->level[this_level].hashval >=
+				INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) {
+		do_warn("directory/attribute block hashvalue inconsistency, "
+			"expected > %u / saw %u\n", cursor->level[this_level].hashval,
+			INT_GET(node->btree[entry].hashval, ARCH_CONVERT));
+		bad++;
+	}
+	if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) != 0)  {
+		do_warn("bad directory/attribute forward block pointer, expected 0, "
+			"saw %u\n", INT_GET(node->hdr.info.forw, ARCH_CONVERT));
+		bad++;
+	}
+	if (bad) {
+		do_warn("bad directory block in dir ino %llu\n", cursor->ino);
+		return(1);
+	}
+	/*
+	 * keep track of greatest block # -- that gets
+	 * us the length of the directory
+	 */
+	if (cursor->level[this_level].bno > cursor->greatest_bno)
+		cursor->greatest_bno = cursor->level[this_level].bno;
+
+	/*
+	 * ok, now check descendant block number against this level
+	 */
+	if (cursor->level[p_level].bno !=
+			INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+#ifdef XR_DIR_TRACE
+		fprintf(stderr, "bad directory btree pointer, child bno should be %d, "
+			"block bno is %d, hashval is %u\n",
+			INT_GET(node->btree[entry].before, ARCH_CONVERT),
+			cursor->level[p_level].bno,
+			cursor->level[p_level].hashval);
+		fprintf(stderr, "verify_final_da_path returns 1 (bad) #1a\n");
+#endif
+		return(1);
+	}
+
+	if (cursor->level[p_level].hashval !=
+				INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) {
+		if (!no_modify)  {
+			do_warn("correcting bad hashval in non-leaf dir/attr block\n");
+			do_warn("\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+			INT_SET(node->btree[entry].hashval, ARCH_CONVERT,
+				cursor->level[p_level].hashval);
+			cursor->level[this_level].dirty++;
+		} else  {
+			do_warn("would correct bad hashval in non-leaf dir/attr "
+				"block\n\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+		}
+	}
+
+	/*
+	 * stash final hashval; node is invalid once the buffer is released
+	 */
+	hashval = INT_GET(node->btree[entry].hashval, ARCH_CONVERT);
+	ASSERT(cursor->level[this_level].dirty == 0 ||
+		(cursor->level[this_level].dirty && !no_modify));
+
+	if (cursor->level[this_level].dirty && !no_modify)
+		libxfs_writebuf(cursor->level[this_level].bp, 0);
+	else
+		libxfs_putbuf(cursor->level[this_level].bp);
+	cursor->level[this_level].bp = NULL;
+
+	/*
+	 * bail out if this is the root block (top of tree)
+	 */
+	if (this_level >= cursor->active)  {
+#ifdef XR_DIR_TRACE
+		fprintf(stderr, "verify_final_da_path returns 0 (ok)\n");
+#endif
+		return(0);
+	}
+	/*
+	 * set hashvalue to correctly reflect the now-validated
+	 * last entry in this block and continue upwards validation
+	 */
+	cursor->level[this_level].hashval = hashval;
+	return(verify_final_da_path(mp, cursor, this_level));
+}
+
+/*
+ * Verifies the path from a descendant block up to the root.
+ * Should be called when the descendant level traversal hits
+ * a block boundary before crossing the boundary (reading in a new
+ * block).
+ *
+ * the directory/attr btrees work differently to the other fs btrees.
+ * each interior block contains records that are <hashval, bno>
+ * pairs.  The bno is a file bno, not a filesystem bno.  The last
+ * hashvalue in the block <bno> will be <hashval>.  BUT unlike
+ * the freespace btrees, the *last* value in each block gets
+ * propagated up the tree instead of the first value in each block.
+ * that is, the interior records point to child blocks and the *greatest*
+ * hash value contained by the child block is the one the block above
+ * uses as the key for the child block.
+ *
+ * p_level is the level of the descendant block.  returns 0 if good,
+ * and 1 if bad.  The descendant block may be a leaf block.
+ *
+ * the invariant here is that the values in the cursor for the
+ * levels beneath this level (this_level) and the cursor index
+ * for this level *must* be valid.
+ *
+ * that is, the hashval/bno info is accurate for all
+ * DESCENDANTS and match what the node[index] information
+ * for the current index in the cursor for this level.
+ *
+ * the index values in the cursor for the descendant level
+ * are allowed to be off by one as they will reflect the
+ * next entry at those levels to be processed.
+ *
+ * the hashvalue for the current level can't be set until
+ * we hit the last entry in the block so, it's garbage
+ * until set by this routine.
+ *
+ * bno and bp for the current block/level are always valid
+ * since they have to be set so we can get a buffer for the
+ * block.
+ */
+int
+verify_da_path(xfs_mount_t	*mp,
+	da_bt_cursor_t		*cursor,
+	const int		p_level)
+{
+	xfs_da_intnode_t	*node;
+	xfs_da_intnode_t	*newnode;
+	xfs_dfsbno_t		fsbno;
+	xfs_dablk_t		dabno;
+	xfs_buf_t		*bp;
+	int			bad;
+	int			entry;
+	int			this_level = p_level + 1;
+
+	/*
+	 * index is currently set to point to the entry that
+	 * should be processed now in this level.
+	 */
+	entry = cursor->level[this_level].index;
+	node = (xfs_da_intnode_t *)XFS_BUF_PTR(cursor->level[this_level].bp);
+
+	/*
+	 * if this block is out of entries, validate this
+	 * block and move on to the next block.
+	 * and update cursor value for said level
+	 */
+	if (entry >= INT_GET(node->hdr.count, ARCH_CONVERT))  {
+		/*
+		 * update the hash value for this level before validating
+		 * it (bno was set when the block was first read in).
+		 * NOTE(review): reads btree[-1] if entry and count are 0.
+		 */
+		cursor->level[this_level].hashval = 
+				INT_GET(node->btree[entry - 1].hashval, ARCH_CONVERT);
+
+		/*
+		 * keep track of greatest block # -- that gets
+		 * us the length of the directory
+		 */
+		if (cursor->level[this_level].bno > cursor->greatest_bno)
+			cursor->greatest_bno = cursor->level[this_level].bno;
+
+		/*
+		 * validate the path for the current used-up block
+		 * before we trash it
+		 */
+		if (verify_da_path(mp, cursor, this_level))
+			return(1);
+		/*
+		 * get the next (forw) sibling block and check its back pointer
+		 */
+		dabno = INT_GET(node->hdr.info.forw, ARCH_CONVERT);
+		ASSERT(dabno != 0);
+		fsbno = blkmap_get(cursor->blkmap, dabno);
+
+		if (fsbno == NULLDFSBNO) {
+			do_warn("can't get map info for block %u of directory "
+				"inode %llu\n", dabno, cursor->ino);
+			return(1);
+		}
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_warn("can't read block %u (%llu) for directory inode %llu\n",
+				dabno, fsbno, cursor->ino);
+			return(1);
+		}
+
+		newnode = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+		/*
+		 * verify magic number and back pointer, sanity-check
+		 * entry count, verify level
+		 */
+		bad = 0;
+		if (INT_GET(newnode->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+			do_warn("bad magic number %x in block %u (%llu) for directory "
+				"inode %llu\n",
+				INT_GET(newnode->hdr.info.magic, ARCH_CONVERT),
+				dabno, fsbno, cursor->ino);
+			bad++;
+		}
+		if (INT_GET(newnode->hdr.info.back, ARCH_CONVERT) !=
+						cursor->level[this_level].bno)  {
+			do_warn("bad back pointer in block %u (%llu) for directory "
+				"inode %llu\n", dabno, fsbno, cursor->ino);
+			bad++;
+		}
+		if (INT_GET(newnode->hdr.count, ARCH_CONVERT) >
+						XFS_DA_NODE_ENTRIES(mp))  {
+			do_warn("entry count %d too large in block %u (%llu) for "
+				"directory inode %llu\n",
+				INT_GET(newnode->hdr.count, ARCH_CONVERT),
+				dabno, fsbno, cursor->ino);
+			bad++;
+		}
+		if (INT_GET(newnode->hdr.level, ARCH_CONVERT) != this_level)  {
+			do_warn("bad level %d in block %u (%llu) for directory inode "
+				"%llu\n", INT_GET(newnode->hdr.level, ARCH_CONVERT),
+				dabno, fsbno, cursor->ino);
+			bad++;
+		}
+		if (bad)  {
+#ifdef XR_DIR_TRACE
+			fprintf(stderr, "verify_da_path returns 1 (bad) #4\n");
+#endif
+			libxfs_putbuf(bp);
+			return(1);
+		}
+		/*
+		 * update cursor, write out the *current* level if
+		 * required.  don't write out the descendant level
+		 */
+		ASSERT(cursor->level[this_level].dirty == 0 ||
+			cursor->level[this_level].dirty && !no_modify);
+
+		if (cursor->level[this_level].dirty && !no_modify)
+			libxfs_writebuf(cursor->level[this_level].bp, 0);
+		else
+			libxfs_putbuf(cursor->level[this_level].bp);
+		cursor->level[this_level].bp = bp;
+		cursor->level[this_level].dirty = 0;
+		cursor->level[this_level].bno = dabno;
+		cursor->level[this_level].hashval =
+			INT_GET(newnode->btree[0].hashval, ARCH_CONVERT);
+#ifdef XR_DIR_TRACE
+		cursor->level[this_level].n = newnode;
+#endif
+		node = newnode;
+
+		entry = cursor->level[this_level].index = 0;
+	}
+	/*
+	 * the entry's child pointer must match the descendant's block number
+	 */
+	if (cursor->level[p_level].bno !=
+			INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+#ifdef XR_DIR_TRACE
+		fprintf(stderr, "bad directory btree pointer, child bno should be %d, "
+			"block bno is %d, hashval is %u\n",
+			INT_GET(node->btree[entry].before, ARCH_CONVERT),
+			cursor->level[p_level].bno,
+			cursor->level[p_level].hashval);
+		fprintf(stderr, "verify_da_path returns 1 (bad) #1a\n");
+#endif
+		return(1);
+	}
+	/*
+	 * ok, now validate last hashvalue in the descendant
+	 * block against the hashval in the current entry
+	 */
+	if (cursor->level[p_level].hashval !=
+			INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+		if (!no_modify)  {
+			do_warn("correcting bad hashval in interior dir/attr block\n");
+			do_warn("\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+			INT_SET(node->btree[entry].hashval, ARCH_CONVERT,
+				cursor->level[p_level].hashval);
+			cursor->level[this_level].dirty++;
+		} else  {
+			do_warn("would correct bad hashval in interior dir/attr "
+				"block\n\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+		}
+	}
+	/*
+	 * increment index for this level to point to next entry
+	 * (which should point to the next descendant block)
+	 */
+	cursor->level[this_level].index++;
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "verify_da_path returns 0 (ok)\n");
+#endif
+	return(0);
+}
+
+#if 0
+/*
+ * (compiled out by #if 0.)  junks directory leaf block entries that
+ * have zero-length names.  buf_dirty is an in/out, set to 1 if the
+ * leaf was modified; it is NOT reset to zero when nothing happened
+ * because it may already be set by the caller.  Assumes that the
+ * block has been compacted before calling this routine.
+ */
+void
+junk_zerolen_dir_leaf_entries(
+	xfs_mount_t		*mp,
+	xfs_dir_leafblock_t	*leaf,
+	xfs_ino_t		ino,
+	int			*buf_dirty)
+{
+	xfs_dir_leaf_entry_t	*entry;
+	xfs_dir_leaf_name_t	*namest;
+	xfs_dir_leaf_hdr_t	*hdr;
+	xfs_dir_leaf_map_t	*map;
+	xfs_ino_t		tmp_ino;
+	int			bytes;
+	int			tmp_bytes;
+	int			current_hole = 0;
+	int			i;
+	int			j;
+	int			tmp;
+	int			start;
+	int			before;
+	int			after;
+	int			smallest;
+	int			tablesize;
+
+	entry = &leaf->entries[0];
+	hdr = &leaf->hdr;
+
+	/*
+	 * we can convert the entries to one character entries
+	 * as long as we have space.  Once we run out, then
+	 * we have to really delete (copy over) an entry.
+	 * however, that frees up some space that we could use ...
+	 *
+	 * so the idea is, we'll use up space from all the holes,
+	 * potentially leaving each hole too small to do any good.
+	 * then if need to, we'll delete entries and use that space
+	 * up from the top-most byte down.  that may leave a 4th hole
+	 * but we can represent that by correctly setting the value
+	 * of firstused.  that leaves any hole between the end of
+	 * the entry list and firstused so it doesn't have to be
+	 * recorded in the hole map.
+	 */
+
+	for (bytes = i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
+		/*
+		 * skip over entries that are good or already converted
+		 */
+		if (entry->namelen != 0)
+			continue;
+
+		*buf_dirty = 1;
+#if 0
+		/*
+		 * try and use up existing holes first until they get
+		 * too small, then set bytes to the # of bytes between
+		 * the current heap beginning and the last used byte
+		 * in the entry table.
+		 */
+		if (bytes < sizeof(xfs_dir_leaf_name_t) &&
+				current_hole < XFS_DIR_LEAF_MAPSIZE)  {
+			/*
+			 * skip over holes that are too small
+			 */
+			while (current_hole < XFS_DIR_LEAF_MAPSIZE &&
+				INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT) <
+					sizeof(xfs_dir_leaf_name_t))  {
+				current_hole++;
+			}
+
+			if (current_hole < XFS_DIR_LEAF_MAPSIZE)
+				bytes = INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT);
+			else
+				bytes = (int) INT_GET(hdr->firstused, ARCH_CONVERT) -
+				 ((__psint_t) &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)] -
+				  (__psint_t) leaf);
+		}
+#endif
+		current_hole = 0;
+
+		for (map = &hdr->freemap[0];
+				current_hole < XFS_DIR_LEAF_MAPSIZE &&
+					INT_GET(map->size, ARCH_CONVERT) < sizeof(xfs_dir_leaf_name_t);
+				map++)  {
+			current_hole++;
+		}
+
+		/*
+		 * if we can use an existing hole, do it.  otherwise,
+		 * delete entries until the deletions create a big enough
+		 * hole to convert another entry.  then use up those bytes
+		 * until you run low.  then delete entries again ...
+		 */
+		if (current_hole < XFS_DIR_LEAF_MAPSIZE)  {
+			ASSERT(sizeof(xfs_dir_leaf_name_t) <= bytes);
+			/* NOTE(review): bytes is 0 here; its setter is #if 0'd */
+			do_warn("marking bad entry in directory inode %llu\n",
+				ino);
+
+			entry->namelen = 1;
+			INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(hdr->freemap[current_hole].base, ARCH_CONVERT) +
+					bytes - sizeof(xfs_dir_leaf_name_t));
+
+			namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+			tmp_ino = NULLFSINO;
+			XFS_DIR_SF_PUT_DIRINO_ARCH(&tmp_ino, &namest->inumber, ARCH_CONVERT);
+			namest->name[0] = '/';
+
+			if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+				INT_SET(hdr->firstused, ARCH_CONVERT, INT_GET(entry->nameidx, ARCH_CONVERT));
+			INT_MOD(hdr->freemap[current_hole].size, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_name_t)));
+			INT_MOD(hdr->namebytes, ARCH_CONVERT, +1);
+		} else  {
+			/*
+			 * delete the table entry and try and account for the
+			 * space in the holemap.  don't have to update namebytes
+			 * or firstused since we're not actually deleting any
+			 * bytes from the heap.  following code swiped from
+			 * xfs_dir_leaf_remove() in xfs_dir_leaf.c
+			 */
+			INT_MOD(hdr->count, ARCH_CONVERT, -1);
+			do_warn(
+			"deleting zero length entry in directory inode %llu\n",
+				ino);
+			/*
+			 * overwrite the bad entry unless it's the
+			 * last entry in the list (highly unlikely).
+			 * zero out the free'd bytes.
+			 */
+			if (INT_GET(hdr->count, ARCH_CONVERT) - i > 0)  {
+				memmove(entry, entry + 1, (INT_GET(hdr->count, ARCH_CONVERT) - i) *
+					sizeof(xfs_dir_leaf_entry_t));
+			}
+			bzero((void *) ((__psint_t) entry +
+				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) *
+				sizeof(xfs_dir_leaf_entry_t)),
+				sizeof(xfs_dir_leaf_entry_t));
+			/* NOTE(review): "&leaf" below is probably meant to be "leaf" */
+			start = (__psint_t) &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)] -
+				(__psint_t) &leaf;
+			tablesize = sizeof(xfs_dir_leaf_entry_t) *
+				(INT_GET(hdr->count, ARCH_CONVERT) + 1) + sizeof(xfs_dir_leaf_hdr_t);
+			map = &hdr->freemap[0];
+			tmp = INT_GET(map->size, ARCH_CONVERT);
+			before = after = -1;
+			smallest = XFS_DIR_LEAF_MAPSIZE - 1;
+			for (j = 0; j < XFS_DIR_LEAF_MAPSIZE; map++, j++) {
+				ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+				ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+				if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
+					INT_MOD(map->base, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_entry_t)));
+					INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+				}
+
+				if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == start) {
+					before = j;
+				} else if (INT_GET(map->base, ARCH_CONVERT) == start +
+						sizeof(xfs_dir_leaf_entry_t))  {
+					after = j;
+				} else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
+					tmp = INT_GET(map->size, ARCH_CONVERT);
+					smallest = j;
+				}
+			}
+
+			/*
+			 * Coalesce adjacent freemap regions,
+			 * or replace the smallest region.
+			 */
+			if ((before >= 0) || (after >= 0)) {
+				if ((before >= 0) && (after >= 0))  {
+					map = &hdr->freemap[before];
+					INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+					INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
+					INT_ZERO(hdr->freemap[after].base, ARCH_CONVERT);
+					INT_ZERO(hdr->freemap[after].size, ARCH_CONVERT);
+				} else if (before >= 0) {
+					map = &hdr->freemap[before];
+					INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+				} else {
+					map = &hdr->freemap[after];
+					INT_SET(map->base, ARCH_CONVERT, start);
+					INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+				}
+			} else  {
+				/*
+				 * Replace smallest region
+				 * (if it is smaller than free'd entry)
+				 */
+				map = &hdr->freemap[smallest];
+				if (INT_GET(map->size, ARCH_CONVERT) < sizeof(xfs_dir_leaf_entry_t))  {
+					INT_SET(map->base, ARCH_CONVERT, start);
+					INT_SET(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+				}
+				/*
+				 * mark as needing compaction
+				 */
+				hdr->holes = 1;
+			}
+#if 0
+			/*
+			 * do we have to delete stuff or is there
+			 * room for deletions?
+			 */
+			ASSERT(current_hole == XFS_DIR_LEAF_MAPSIZE);
+
+			/*
+			 * here, bytes == number of unused bytes from
+			 * end of list to top (beginning) of heap
+			 * (firstused).  It's ok to leave extra
+			 * unused bytes in that region because they
+			 * wind up before firstused (which we reset
+			 * appropriately
+			 */
+			if (bytes < sizeof(xfs_dir_leaf_name_t))  {
+				/*
+				 * have to delete an entry because
+				 * we have no room to convert it to
+				 * a bad entry
+				 */
+				do_warn(
+				"deleting entry in directory inode %llu\n",
+					ino);
+				/*
+				 * overwrite the bad entry unless it's the
+				 * last entry in the list (highly unlikely).
+				 */
+				if (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1> 0)  {
+					memmove(entry, entry + 1,
+						(INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) *
+						sizeof(xfs_dir_leaf_entry_t));
+				}
+				bzero((void *) ((__psint_t) entry +
+					(INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) *
+					sizeof(xfs_dir_leaf_entry_t)),
+					sizeof(xfs_dir_leaf_entry_t));
+
+				/*
+				 * bump up free byte count, drop other
+				 * index vars since the table just
+				 * shrank by one entry and we don't
+				 * want to miss any as we walk the table
+				 */
+				bytes += sizeof(xfs_dir_leaf_entry_t);
+				INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+				entry--;
+				i--;
+			} else  {
+				/*
+				 * convert entry using the bytes in between
+				 * the end of the entry table and the heap
+				 */
+				entry->namelen = 1;
+				INT_MOD(leaf->hdr.firstused, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_name_t)));
+				INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(leaf->hdr.firstused, ARCH_CONVERT));
+
+				namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+							INT_GET(entry->nameidx, ARCH_CONVERT));
+				tmp_ino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&tmp_ino,
+							&namest->inumber, ARCH_CONVERT);
+				namest->name[0] = '/';
+
+				bytes -= sizeof(xfs_dir_leaf_entry_t);
+			}
+#endif
+		}
+	}
+
+	return;
+}
+#endif
+
+static char dirbuf[64 * 1024];	/* 64k scratch; NOTE(review): presumably one max-size dir block -- confirm */
+
+/*
+ * called by both node dir and leaf dir processing routines
+ * validates all contents *but* the sibling pointers (forw/back)
+ * and the magic number.
+ *
+ * returns 0 if the directory is ok or has been brought to the
+ * stage that it can be fixed up later (in phase 6),
+ * 1 if it has to be junked.
+ *
+ * Right now we fix a lot of things (TBD == to be deleted).
+ *
+ *	incorrect . entries - inode # is corrected
+ *	entries with mismatched hashvalue/name strings - hashvalue reset
+ *	entries whose hashvalues are out-of-order - entry marked TBD
+ *	.. entries with invalid inode numbers - entry marked TBD
+ *	entries with invalid inode numbers - entry marked TBD
+ *	multiple . entries - all but the first entry are marked TBD
+ *	zero-length entries - entry is deleted
+ *	entries with an out-of-bounds name index ptr - entry is deleted
+ *
+ * entries marked TBD have the first character of the name (which
+ *	lives in the heap) set to '/' -- an illegal value for a
+ *	name character.
+ *
+ * entries deleted right here are deleted by blowing away the entry
+ *	(but leaving the heap untouched).  any space that was used
+ *	by the deleted entry will be reclaimed by the block freespace
+ *	(da_freemap) processing code.
+ *
+ * if two entries claim the same space in the heap (say, due to
+ * bad entry name index pointers), we lose the directory.  We could
+ * try harder to fix this but it'll do for now.
+ */
+/* ARGSUSED */
+int
+process_leaf_dir_block(
+	xfs_mount_t		*mp,
+	xfs_dir_leafblock_t	*leaf,
+	xfs_dablk_t		da_bno,
+	xfs_ino_t		ino, 
+	xfs_dahash_t		last_hashval,	/* last hashval encountered */
+	int			ino_discovery,
+	blkmap_t		*blkmap,
+	int			*dot,
+	int			*dotdot,
+	xfs_ino_t		*parent,
+	int			*buf_dirty,	/* is buffer dirty? */
+	xfs_dahash_t		*next_hashval)	/* greatest hashval in block */
+{
+	xfs_ino_t			lino;
+	xfs_dir_leaf_entry_t		*entry;
+	xfs_dir_leaf_entry_t		*s_entry;
+	xfs_dir_leaf_entry_t		*d_entry;
+	xfs_dir_leafblock_t		*new_leaf;
+	char				*first_byte;
+	xfs_dir_leaf_name_t		*namest;
+	ino_tree_node_t			*irec_p;
+	int				num_entries;
+	xfs_dahash_t			hashval;
+	int				i;
+	int				nm_illegal;
+	int				bytes;
+	int				start;
+	int				stop;
+	int				res = 0;
+	int				ino_off;
+	int				first_used;
+	int				bytes_used;
+	int				reset_holes;
+	int				zero_len_entries;
+	char				fname[MAXNAMELEN + 1];
+	da_hole_map_t			holemap;
+	da_hole_map_t			bholemap;
+#if 0
+	unsigned char			*dir_freemap;
+#endif
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "\tprocess_leaf_dir_block - ino %llu\n", ino);
+#endif
+
+	/*
+	 * clear static dir block freespace bitmap
+	 */
+	init_da_freemap(dir_freemap);
+
+#if 0
+	/*
+	 * XXX - alternatively, do this for parallel usage.
+	 * set up block freespace map.  head part of dir leaf block
+	 * including all entries are packed so we can use sizeof
+	 * and not worry about alignment.
+	 */
+
+	if ((dir_freemap = alloc_da_freemap(mp)) == NULL)  {
+		do_error("couldn't allocate directory block freemap\n");
+		abort();
+	}
+#endif
+
+	*buf_dirty = 0;
+	first_used = mp->m_sb.sb_blocksize;
+	zero_len_entries = 0;
+	bytes_used = 0;
+
+	i = stop = sizeof(xfs_dir_leaf_hdr_t);
+	if (set_da_freemap(mp, dir_freemap, 0, stop))  {
+		do_warn(
+"directory block header conflicts with used space in directory inode %llu\n",
+				ino);
+		return(1);
+	}
+
+	/*
+	 * verify structure:  monotonically increasing hash value for
+	 * all leaf entries, indexes for all entries must be within
+	 * this fs block (trivially true for 64K blocks).  also track
+	 * used space so we can check the freespace map.  check for
+	 * zero-length entries.  for now, if anything's wrong, we
+	 * junk the directory and we'll pick up no-longer referenced
+	 * inodes on a later pass.
+	 */
+	for (i = 0, entry = &leaf->entries[0];
+			i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+			i++, entry++)  {
+		/*
+		 * check that the name index isn't out of bounds
+		 * if it is, delete the entry since we can't
+		 * grab the inode #.
+		 */
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) >= mp->m_sb.sb_blocksize)  {
+			if (!no_modify)  {
+				*buf_dirty = 1;
+
+				if (INT_GET(leaf->hdr.count, ARCH_CONVERT) > 1)  {
+					do_warn(
+"nameidx %d for entry #%d, bno %d, ino %llu > fs blocksize, deleting entry\n",
+						INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+					ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > i);
+
+					bytes = (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+						sizeof(xfs_dir_leaf_entry_t);
+
+					/*
+					 * compress table unless we're
+					 * only dealing with 1 entry
+					 * (the last one) in which case
+					 * just zero it.
+					 */
+					if (bytes >
+					    sizeof(xfs_dir_leaf_entry_t))  {
+						memmove(entry, entry + 1,
+							bytes);
+						bzero((void *)
+						((__psint_t) entry + bytes),
+						sizeof(xfs_dir_leaf_entry_t));
+					} else  {
+						bzero(entry,
+						sizeof(xfs_dir_leaf_entry_t));
+					}
+
+					/*
+					 * sync vars to match smaller table.
+					 * don't have to worry about freespace
+					 * map since we haven't set it for
+					 * this entry yet.
+					 */
+					INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+					i--;
+					entry--;
+				} else  {
+					do_warn(
+"nameidx %d, entry #%d, bno %d, ino %llu > fs blocksize, marking entry bad\n",
+						INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+					INT_SET(entry->nameidx, ARCH_CONVERT, mp->m_sb.sb_blocksize -
+						sizeof(xfs_dir_leaf_name_t));
+					namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+							INT_GET(entry->nameidx, ARCH_CONVERT));
+					lino = NULLFSINO;
+					XFS_DIR_SF_PUT_DIRINO_ARCH(&lino,
+							&namest->inumber, ARCH_CONVERT);
+					namest->name[0] = '/';
+				}
+			} else  {
+				do_warn(
+"nameidx %d, entry #%d, bno %d, ino %llu > fs blocksize, would delete entry\n",
+					INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+			}
+			continue;
+		}
+		/*
+		 * inode processing -- make sure the inode
+		 * is in our tree or we add it to the uncertain
+		 * list if the inode # is valid.  if namelen is 0,
+		 * we can still try for the inode as long as nameidx
+		 * is ok.
+		 */
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+
+		/*
+		 * we may have to blow out an entry because of bad
+		 * inode numbers.  do NOT touch the name until after
+		 * we've computed the hashvalue and done a namecheck()
+		 * on the name.
+		 */
+		if (!ino_discovery && lino == NULLFSINO)  {
+			/*
+			 * don't do a damned thing.  We already
+			 * found this (or did it ourselves) during
+			 * phase 3.
+			 */
+		} else if (verify_inum(mp, lino))  {
+			/*
+			 * bad inode number.  clear the inode
+			 * number and the entry will get removed
+			 * later.  We don't trash the directory
+			 * since it's still structurally intact.
+			 */
+			do_warn(
+"invalid ino number %llu in dir ino %llu, entry #%d, bno %d\n",
+				lino, ino, i, da_bno);
+			if (!no_modify)  {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_rbmino)  {
+			do_warn(
+"entry #%d, bno %d in directory %llu references realtime bitmap inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify)  {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_rsumino)  {
+			do_warn(
+"entry #%d, bno %d in directory %llu references realtime summary inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify)  {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_uquotino)  {
+			do_warn(
+"entry #%d, bno %d in directory %llu references user quota inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify)  {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_pquotino)  {
+			do_warn(
+"entry #%d, bno %d in directory %llu references proj quota inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify)  {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == old_orphanage_ino)  {
+			/*
+			 * do nothing, silently ignore it, entry has
+			 * already been marked TBD since old_orphanage_ino
+			 * is set non-zero.
+			 */
+		} else if ((irec_p = find_inode_rec(
+				XFS_INO_TO_AGNO(mp, lino),
+				XFS_INO_TO_AGINO(mp, lino))) != NULL)  {
+			/*
+			 * inode recs should have only confirmed
+			 * inodes in them
+			 */
+			ino_off = XFS_INO_TO_AGINO(mp, lino) -
+					irec_p->ino_startnum;
+			ASSERT(is_inode_confirmed(irec_p, ino_off));
+			/*
+			 * if inode is marked free and we're in inode
+			 * discovery mode, leave the entry alone for now.
+			 * if the inode turns out to be used, we'll figure
+			 * that out when we scan it.  If the inode really
+			 * is free, we'll hit this code again in phase 4
+			 * after we've finished inode discovery and blow
+			 * out the entry then.
+			 */
+			if (!ino_discovery && is_inode_free(irec_p, ino_off))  {
+				if (!no_modify)  {
+					do_warn(
+"entry references free inode %llu in directory %llu, will clear entry\n",
+						lino, ino);
+					lino = NULLFSINO;
+					XFS_DIR_SF_PUT_DIRINO_ARCH(&lino,
+							&namest->inumber, ARCH_CONVERT);
+					*buf_dirty = 1;
+				} else  {
+					do_warn(
+"entry references free inode %llu in directory %llu, would clear entry\n",
+						lino, ino);
+				}
+			}
+		} else if (ino_discovery)  {
+			add_inode_uncertain(mp, lino, 0);
+		} else  {
+			do_warn(
+	"bad ino number %llu in dir ino %llu, entry #%d, bno %d\n",
+				lino, ino, i, da_bno);
+			if (!no_modify)  {
+				do_warn("clearing inode number...\n");
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else  {
+				do_warn("would clear inode number...\n");
+			}
+		}
+		/*
+		 * if we have a zero-length entry, trash it.
+		 * we may lose the inode (chunk) if we don't
+		 * finish the repair successfully and the inode
+		 * isn't mentioned anywhere else (like in the inode
+		 * tree) but the alternative is to risk losing the
+		 * entire directory by trying to use the next byte
+		 * to turn the entry into a 1-char entry.  That's
+		 * probably a safe bet but if it didn't work, we'd
+		 * lose the entire directory the way we currently do
+		 * things.  (Maybe we should change that later :-).
+		 */
+		if (entry->namelen == 0)  {
+			*buf_dirty = 1;
+
+			if (INT_GET(leaf->hdr.count, ARCH_CONVERT) > 1)  {
+				do_warn(
+	"entry #%d, dir inode %llu, has zero-len name, deleting entry\n",
+					i, ino);
+				ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > i);
+
+				bytes = (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+					sizeof(xfs_dir_leaf_entry_t);
+
+				/*
+				 * compress table unless we're
+				 * only dealing with 1 entry
+				 * (the last one) in which case
+				 * just zero it.
+				 */
+				if (bytes > sizeof(xfs_dir_leaf_entry_t))  {
+					memmove(entry, entry + 1,
+						bytes);
+					bzero((void *)
+						((__psint_t) entry + bytes),
+						sizeof(xfs_dir_leaf_entry_t));
+				} else  {
+					bzero(entry,
+						sizeof(xfs_dir_leaf_entry_t));
+				}
+
+				/*
+				 * sync vars to match smaller table.
+				 * don't have to worry about freespace
+				 * map since we haven't set it for
+				 * this entry yet.
+				 */
+				INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+				i--;
+				entry--;
+			} else  {
+				/*
+				 * if it's the only entry, preserve the
+				 * inode number for now
+				 */
+				do_warn(
+	"entry #%d, dir inode %llu, has zero-len name, marking entry bad\n",
+					i, ino);
+				INT_SET(entry->nameidx, ARCH_CONVERT, mp->m_sb.sb_blocksize -
+						sizeof(xfs_dir_leaf_name_t));
+				namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+						INT_GET(entry->nameidx, ARCH_CONVERT));
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				namest->name[0] = '/';
+			}
+		} else if (INT_GET(entry->nameidx, ARCH_CONVERT) + entry->namelen > XFS_LBSIZE(mp))  {
+			do_warn(
+"bad size, entry #%d in dir inode %llu, block %u -- entry overflows block\n",
+			i, ino, da_bno);
+
+			return(1);
+		}
+
+		start = (__psint_t)&leaf->entries[i] - (__psint_t)leaf;;
+		stop = start + sizeof(xfs_dir_leaf_entry_t);
+
+		if (set_da_freemap(mp, dir_freemap, start, stop))  {
+			do_warn(
+"dir entry slot %d in block %u conflicts with used space in dir inode %llu\n",
+				i, da_bno, ino);
+			return(1);
+		}
+
+		/*
+		 * check if the name is legal.  if so, then
+		 * check that the name and hashvalues match.
+		 *
+		 * if the name is illegal, we don't check the
+		 * hashvalue computed from it.  we just make
+		 * sure that the hashvalue in the entry is
+		 * monotonically increasing wrt to the previous
+		 * entry.
+		 *
+		 * Note that we do NOT have to check the length
+		 * because the length is stored in a one-byte
+		 * unsigned int which max's out at MAXNAMELEN
+		 * making it impossible for the stored length
+		 * value to be out of range.
+		 */
+		bcopy(namest->name, fname, entry->namelen);
+		fname[entry->namelen] = '\0';
+		hashval = libxfs_da_hashname(fname, entry->namelen);
+
+		/*
+		 * only complain about illegal names in phase 3 (when
+		 * inode discovery is turned on).  Otherwise, we'd complain
+		 * a lot during phase 4.  If the name is illegal, leave
+		 * the hash value in that entry alone.
+		 */
+		nm_illegal = namecheck(fname, entry->namelen);
+
+		if (ino_discovery && nm_illegal)  {
+			/*
+			 * junk the entry, illegal name
+			 */
+			if (!no_modify)  {
+				do_warn(
+	"illegal name \"%s\" in directory inode %llu, entry will be cleared\n",
+					fname, ino);
+				namest->name[0] = '/';
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+	"illegal name \"%s\" in directory inode %llu, entry would be cleared\n",
+					fname, ino);
+			}
+		} else if (!nm_illegal && INT_GET(entry->hashval, ARCH_CONVERT) != hashval)  {
+			/*
+			 * try resetting the hashvalue to the correct
+			 * value for the string, if the string has been
+			 * corrupted, too, that will get picked up next
+			 */
+			do_warn("\tmismatched hash value for entry \"%s\"\n",
+				fname);
+			if (!no_modify)  {
+				do_warn(
+			"\t\tin directory inode %llu.  resetting hash value.\n",
+					ino);
+				INT_SET(entry->hashval, ARCH_CONVERT, hashval);
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+		"\t\tin directory inode %llu.  would reset hash value.\n",
+					ino);
+			}
+		}
+		
+		/*
+		 * now we can mark entries with NULLFSINO's bad
+		 */
+		if (!no_modify && lino == NULLFSINO)  {
+			namest->name[0] = '/';
+			*buf_dirty = 1;
+		}
+
+		/*
+		 * regardless of whether the entry has or hasn't been
+		 * marked for deletion, the hash value ordering must
+		 * be maintained.
+		 */
+		if (INT_GET(entry->hashval, ARCH_CONVERT) < last_hashval)  {
+			/*
+			 * blow out the entry -- set hashval to sane value
+			 * and set the first character in the string to
+			 * the illegal value '/'.  Reset the hash value
+			 * to the last hashvalue so that verify_da_path
+			 * will fix up the interior pointers correctly.
+			 * the entry will be deleted later (by routines
+			 * that need only the entry #).  We keep the
+			 * inode number in the entry so we can attach
+			 * the inode to the orphanage later.
+			 */
+			do_warn("\tbad hash ordering for entry \"%s\"\n",
+				fname);
+			if (!no_modify)  {
+				do_warn(
+		"\t\tin directory inode %llu.  will clear entry\n",
+					ino);
+				INT_SET(entry->hashval, ARCH_CONVERT, last_hashval);
+				namest->name[0] = '/';
+				*buf_dirty = 1;
+			} else  {
+				do_warn(
+		"\t\tin directory inode %llu.  would clear entry\n",
+					ino);
+			}
+		}
+
+		*next_hashval = last_hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+
+		/*
+		 * if heap data conflicts with something,
+		 * blow it out and skip the rest of the loop
+		 */
+		if (set_da_freemap(mp, dir_freemap, INT_GET(entry->nameidx, ARCH_CONVERT),
+				INT_GET(entry->nameidx, ARCH_CONVERT) + sizeof(xfs_dir_leaf_name_t) +
+				entry->namelen - 1))  {
+			do_warn(
+"name \"%s\" (block %u, slot %d) conflicts with used space in dir inode %llu\n",
+				fname, da_bno, i, ino);
+			if (!no_modify)  {
+				entry->namelen = 0;
+				*buf_dirty = 1;
+
+				do_warn(
+		"will clear entry \"%s\" (#%d) in directory inode %llu\n",
+					fname, i, ino);
+			} else  {
+				do_warn(
+		"would clear entry \"%s\" (#%d)in directory inode %llu\n",
+					fname, i, ino);
+			}
+			continue;
+		}
+
+		/*
+		 * keep track of heap stats (first byte used, total bytes used)
+		 */
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) < first_used)
+			first_used = INT_GET(entry->nameidx, ARCH_CONVERT);
+		bytes_used += entry->namelen;
+
+		/*
+		 * special . or .. entry processing
+		 */
+		if (entry->namelen == 2 && namest->name[0] == '.' &&
+						namest->name[1] == '.') {
+			/*
+			 * the '..' case
+			 */
+			if (!*dotdot) {
+				(*dotdot)++;
+				*parent = lino;
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_leaf_dir_block found .. entry (parent) = %llu\n", lino);
+#endif
+				/*
+				 * what if .. == .?  legal only in
+				 * the root inode.  blow out entry
+				 * and set parent to NULLFSINO otherwise.
+				 */
+				if (ino == lino &&
+						ino != mp->m_sb.sb_rootino)  {
+					*parent = NULLFSINO;
+					do_warn(
+	"bad .. entry in dir ino %llu, points to self",
+						ino);
+					if (!no_modify)  {
+						do_warn("will clear entry\n");
+
+						namest->name[0] = '/';
+						*buf_dirty = 1;
+					} else  {
+						do_warn("would clear entry\n");
+					}
+				} else if (ino != lino &&
+						ino == mp->m_sb.sb_rootino)  {
+					/*
+					 * we have to make sure that . == ..
+					 * in the root inode
+					 */
+					if (!no_modify)  {
+						do_warn(
+		"correcting .. entry in root inode %llu, was %llu\n",
+							ino, *parent);
+						XFS_DIR_SF_PUT_DIRINO_ARCH(
+							&ino,
+						&namest->inumber, ARCH_CONVERT);
+						*buf_dirty = 1;
+					} else  {
+						do_warn(
+	"bad .. entry (%llu) in root inode %llu should be %llu\n",
+							*parent,
+							ino, ino);
+					}
+				}
+			} else  {
+				/*
+				 * can't fix the directory unless we know
+				 * which .. entry is the right one.  Both
+				 * have valid inode numbers, match the hash
+				 * value and the hash values are ordered
+				 * properly or we wouldn't be here.  So
+				 * since both seem equally valid, trash
+				 * this one.
+				 */
+				if (!no_modify)  {
+					do_warn(
+"multiple .. entries in directory inode %llu, will clear second entry\n",
+						ino);
+					namest->name[0] = '/';
+					*buf_dirty = 1;
+				} else  {
+					do_warn(
+"multiple .. entries in directory inode %llu, would clear second entry\n",
+						ino);
+				}
+			}
+		} else if (entry->namelen == 1 && namest->name[0] == '.')  {
+			/*
+			 * the '.' case
+			 */
+			if (!*dot)  {
+				(*dot)++;
+				if (lino != ino)  {
+					if (!no_modify)  {
+						do_warn(
+	". in directory inode %llu has wrong value (%llu), fixing entry...\n",
+							ino, lino);
+						XFS_DIR_SF_PUT_DIRINO_ARCH(&ino,
+							&namest->inumber, ARCH_CONVERT);
+						*buf_dirty = 1;
+					} else  {
+						do_warn(
+			". in directory inode %llu has wrong value (%llu)\n",
+							ino, lino);
+					}
+				}
+			} else  {
+				do_warn(
+				"multiple . entries in directory inode %llu\n",
+					ino);
+				/*
+				 * mark entry as to be junked.
+				 */
+				if (!no_modify)  {
+					do_warn(
+			"will clear one . entry in directory inode %llu\n",
+						ino);
+					namest->name[0] = '/';
+					*buf_dirty = 1;
+				} else  {
+					do_warn(
+			"would clear one . entry in directory inode %llu\n",
+						ino);
+				}
+			}
+		} else  {
+			/*
+			 * all the rest -- make sure only . references self
+			 */
+			if (lino == ino)  {
+				do_warn(
+			"entry \"%s\" in directory inode %llu points to self, ",
+					fname, ino);
+				if (!no_modify)  {
+					do_warn("will clear entry\n");
+					namest->name[0] = '/';
+					*buf_dirty = 1;
+				} else  {
+					do_warn("would clear entry\n");
+				}
+			}
+		}
+	}
+
+	/*
+	 * compare top of heap values and reset as required.  if the
+	 * holes flag is set, don't reset first_used unless it's
+	 * pointing to used bytes.  we're being conservative here
+	 * since the block will get compacted anyhow by the kernel.
+	 */
+	if (leaf->hdr.holes == 0 && first_used != INT_GET(leaf->hdr.firstused, ARCH_CONVERT) ||
+			INT_GET(leaf->hdr.firstused, ARCH_CONVERT) > first_used)  {
+		if (!no_modify)  {
+			if (verbose)
+				do_warn(
+"- resetting first used heap value from %d to %d in block %u of dir ino %llu\n",
+					(int) INT_GET(leaf->hdr.firstused, ARCH_CONVERT), first_used,
+					da_bno, ino);
+			INT_SET(leaf->hdr.firstused, ARCH_CONVERT, first_used);
+			*buf_dirty = 1;
+		} else  {
+			if (verbose)
+				do_warn(
+"- would reset first used value from %d to %d in block %u of dir ino %llu\n",
+					(int) INT_GET(leaf->hdr.firstused, ARCH_CONVERT), first_used,
+					da_bno, ino);
+		}
+	}
+
+	if (bytes_used != INT_GET(leaf->hdr.namebytes, ARCH_CONVERT))  {
+		if (!no_modify)  {
+			if (verbose)
+				do_warn(
+"- resetting namebytes cnt from %d to %d in block %u of dir inode %llu\n",
+					(int) INT_GET(leaf->hdr.namebytes, ARCH_CONVERT), bytes_used,
+					da_bno, ino);
+			INT_SET(leaf->hdr.namebytes, ARCH_CONVERT, bytes_used);
+			*buf_dirty = 1;
+		} else  {
+			if (verbose)
+				do_warn(
+"- would reset namebytes cnt from %d to %d in block %u of dir inode %llu\n",
+					(int) INT_GET(leaf->hdr.namebytes, ARCH_CONVERT), bytes_used,
+					da_bno, ino);
+		}
+	}
+
+	/*
+	 * If the hole flag is not set, then we know that there can
+	 * be no lost holes.  If the hole flag is set, then it's ok
+	 * if the on-disk holemap doesn't describe everything as long
+	 * as what it does describe doesn't conflict with reality.
+	 */
+
+	reset_holes = 0;
+
+	bholemap.lost_holes = leaf->hdr.holes;
+	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++)  {
+		bholemap.hentries[i].base = INT_GET(leaf->hdr.freemap[i].base, ARCH_CONVERT);
+		bholemap.hentries[i].size = INT_GET(leaf->hdr.freemap[i].size, ARCH_CONVERT);
+	}
+
+	/*
+	 * Ok, now set up our own freespace list
+	 * (XFS_DIR_LEAF_MAPSIZE (3) * biggest regions)
+	 * and see if they match what's in the block
+	 */
+	bzero(&holemap, sizeof(da_hole_map_t));
+	process_da_freemap(mp, dir_freemap, &holemap);
+
+	if (zero_len_entries)  {
+		reset_holes = 1;
+	} else if (leaf->hdr.holes == 0)  {
+		if (holemap.lost_holes > 0)  {
+			if (verbose)
+				do_warn(
+	"- found unexpected lost holes in block %u, dir inode %llu\n",
+					da_bno, ino);
+
+			reset_holes = 1;
+		} else if (compare_da_freemaps(mp, &holemap, &bholemap,
+				XFS_DIR_LEAF_MAPSIZE, ino, da_bno))  {
+			if (verbose)
+				do_warn(
+			"- hole info non-optimal in block %u, dir inode %llu\n",
+					da_bno, ino);
+			reset_holes = 1;
+		}
+	} else if (verify_da_freemap(mp, dir_freemap, &holemap, ino, da_bno))  {
+		if (verbose)
+			do_warn(
+			"- hole info incorrect in block %u, dir inode %llu\n",
+				da_bno, ino);
+		reset_holes = 1;
+	}
+
+	if (reset_holes)  {
+		/*
+		 * have to reset block hole info
+		 */
+		if (verbose)  {
+			do_warn(
+	"- existing hole info for block %d, dir inode %llu (base, size) - \n",
+				da_bno, ino);
+			do_warn("- \t");
+			for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++)  {
+				do_warn(
+				"- (%d, %d) ", bholemap.hentries[i].base,
+					bholemap.hentries[i].size);  
+			}
+			do_warn("- holes flag = %d\n", bholemap.lost_holes);
+		}
+
+		if (!no_modify)  {
+			if (verbose)
+				do_warn(
+		"- compacting block %u in dir inode %llu\n",
+					da_bno, ino);
+
+			new_leaf = (xfs_dir_leafblock_t *) &dirbuf[0];
+
+			/*
+			 * copy leaf block header
+			 */
+			bcopy(&leaf->hdr, &new_leaf->hdr,
+				sizeof(xfs_dir_leaf_hdr_t));
+
+			/*
+			 * reset count in case we have some zero length entries
+			 * that are being junked
+			 */
+			num_entries = 0;
+			first_used = XFS_LBSIZE(mp);
+			first_byte = (char *) new_leaf
+					+ (__psint_t) XFS_LBSIZE(mp);
+
+			/*
+			 * copy entry table and pack names starting from the end
+			 * of the block
+			 */
+			for (i = 0, s_entry = &leaf->entries[0],
+					d_entry = &new_leaf->entries[0];
+					i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+					i++, s_entry++)  {
+				/*
+				 * skip zero-length entries
+				 */
+				if (s_entry->namelen == 0)
+					continue;
+
+				bytes = sizeof(xfs_dir_leaf_name_t)
+					+ s_entry->namelen - 1;
+
+				if ((__psint_t) first_byte - bytes <
+						sizeof(xfs_dir_leaf_entry_t)
+						+ (__psint_t) d_entry)  {
+					do_warn(
+	"not enough space in block %u of dir inode %llu for all entries\n",
+						da_bno, ino);
+					break;
+				}
+
+				first_used -= bytes;
+				first_byte -= bytes;
+
+				INT_SET(d_entry->nameidx, ARCH_CONVERT, first_used);
+				INT_SET(d_entry->hashval, ARCH_CONVERT, INT_GET(s_entry->hashval, ARCH_CONVERT));
+				d_entry->namelen = s_entry->namelen;
+				d_entry->pad2 = 0;
+
+				bcopy((char *) leaf + INT_GET(s_entry->nameidx, ARCH_CONVERT),
+					first_byte, bytes);
+
+				num_entries++;
+				d_entry++;
+			}
+
+			ASSERT((char *) first_byte >= (char *) d_entry);
+			ASSERT(first_used <= XFS_LBSIZE(mp));
+
+			/*
+			 * zero space between end of table and top of heap
+			 */
+			bzero(d_entry, (__psint_t) first_byte
+					- (__psint_t) d_entry);
+
+			/*
+			 * reset header info
+			 */
+			if (num_entries != INT_GET(new_leaf->hdr.count, ARCH_CONVERT))
+				INT_SET(new_leaf->hdr.count, ARCH_CONVERT, num_entries);
+
+			INT_SET(new_leaf->hdr.firstused, ARCH_CONVERT, first_used);
+			new_leaf->hdr.holes = 0;
+			new_leaf->hdr.pad1 = 0;
+
+			INT_SET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT, (__psint_t) d_entry
+							- (__psint_t) new_leaf);
+			INT_SET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT, (__psint_t) first_byte
+							- (__psint_t) d_entry);
+
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) < first_used);
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) ==
+					(__psint_t) (&new_leaf->entries[0])
+					- (__psint_t) new_leaf
+					+ i * sizeof(xfs_dir_leaf_entry_t));
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) +
+				INT_GET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT) == first_used);
+
+			INT_ZERO(new_leaf->hdr.freemap[1].base, ARCH_CONVERT);
+			INT_ZERO(new_leaf->hdr.freemap[1].size, ARCH_CONVERT);
+			INT_ZERO(new_leaf->hdr.freemap[2].base, ARCH_CONVERT);
+			INT_ZERO(new_leaf->hdr.freemap[2].size, ARCH_CONVERT);
+
+			/*
+			 * final step, copy block back
+			 */
+			bcopy(new_leaf, leaf, mp->m_sb.sb_blocksize);
+
+			*buf_dirty = 1;
+		} else  {
+			if (verbose)
+				do_warn(
+			"- would compact block %u in dir inode %llu\n",
+					da_bno, ino);
+		}
+	}
+#if 0
+	if (!no_modify)  {
+		/*
+		 * now take care of deleting or marking the entries with
+		 * zero-length namelen's
+		 */
+		junk_zerolen_dir_leaf_entries(mp, leaf, ino, buf_dirty);
+	}
+#endif
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_leaf_dir_block returns %d\n", res);
+#endif
+	return((res > 0) ? 1 : 0);
+}
+
+/*
+ * walks the leaf level; returns 0 if the directory is ok, 1 if it has to be junked.
+ */
+int
+process_leaf_dir_level(xfs_mount_t	*mp,
+			da_bt_cursor_t	*da_cursor,	/* in/out - level[0], greatest_bno updated */
+			int		ino_discovery,
+			int		*repair,	/* out - 1 if a leaf block was rewritten */
+			int		*dot,		/* handed on to process_leaf_dir_block */
+			int		*dotdot,	/* handed on to process_leaf_dir_block */
+			xfs_ino_t	*parent)	/* handed on to process_leaf_dir_block */
+{
+	xfs_dir_leafblock_t	*leaf;
+	xfs_buf_t		*bp;
+	xfs_ino_t		ino;
+	xfs_dfsbno_t		dev_bno;	/* what blkmap_get returns for da_bno */
+	xfs_dablk_t		da_bno;		/* leaf block being processed */
+	xfs_dablk_t		prev_bno;	/* previous leaf block, 0 for the first */
+	int			res = 0;	/* never changed; errors leave via error_out */
+	int			buf_dirty = 0;
+	xfs_daddr_t		bd_addr;	/* only used in the read error message */
+	xfs_dahash_t		current_hashval = 0;	/* greatest hashval of previous block */
+	xfs_dahash_t		greatest_hashval;	/* filled in by process_leaf_dir_block */
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_leaf_dir_level - ino %llu\n", da_cursor->ino);
+#endif
+	*repair = 0;
+	da_bno = da_cursor->level[0].bno;
+	ino = da_cursor->ino;
+	prev_bno = 0;
+
+	do {
+		dev_bno = blkmap_get(da_cursor->blkmap, da_bno);
+		/*
+		 * directory code uses 0 as the NULL block pointer
+		 * since 0 is the root block and no directory block
+		 * pointer can point to the root block of the btree
+		 */
+		ASSERT(da_bno != 0);
+
+		if (dev_bno == NULLDFSBNO) {
+			do_warn("can't map block %u for directory inode %llu\n",
+				da_bno, ino);
+			goto error_out;
+		}
+
+		bd_addr = (xfs_daddr_t)XFS_FSB_TO_DADDR(mp, dev_bno);
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno),
+					XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_warn("can't read file block %u (fsbno %llu, daddr %lld) "
+				"for directory inode %llu\n",
+				da_bno, dev_bno, (__int64_t) bd_addr, ino);
+			goto error_out;
+		}
+
+		leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+
+		/*
+		 * check magic number for leaf directory btree block
+		 */
+		if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+			do_warn("bad directory leaf magic # %#x for dir ino %llu\n",
+				INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+		/*
+		 * keep track of greatest block # -- that gets us the length
+		 * of the directory (process_node_dir sizes the inode from it)
+		 */
+		if (da_bno > da_cursor->greatest_bno)
+			da_cursor->greatest_bno = da_bno;
+
+		buf_dirty = 0;
+		/*
+		 * for each block, process the block, verify its path,
+		 * then get next block.  update cursor values along the way
+		 */
+		if (process_leaf_dir_block(mp, leaf, da_bno, ino,
+				current_hashval, ino_discovery,
+				da_cursor->blkmap, dot, dotdot, parent,
+				&buf_dirty, &greatest_hashval))  {
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		/*
+		 * index is set to hdr.count to match the
+		 * indexes of the interior blocks -- which at the
+		 * end of the block will point to 1 after the final
+		 * real entry in the block
+		 */
+		da_cursor->level[0].hashval = greatest_hashval;
+		da_cursor->level[0].bp = bp;
+		da_cursor->level[0].bno = da_bno;
+		da_cursor->level[0].index = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		da_cursor->level[0].dirty = buf_dirty;
+
+		if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno)  {
+			do_warn("bad sibling back pointer for directory block %u "
+				"in directory inode %llu\n", da_bno, ino);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		prev_bno = da_bno;
+		da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+
+		if (da_bno != 0)
+			if (verify_da_path(mp, da_cursor, 0))  {
+				libxfs_putbuf(bp);
+				goto error_out;
+			}
+
+		current_hashval = greatest_hashval;
+
+		ASSERT(buf_dirty == 0 || buf_dirty && !no_modify);
+
+		if (buf_dirty && !no_modify)  {
+			*repair = 1;
+			libxfs_writebuf(bp, 0);
+		}
+		else
+			libxfs_putbuf(bp);
+	} while (da_bno != 0 && res == 0);
+
+	if (verify_final_da_path(mp, da_cursor, 0))  {
+		/*
+		 * the final (right-hand-side) path up the tree is bad
+		 */
+		do_warn("bad hash path in directory %llu\n", da_cursor->ino);
+		goto error_out;
+	}
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_leaf_dir_level returns %d (%s)\n",
+		res, ((res) ? "bad" : "ok"));
+#endif
+	/*
+	 * redundant but just for testing
+	 */
+	release_da_cursor(mp, da_cursor, 0);
+
+	return(res);
+
+error_out:
+	/*
+	 * release all buffers holding interior btree blocks
+	 */
+	err_release_da_cursor(mp, da_cursor, 0);
+
+	return(1);
+}
+
+/*
+ * a node directory is a true btree directory -- where the directory
+ * has gotten big enough that it is represented as a non-trivial (e.g.
+ * has more than just a root block) btree.
+ *
+ * Note that if we run into any problems, we trash the
+ * directory.  Even if it's the root directory,
+ * we'll be able to traverse all the disconnected
+ * subtrees later (phase 6).
+ *
+ * *repair, *dot and *dotdot are zeroed and *parent is set to
+ * NULLFSINO on entry; the leaf-level walk fills them in.
+ *
+ * dirname can be set to NULL if the name is unknown (or to
+ * the string representation of the inode); it is not used here.
+ *
+ * returns 0 if things are ok, 1 if bad (directory needs to be junked)
+ */
+/* ARGSUSED */
+int
+process_node_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	blkmap_t	*blkmap,
+	int		*dot,
+	int		*dotdot,
+	xfs_ino_t	*parent,	/* out - parent ino #  or NULLFSINO */
+	char		*dirname,
+	int		*repair)
+{
+	xfs_dablk_t			bno;
+	int				error = 0;
+	da_bt_cursor_t			da_cursor;
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_node_dir - ino %llu\n", ino);
+#endif
+	*repair = *dot = *dotdot = 0;
+	*parent = NULLFSINO;
+
+	/*
+	 * try again -- traverse down left-side of tree until we hit
+	 * the left-most leaf block setting up the btree cursor along
+	 * the way.  Then walk the leaf blocks left-to-right, calling
+	 * a parent-verification routine each time we traverse a block.
+	 */
+	bzero(&da_cursor, sizeof(da_bt_cursor_t));
+
+	da_cursor.active = 0;
+	da_cursor.type = 0;
+	da_cursor.ino = ino;
+	da_cursor.dip = dip;
+	da_cursor.greatest_bno = 0;
+	da_cursor.blkmap = blkmap;
+
+	/*
+	 * now process interior node (a return of 0 means failure)
+	 */
+	error = traverse_int_dablock(mp, &da_cursor, &bno, XFS_DATA_FORK);
+
+	if (error == 0)
+		return(1);
+
+	/*
+	 * now pass cursor and bno into leaf-block processing routine
+	 * the leaf dir level routine checks the interior paths
+	 * up to the root including the final right-most path.
+	 */
+	error = process_leaf_dir_level(mp, &da_cursor, ino_discovery,
+					repair, dot, dotdot, parent);
+
+	if (error)
+		return(1);
+
+	/*
+	 * sanity check inode size.  the byte counts are computed in
+	 * xfs_fsize_t so that a large block number cannot wrap around
+	 * before it is compared with the inode size or UINT_MAX.
+	 */
+	if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <
+			((xfs_fsize_t) da_cursor.greatest_bno + 1) * mp->m_sb.sb_blocksize)  {
+		if ((xfs_fsize_t) da_cursor.greatest_bno
+				* mp->m_sb.sb_blocksize > UINT_MAX)  {
+			do_warn(
+"out of range internal directory block numbers (inode %llu)\n",
+				ino);
+			return(1);
+		}
+
+		do_warn(
+"setting directory inode (%llu) size to %llu bytes, was %lld bytes\n",
+			ino,
+			(xfs_dfiloff_t) (da_cursor.greatest_bno + 1)
+				* mp->m_sb.sb_blocksize,
+			INT_GET(dip->di_core.di_size, ARCH_CONVERT));
+
+		INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)
+			(da_cursor.greatest_bno + 1) * mp->m_sb.sb_blocksize);
+	}
+	return(0);
+}
+
+/*
+ * a leaf directory is one where the directory is too big for
+ * the inode data fork but is small enough to fit into one
+ * directory btree block (filesystem block) outside the inode
+ *
+ * *parent is set to NULLFSINO on entry and is filled in by
+ * process_leaf_dir_block when it finds a .. entry.  *repair is
+ * set to 1 if the leaf block was fixed up and written back.
+ *
+ * dirname can be set to NULL if the name is unknown (or to
+ * the string representation of the inode)
+ *
+ * returns 0 if things are ok, 1 if bad (directory needs to be junked)
+ */
+/* ARGSUSED */
+int
+process_leaf_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,
+	blkmap_t	*blkmap,
+	int		*dot,		/* out - 1 if there is a dot, else 0 */
+	int		*dotdot,	/* out - 1 if there's a dotdot, else 0 */
+	xfs_ino_t	*parent,	/* out - parent ino #  or NULLFSINO */
+	char		*dirname,	/* in - directory pathname */
+	int		*repair)	/* out - 1 if something was fixed */
+{
+	xfs_dir_leafblock_t	*leaf;
+	xfs_dahash_t	next_hashval;
+	xfs_dfsbno_t	bno;
+	xfs_buf_t	*bp;
+	int		buf_dirty = 0;
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_leaf_dir - ino %llu\n", ino);
+#endif
+	*repair = *dot = *dotdot = 0;
+	*parent = NULLFSINO;
+
+	bno = blkmap_get(blkmap, 0);
+	if (bno == NULLDFSBNO) {
+		do_warn("block 0 for directory inode %llu is missing\n", ino);
+		return(1);
+	}
+	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+			XFS_FSB_TO_BB(mp, 1), 0);
+	if (!bp) {
+		do_warn("can't read block 0 for directory inode %llu\n", ino);
+		return(1);
+	}
+	leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+
+	/*
+	 * verify leaf block, starting with the magic number
+	 * for a leaf directory btree block
+	 */
+	if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+		do_warn("bad directory leaf magic # %#x for dir ino %llu\n",
+			INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+		libxfs_putbuf(bp);
+		return(1);
+	}
+
+	if (process_leaf_dir_block(mp, leaf, 0, ino, 0, ino_discovery, blkmap,
+			dot, dotdot, parent, &buf_dirty, &next_hashval)) {
+		/*
+		 * the block is bad.  lose the directory.  XXX - later, we
+		 * should try and just lose the block, not the whole directory.
+		 * NOTE(review): a .. entry that points to self leaves *dotdot
+		 * == 1 with *parent == NULLFSINO -- confirm the ASSERT holds.
+		 */
+		ASSERT(*dotdot == 0 || (*dotdot == 1 && *parent != NULLFSINO));
+		libxfs_putbuf(bp);
+		return(1);
+	}
+
+	/*
+	 * check sibling pointers in leaf block (above doesn't do it)
+	 */
+	if (INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) != 0 ||
+				INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != 0)  {
+		if (!no_modify)  {
+			do_warn("clearing forw/back pointers for directory inode "
+				"%llu\n", ino);
+			buf_dirty = 1;
+			INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT);
+			INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT);
+		} else  {
+			do_warn("would clear forw/back pointers for directory inode "
+				"%llu\n", ino);
+		}
+	}
+
+	ASSERT(buf_dirty == 0 || (buf_dirty && !no_modify));
+	if (buf_dirty && !no_modify)  {
+		*repair = 1;
+		libxfs_writebuf(bp, 0);
+	} else  {
+		libxfs_putbuf(bp);
+	}
+
+	return(0);
+}
+
+/*
+ * returns 1 if things are bad (directory needs to be junked)
+ * and 0 if things are ok.  If ino_discovery is 1, add unknown
+ * inodes to uncertain inode list.  *parent starts out as NULLFSINO.
+ */
+int
+process_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,
+	char		*dirname,
+	xfs_ino_t	*parent,
+	blkmap_t	*blkmap)
+{
+	int		dot;
+	int		dotdot;
+	int		repair = 0;	/* filled in by the helpers, unused here */
+	int		res = 0;
+
+	*parent = NULLFSINO;
+	dot = dotdot = 0;
+
+	/*
+	 * branch off depending on the type of inode.  This routine
+	 * is only called ONCE so all the subordinate routines will
+	 * fix '.' and junk '..' if they're bogus.
+	 */
+	if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT))  {
+		dot = 1;
+		dotdot = 1;
+		if (process_shortform_dir(mp, ino, dip, ino_discovery,
+				dino_dirty, parent, dirname, &repair))  {
+			res = 1;
+		}
+	} else if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_LBSIZE(mp))  {
+		if (process_leaf_dir(mp, ino, dip, ino_discovery,
+				dino_dirty, blkmap, &dot, &dotdot,
+				parent, dirname, &repair))  {
+			res = 1;
+		}
+	} else  {
+		if (process_node_dir(mp, ino, dip, ino_discovery,
+				blkmap, &dot, &dotdot,
+				parent, dirname, &repair))  {
+			res = 1;
+		}
+	}
+	/*
+	 * bad . entries in all directories will be fixed up in phase 6
+	 */
+	if (dot == 0) {
+		do_warn("no . entry for directory %llu\n", ino);
+	}
+
+	/*
+	 * shortform dirs always have a .. entry.  .. for all longform
+	 * directories will get fixed in phase 6. .. for other shortform
+	 * dirs also get fixed there.  .. for a shortform root was
+	 * fixed in place since we know what it should be
+	 */
+	if (dotdot == 0 && ino != mp->m_sb.sb_rootino) {
+		do_warn("no .. entry for directory %llu\n", ino);
+	} else if (dotdot == 0 && ino == mp->m_sb.sb_rootino) {
+		do_warn("no .. entry for root directory %llu\n", ino);
+		need_root_dotdot = 1;
+	}
+	
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "(process_dir), parent of %llu is %llu\n", ino, *parent);
+#endif
+	return(res);
+}
diff --git a/repair/dir.h b/repair/dir.h
new file mode 100644
index 000000000..9d2b069b0
--- /dev/null
+++ b/repair/dir.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_DIR_H
+#define _XR_DIR_H
+
+struct blkmap;
+
+/* freemap size in bytes: 1 bit per byte, max XFS blocksize == 64K bits / NBBY */
+#define DA_BMAP_SIZE		8192
+
+typedef unsigned char	da_freemap_t;	/* freemap storage unit, passed as da_freemap_t * */
+
+/*
+ * the cursor (da_bt_cursor_t) gets passed up and down the da btree
+ * processing routines.  The interior block processing routines use the
+ * cursor to determine if the pointers to and from the preceding
+ * and succeeding sibling blocks are ok and whether the values in
+ * the current block are consistent with the entries in the parent
+ * nodes.  When a block is traversed, a parent-verification routine
+ * is called to verify if the next logical entry in the next level up
+ * is consistent with the greatest hashval in the next block of the
+ * current level.  The verification routine is itself recursive and
+ * calls itself if it has to traverse an interior block to get
+ * the next logical entry.  The routine recurses upwards through
+ * the tree until it finds a block where it can simply step to
+ * the next entry.  The hashval in that entry should be equal to
+ * the hashval being passed to it (the greatest hashval in the block
+ * that the entry points to).  If that isn't true, then the tree
+ * is blown and we need to trash it, salvage and trash it, or fix it.
+ * Currently, we just trash it.  One da_level_state per tree level.
+ */
+typedef struct da_level_state  {
+	xfs_buf_t	*bp;		/* buffer holding this level's block */
+#ifdef XR_DIR_TRACE
+	xfs_da_intnode_t *n;		/* bp data */
+#endif
+	xfs_dablk_t	bno;		/* file block number */
+	xfs_dahash_t	hashval;	/* last verified hashval */
+	int		index;		/* current index in block */
+	int		dirty;		/* is buffer dirty ? (1 == yes) */
+} da_level_state_t;
+
+typedef struct da_bt_cursor  {
+	int			active;	/* highest level in tree (# levels-1) */
+	int			type;	/* 0 if dir, 1 if attr */
+	xfs_ino_t		ino;	/* inode # being processed */
+	xfs_dablk_t		greatest_bno;	/* greatest da block # seen so far */
+	xfs_dinode_t		*dip;	/* dinode supplied by the caller for ino */
+	da_level_state_t	level[XFS_DA_NODE_MAXDEPTH];	/* [0] is the leaf level */
+	struct blkmap		*blkmap;	/* maps da block #s (blkmap_get) */
+} da_bt_cursor_t;
+
+
+/* ROUTINES */
+
+void				/* error path: release interior block buffers */
+err_release_da_cursor(
+	xfs_mount_t	*mp,
+	da_bt_cursor_t	*cursor,
+	int		prev_level);	/* leaf-level callers pass 0 */
+
+xfs_dfsbno_t
+get_first_dblock_fsbno(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dino);
+
+void
+init_da_freemap(
+	da_freemap_t *dir_freemap);
+
+int				/* nonzero if the name is illegal */
+namecheck(
+	char		*name, 
+	int 		length);
+
+int				/* nonzero if the directory is bad */
+process_shortform_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,	/* is dinode buffer dirty? */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry doesn't exist */
+	char		*dirname,	/* directory pathname */
+	int		*repair);	/* out - 1 if dir was fixed up */
+
+int				/* 1 if dir needs to be junked, 0 if ok */
+process_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dirty,		/* is dinode buffer dirty? */
+	char		*dirname,
+	xfs_ino_t	*parent,	/* out - reset to NULLFSINO on entry */
+	struct blkmap	*blkmap);
+
+void
+release_da_cursor(
+	xfs_mount_t	*mp,
+	da_bt_cursor_t	*cursor,
+	int		prev_level);	/* leaf-level callers pass 0 */
+
+int				/* nonzero if range conflicts with used space */
+set_da_freemap(
+	xfs_mount_t *mp, da_freemap_t *map,
+	int start, int stop);	/* byte offsets within the block */
+
+int				/* 0 on failure */
+traverse_int_dablock(
+	xfs_mount_t	*mp,
+	da_bt_cursor_t		*da_cursor,
+	xfs_dablk_t		*rbno,
+	int 			whichfork);	/* e.g. XFS_DATA_FORK */
+
+int				/* nonzero if the path is bad */
+verify_da_path(
+	xfs_mount_t	*mp,
+	da_bt_cursor_t		*cursor,
+	const int		p_level);	/* leaf-level callers pass 0 */
+
+int				/* nonzero if the path is bad */
+verify_final_da_path(
+	xfs_mount_t	*mp,
+	da_bt_cursor_t		*cursor,
+	const int		p_level);	/* leaf-level callers pass 0 */
+
+
+#endif /* _XR_DIR_H */
diff --git a/repair/dir2.c b/repair/dir2.c
new file mode 100644
index 000000000..e2675df9b
--- /dev/null
+++ b/repair/dir2.c
@@ -0,0 +1,2070 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "dir2.h"
+#include "bmap.h"
+
+/*
+ * Tag bad directory entries with this.
+ * We can't tag them with -1 since that will look like a 
+ * data_unused_t instead of a data_entry_t.
+ */
+#define	BADFSINO	((xfs_ino_t)0xfeffffffffffffffULL)
+
+/*
+ * Known bad inode list.  These are seen when the leaf and node
+ * block linkages are incorrect.
+ */
+typedef struct dir2_bad {
+	xfs_ino_t	ino;		/* inode number recorded as bad */
+	struct dir2_bad	*next;		/* next (older) entry, NULL at the tail */
+} dir2_bad_t;
+dir2_bad_t *dir2_bad_list;		/* list head; new entries go in front */
+
+void
+dir2_add_badlist(
+	xfs_ino_t	ino)		/* inode number to record as bad */
+{
+	dir2_bad_t	*l;
+	/* push a new node for ino on the front of dir2_bad_list */
+	if ((l = malloc(sizeof(dir2_bad_t))) == NULL) {
+		do_error("malloc failed (%u bytes) dir2_add_badlist:ino %llu\n",
+			(unsigned int)sizeof(dir2_bad_t), ino); /* %u needs int */
+		exit(1);
+	}
+	l->next = dir2_bad_list;
+	dir2_bad_list = l;
+	l->ino = ino;
+}
+
+int
+dir2_is_badino(
+	xfs_ino_t	ino)
+{
+	dir2_bad_t	*cur = dir2_bad_list;
+
+	/* linear scan of the bad list: 1 if ino is on it, 0 otherwise */
+	while (cur != NULL && cur->ino != ino)
+		cur = cur->next;
+	return cur != NULL;
+}
+
+/*
+ * Multibuffer handling: a V2 directory block can be noncontiguous.  Read the
+ * nex extents in bmp[] into one xfs_dabuf_t; returns NULL if a read fails.
+ */
+xfs_dabuf_t *
+da_read_buf(
+	xfs_mount_t	*mp,
+	int		nex,
+	bmap_ext_t	*bmp)
+{
+	xfs_buf_t	*bp;
+	xfs_buf_t	**bplist;	/* scratch list, freed before returning */
+	xfs_dabuf_t	*dabuf;
+	int		i;
+	int		off;
+
+	bplist = calloc(nex, sizeof(*bplist));
+	if (bplist == NULL) {
+		do_error("couldn't malloc dir2 buffer list\n");
+		exit(1);
+	}
+	for (i = 0; i < nex; i++) {
+		bplist[i] = libxfs_readbuf(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
+				XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0);
+		if (!bplist[i])
+			goto failed;
+	}
+	dabuf = malloc(XFS_DA_BUF_SIZE(nex));
+	if (dabuf == NULL) {
+		do_error("couldn't malloc dir2 buffer header\n");
+		exit(1);
+	}
+	dabuf->dirty = 0;
+	dabuf->nbuf = nex;
+	if (nex == 1) {		/* one extent: data aliases the buffer itself */
+		dabuf->bps[0] = bp = bplist[0];
+		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
+		dabuf->data = XFS_BUF_PTR(bp);
+	} else {		/* several extents: gather into one malloc'd copy */
+		for (i = 0, dabuf->bbcount = 0; i < nex; i++) {
+			dabuf->bps[i] = bp = bplist[i];
+			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
+		}
+		dabuf->data = malloc(BBTOB(dabuf->bbcount));
+		if (dabuf->data == NULL) {
+			do_error("couldn't malloc dir2 buffer data\n");
+			exit(1);
+		}
+		for (i = off = 0; i < nex; i++, off += XFS_BUF_COUNT(bp)) {
+			bp = bplist[i];
+			bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
+				XFS_BUF_COUNT(bp));
+		}
+	}
+	free(bplist);		/* bps[] holds the pointers now; don't leak the list */
+	return dabuf;
+failed:
+	while (--i >= 0)	/* release only the buffers that were actually read */
+		libxfs_putbuf(bplist[i]);
+	free(bplist);
+	return NULL;
+}
+
+static void
+da_buf_clean(
+	xfs_dabuf_t	*dabuf)
+{
+	xfs_buf_t	*bp;
+	int		i;
+	int		off;
+	/* if dirty, copy data back into each underlying buffer; clear flag */
+	if (dabuf->dirty) {
+		dabuf->dirty = 0;
+		for (i=off=0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
+			bp = dabuf->bps[i];	/* off = bytes already copied back */
+			bcopy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+				XFS_BUF_COUNT(bp));
+		}
+	}
+}
+
+static void
+da_buf_done(
+	xfs_dabuf_t	*dabuf)		/* header is freed; do not use afterwards */
+{
+	da_buf_clean(dabuf);		/* push any dirty data back first */
+	if (dabuf->nbuf > 1)		/* da_read_buf malloc'd data in this case */
+		free(dabuf->data);
+	free(dabuf);			/* the xfs_buf_t's in bps[] are not released */
+}
+
+int
+da_bwrite(
+	xfs_mount_t	*mp,		/* not referenced in this function */
+	xfs_dabuf_t	*dabuf)		/* freed here via da_buf_done() */
+{
+	xfs_buf_t	*bp;
+	xfs_buf_t	**bplist;
+	int		e;
+	int		error;
+	int		i;
+	int		nbuf;
+	/* write every buffer behind dabuf; returns the last nonzero error */
+	if ((nbuf = dabuf->nbuf) == 1) {
+		bplist = &bp;		/* one buffer: use the local as the list */
+		bp = dabuf->bps[0];
+	} else {
+		bplist = malloc(nbuf * sizeof(*bplist));
+		if (bplist == NULL) {
+			do_error("couldn't malloc dir2 buffer list\n");
+			exit(1);
+		}
+		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+	}
+	da_buf_done(dabuf);		/* bps[] was copied out since this frees dabuf */
+	for (i = error = 0; i < nbuf; i++) {
+		e = libxfs_writebuf(bplist[i], 0);
+		if (e)
+			error = e;	/* keep going; remember the failure */
+	}
+	if (bplist != &bp)
+		free(bplist);
+	return error;
+}
+
+void
+da_brelse(
+	xfs_dabuf_t	*dabuf)		/* freed here via da_buf_done() */
+{
+	xfs_buf_t	*bp;
+	xfs_buf_t	**bplist;
+	int		i;
+	int		nbuf;
+	/* release (libxfs_putbuf) every buffer behind dabuf without writing */
+	if ((nbuf = dabuf->nbuf) == 1) {
+		bplist = &bp;		/* one buffer: use the local as the list */
+		bp = dabuf->bps[0];
+	} else {
+		bplist = malloc(nbuf * sizeof(*bplist));
+		if (bplist == NULL) {
+			do_error("couldn't malloc dir2 buffer list\n");
+			exit(1);
+		}
+		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+	}
+	da_buf_done(dabuf);		/* bps[] was copied out since this frees dabuf */
+	for (i = 0; i < nbuf; i++)
+		libxfs_putbuf(bplist[i]);
+	if (bplist != &bp)
+		free(bplist);
+}
+
+/*
+ * walk tree from root to the left-most leaf block reading in
+ * blocks and setting up cursor.  passes back file block number of the
+ * left-most leaf block if successful (bno).  returns 1 if successful,
+ * 0 if unsuccessful.
+ */
+int
+traverse_int_dir2block(xfs_mount_t	*mp,
+		dir2_bt_cursor_t	*da_cursor,
+		xfs_dablk_t		*rbno)
+{
+	bmap_ext_t		*bmp;
+	xfs_dablk_t		bno;
+	xfs_dabuf_t		*bp;
+	int			i;
+	int			nex;
+	xfs_da_intnode_t	*node;
+
+	/*
+	 * traverse down left-side of tree until we hit the
+	 * left-most leaf block setting up the btree cursor along
+	 * the way.
+	 */
+	bno = mp->m_dirleafblk;
+	i = -1;			/* -1 == level of the root not yet known */
+	node = NULL;
+	da_cursor->active = 0;
+
+	do {
+		/*
+		 * read in each block along the way and set up cursor
+		 */
+		nex = blkmap_getn(da_cursor->blkmap, bno, mp->m_dirblkfsbs,
+			&bmp);
+
+		if (nex == 0)
+			goto error_out;
+
+		bp = da_read_buf(mp, nex, bmp);
+		free(bmp);
+		if (bp == NULL) {
+			do_warn("can't read block %u for directory inode "
+				"%llu\n",
+				bno, da_cursor->ino);
+			goto error_out;
+		}
+
+		node = bp->data;	/* node is only valid while bp is held */
+
+		if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) ==
+					XFS_DIR2_LEAFN_MAGIC)  {
+			if ( i != -1 ) {
+				do_warn("found non-root LEAFN node in inode "
+					"%llu bno = %u\n",
+					da_cursor->ino, bno);
+			}
+			if (INT_GET(node->hdr.level, ARCH_CONVERT) >= 1) {
+				do_warn("LEAFN node level is %d inode %llu "
+					"bno = %u\n",
+					INT_GET(node->hdr.level, ARCH_CONVERT),
+						da_cursor->ino, bno);
+			}
+			*rbno = 0;
+			da_brelse(bp);
+			return(1);
+		} else if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) !=
+					XFS_DA_NODE_MAGIC)  {
+			do_warn("bad dir magic number 0x%x in inode %llu "
+				"bno = %u\n",
+				INT_GET(node->hdr.info.magic, ARCH_CONVERT),
+					da_cursor->ino, bno);
+			da_brelse(bp);	/* after the warning: it reads node */
+			goto error_out;
+		}
+		if (INT_GET(node->hdr.count, ARCH_CONVERT) >
+					XFS_DA_NODE_ENTRIES(mp))  {
+			do_warn("bad record count in inode %llu, count = %d, "
+				"max = %d\n", da_cursor->ino,
+				INT_GET(node->hdr.count, ARCH_CONVERT),
+				XFS_DA_NODE_ENTRIES(mp));
+			da_brelse(bp);	/* after the warning: it reads node */
+			goto error_out;
+		}
+
+		/*
+		 * maintain level counter
+		 * NOTE(review): on-disk level is not range-checked before
+		 * it indexes da_cursor->level[] - confirm the array bound
+		 */
+		if (i == -1)
+			i = da_cursor->active =
+				INT_GET(node->hdr.level, ARCH_CONVERT);
+		else  {
+			if (INT_GET(node->hdr.level, ARCH_CONVERT) == i - 1)  {
+				i--;
+			} else  {
+				do_warn("bad directory btree for directory "
+					"inode %llu\n",
+					da_cursor->ino);
+				da_brelse(bp);
+				goto error_out;
+			}
+		}
+		/* stash this block in the cursor; bp stays held on success */
+		da_cursor->level[i].hashval =
+			INT_GET(node->btree[0].hashval, ARCH_CONVERT);
+		da_cursor->level[i].bp = bp;
+		da_cursor->level[i].bno = bno;
+		da_cursor->level[i].index = 0;
+
+		/*
+		 * set up new bno for next level down
+		 */
+		bno = INT_GET(node->btree[0].before, ARCH_CONVERT);
+	} while (node != NULL && i > 1);
+
+	/*
+	 * now return block number and get out
+	 */
+	*rbno = da_cursor->level[0].bno = bno;
+	return(1);
+
+error_out:
+	while (i > 1 && i <= da_cursor->active)  {
+		da_brelse(da_cursor->level[i].bp);
+		i++;
+	}
+
+	return(0);
+}
+
+/*
+ * blow out buffer for this level and all the rest above as well
+ * if error == 0, we are not expecting to encounter any unreleased
+ * buffers (e.g. if we do, it's a mistake).  if error == 1, we're
+ * in an error-handling case so unreleased buffers may exist.
+ */
+void
+release_dir2_cursor_int(xfs_mount_t		*mp,
+			dir2_bt_cursor_t	*cursor,
+			int			prev_level,
+			int			error)
+{
+	int	level = prev_level + 1;	/* first level to release */
+
+	if (cursor->level[level].bp != NULL)  {
+		if (!error)  {
+			do_warn("release_dir2_cursor_int got unexpected "
+				"non-null bp, dabno = %u\n",
+				cursor->level[level].bno);
+		}
+		ASSERT(error != 0);	/* a held bp is only legal when error != 0 */
+
+		da_brelse(cursor->level[level].bp);
+		cursor->level[level].bp = NULL;
+	}
+	/* recurse upward until the level cursor->active has been handled */
+	if (level < cursor->active)
+		release_dir2_cursor_int(mp, cursor, level, error);
+
+	return;
+}
+
+void
+release_dir2_cursor(xfs_mount_t		*mp,
+		dir2_bt_cursor_t	*cursor,
+		int			prev_level)
+{
+	release_dir2_cursor_int(mp, cursor, prev_level, 0); /* 0: held bp is a bug */
+}
+
+void
+err_release_dir2_cursor(xfs_mount_t		*mp,
+			dir2_bt_cursor_t	*cursor,
+			int			prev_level)
+{
+	release_dir2_cursor_int(mp, cursor, prev_level, 1); /* 1: held bp's allowed */
+}
+
+/*
+ * make sure that all entries in all blocks along the right side
+ * of the tree are used and hashval's are consistent.  level is the
+ * level of the descendent block.  returns 0 if good (even if it had
+ * to be fixed up), and 1 if bad.  The right edge of the tree is
+ * technically a block boundary.  This routine should be used then
+ * instead of verify_dir2_path().
+ */
+int
+verify_final_dir2_path(xfs_mount_t	*mp,
+		dir2_bt_cursor_t	*cursor,
+		const int		p_level)
+{
+	xfs_da_intnode_t	*node;
+	xfs_dahash_t		hashval;
+	int			bad = 0;
+	int			entry;
+	int			this_level = p_level + 1;
+
+	/*
+	 * the index should point to the next "unprocessed" entry
+	 * in the block which should be the final (rightmost) entry
+	 */
+	entry = cursor->level[this_level].index;
+	node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->data);
+	/*
+	 * check internal block consistency on this level -- ensure
+	 * that all entries are used, encountered and expected hashvals
+	 * match, etc.
+	 */
+	if (entry != INT_GET(node->hdr.count, ARCH_CONVERT) - 1)  {
+		do_warn("directory block used/count inconsistency - %d / %hu\n",
+			entry, INT_GET(node->hdr.count, ARCH_CONVERT));
+		bad++;
+	}
+	/*
+	 * hash values monotonically increasing ???
+	 */
+	if (cursor->level[this_level].hashval >= INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+		do_warn("directory/attribute block hashvalue inconsistency, "
+			"expected > %u / saw %u\n",
+			cursor->level[this_level].hashval,
+			INT_GET(node->btree[entry].hashval, ARCH_CONVERT));
+		bad++;
+	}
+	if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) != 0)  {
+		do_warn("bad directory/attribute forward block pointer, "
+			"expected 0, saw %u\n",
+			INT_GET(node->hdr.info.forw, ARCH_CONVERT));
+		bad++;
+	}
+	if (bad)  {
+		do_warn("bad directory block in inode %llu\n", cursor->ino);
+		return(1);
+	}
+	/*
+	 * keep track of greatest block # -- that gets
+	 * us the length of the directory
+	 */
+	if (cursor->level[this_level].bno > cursor->greatest_bno)
+		cursor->greatest_bno = cursor->level[this_level].bno;
+
+	/*
+	 * ok, now check descendant block number against this level
+	 */
+	if (cursor->level[p_level].bno != INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+		return(1);
+	}
+
+	if (cursor->level[p_level].hashval != INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+		if (!no_modify)  {
+			do_warn("correcting bad hashval in non-leaf dir "
+				"block\n");
+			do_warn("\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+			INT_SET(node->btree[entry].hashval, ARCH_CONVERT, cursor->level[p_level].hashval);
+			cursor->level[this_level].dirty++;
+		} else  {
+			do_warn("would correct bad hashval in non-leaf dir "
+				"block\n");
+			do_warn("\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+		}
+	}
+
+	/* save the last hashval now: node is invalid once bp is released */
+	hashval = INT_GET(node->btree[entry].hashval, ARCH_CONVERT);
+	ASSERT(cursor->level[this_level].dirty == 0 ||
+		cursor->level[this_level].dirty && !no_modify);
+
+	if (cursor->level[this_level].dirty && !no_modify)
+		da_bwrite(mp, cursor->level[this_level].bp);
+	else
+		da_brelse(cursor->level[this_level].bp);
+
+	cursor->level[this_level].bp = NULL;
+
+	/*
+	 * bail out if this is the root block (top of tree)
+	 */
+	if (this_level >= cursor->active)  {
+		return(0);
+	}
+	/*
+	 * set hashvalue to correctly reflect the now-validated
+	 * last entry in this block and continue upwards validation
+	 */
+	cursor->level[this_level].hashval = hashval;
+
+	return(verify_final_dir2_path(mp, cursor, this_level));
+}
+
+/*
+ * Verifies the path from a descendant block up to the root.
+ * Should be called when the descendant level traversal hits
+ * a block boundary before crossing the boundary (reading in a new
+ * block).
+ *
+ * the directory/attr btrees work differently to the other fs btrees.
+ * each interior block contains records that are <hashval, bno>
+ * pairs.  The bno is a file bno, not a filesystem bno.  The last
+ * hashvalue in the block <bno> will be <hashval>.  BUT unlike
+ * the freespace btrees, the *last* value in each block gets
+ * propagated up the tree instead of the first value in each block.
+ * that is, the interior records point to child blocks and the *greatest*
+ * hash value contained by the child block is the one the block above
+ * uses as the key for the child block.
+ *
+ * level is the level of the descendent block.  returns 0 if good,
+ * and 1 if bad.  The descendant block may be a leaf block.
+ *
+ * the invariant here is that the values in the cursor for the
+ * levels beneath this level (this_level) and the cursor index
+ * for this level *must* be valid.
+ *
+ * that is, the hashval/bno info is accurate for all
+ * DESCENDANTS and match what the node[index] information
+ * for the current index in the cursor for this level.
+ *
+ * the index values in the cursor for the descendant level
+ * are allowed to be off by one as they will reflect the
+ * next entry at those levels to be processed.
+ *
+ * the hashvalue for the current level can't be set until
+ * we hit the last entry in the block so, it's garbage
+ * until set by this routine.
+ *
+ * bno and bp for the current block/level are always valid
+ * since they have to be set so we can get a buffer for the
+ * block.
+ */
+int
+verify_dir2_path(xfs_mount_t	*mp,
+	dir2_bt_cursor_t	*cursor,
+	const int		p_level)
+{
+	xfs_da_intnode_t	*node;
+	xfs_da_intnode_t	*newnode;
+	xfs_dablk_t		dabno;
+	xfs_dabuf_t		*bp;
+	int			bad;
+	int			entry;
+	int			this_level = p_level + 1;
+	bmap_ext_t		*bmp;
+	int			nex;
+
+	/*
+	 * index is currently set to point to the entry that
+	 * should be processed now in this level.
+	 */
+	entry = cursor->level[this_level].index;
+	node = cursor->level[this_level].bp->data;
+
+	/*
+	 * if this block is out of entries, validate this
+	 * block and move on to the next block.
+	 * and update cursor value for said level
+	 */
+	if (entry >= INT_GET(node->hdr.count, ARCH_CONVERT))  {
+		/*
+		 * update the hash value for this level before
+		 * validating it.  bno value should be ok since
+		 * it was set when the block was first read in.
+		 */
+		cursor->level[this_level].hashval = 
+				INT_GET(node->btree[entry - 1].hashval, ARCH_CONVERT);
+
+		/*
+		 * keep track of greatest block # -- that gets
+		 * us the length of the directory
+		 */
+		if (cursor->level[this_level].bno > cursor->greatest_bno)
+			cursor->greatest_bno = cursor->level[this_level].bno;
+
+		/*
+		 * validate the path for the current used-up block
+		 * before we trash it
+		 */
+		if (verify_dir2_path(mp, cursor, this_level))
+			return(1);
+		/*
+		 * ok, now get the next buffer and check sibling pointers
+		 */
+		dabno = INT_GET(node->hdr.info.forw, ARCH_CONVERT);
+		ASSERT(dabno != 0);
+		nex = blkmap_getn(cursor->blkmap, dabno, mp->m_dirblkfsbs,
+			&bmp);
+		if (nex == 0) {
+			do_warn("can't get map info for block %u of directory "
+				"inode %llu\n",
+				dabno, cursor->ino);
+			return(1);
+		}
+
+		bp = da_read_buf(mp, nex, bmp);
+		free(bmp);	/* extent list is ours, as in traverse_int_dir2block */
+		if (bp == NULL) {
+			do_warn("can't read block %u for directory inode "
+				"%llu\n",
+				dabno, cursor->ino);
+			return(1);
+		}
+
+		newnode = bp->data;
+		/*
+		 * verify magic number and back pointer, sanity-check
+		 * entry count, verify level
+		 */
+		bad = 0;
+		if (INT_GET(newnode->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+			do_warn("bad magic number %x in block %u for directory "
+				"inode %llu\n",
+				INT_GET(newnode->hdr.info.magic, ARCH_CONVERT), dabno, cursor->ino);
+			bad++;
+		}
+		if (INT_GET(newnode->hdr.info.back, ARCH_CONVERT) != cursor->level[this_level].bno)  {
+			do_warn("bad back pointer in block %u for directory "
+				"inode %llu\n",
+				dabno, cursor->ino);
+			bad++;
+		}
+		if (INT_GET(newnode->hdr.count, ARCH_CONVERT) > XFS_DA_NODE_ENTRIES(mp))  {
+			do_warn("entry count %d too large in block %u for "
+				"directory inode %llu\n",
+				INT_GET(newnode->hdr.count, ARCH_CONVERT), dabno, cursor->ino);
+			bad++;
+		}
+		if (INT_GET(newnode->hdr.level, ARCH_CONVERT) != this_level)  {
+			do_warn("bad level %d in block %u for directory inode "
+				"%llu\n",
+				INT_GET(newnode->hdr.level, ARCH_CONVERT), dabno, cursor->ino);
+			bad++;
+		}
+		if (bad)  {
+			da_brelse(bp);
+			return(1);
+		}
+		/*
+		 * update cursor, write out the *current* level if
+		 * required.  don't write out the descendant level
+		 */
+		ASSERT(cursor->level[this_level].dirty == 0 ||
+			cursor->level[this_level].dirty && !no_modify);
+
+		if (cursor->level[this_level].dirty && !no_modify)
+			da_bwrite(mp, cursor->level[this_level].bp);
+		else
+			da_brelse(cursor->level[this_level].bp);
+		cursor->level[this_level].bp = bp;
+		cursor->level[this_level].dirty = 0;
+		cursor->level[this_level].bno = dabno;
+		cursor->level[this_level].hashval = INT_GET(newnode->btree[0].hashval, ARCH_CONVERT);
+		node = newnode;
+
+		entry = cursor->level[this_level].index = 0;
+	}
+	/*
+	 * the descendant's block number must match the current entry
+	 */
+	if (cursor->level[p_level].bno != INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+		return(1);
+	}
+	/*
+	 * ok, now validate last hashvalue in the descendant
+	 * block against the hashval in the current entry
+	 */
+	if (cursor->level[p_level].hashval != INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+		if (!no_modify)  {
+			do_warn("correcting bad hashval in interior dir "
+				"block\n");
+			do_warn("\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+			INT_SET(node->btree[entry].hashval, ARCH_CONVERT, cursor->level[p_level].hashval);
+			cursor->level[this_level].dirty++;
+		} else  {
+			do_warn("would correct bad hashval in interior dir "
+				"block\n");
+			do_warn("\tin (level %d) in inode %llu.\n",
+				this_level, cursor->ino);
+		}
+	}
+	/*
+	 * increment index for this level to point to next entry
+	 * (which should point to the next descendant block)
+	 */
+	cursor->level[this_level].index++;
+	return(0);
+}
+
+/*
+ * Fix up a shortform directory which was in long form (i8count set)
+ * and is now in short form (i8count clear).  The function is void:
+ * the new end of the data is passed back through *next_sfep.
+ */
+void
+process_sf_dir2_fixi8(
+	xfs_dir2_sf_t		*sfp,
+	xfs_dir2_sf_entry_t	**next_sfep)
+{
+	xfs_ino_t		ino;
+	xfs_dir2_sf_t		*newsfp;
+	xfs_dir2_sf_entry_t	*newsfep;
+	xfs_dir2_sf_t		*oldsfp;
+	xfs_dir2_sf_entry_t	*oldsfep;
+	int			oldsize;
+
+	newsfp = sfp;		/* the directory is rewritten in place */
+	oldsize = (__psint_t)*next_sfep - (__psint_t)sfp; /* bytes up to *next_sfep */
+	oldsfp = malloc(oldsize);
+	if (oldsfp == NULL) {
+		do_error("couldn't malloc dir2 shortform copy\n");
+		exit(1);
+	}
+	memmove(oldsfp, newsfp, oldsize);	/* read from this private copy */
+	INT_SET(newsfp->hdr.count, ARCH_CONVERT, INT_GET(oldsfp->hdr.count, ARCH_CONVERT));
+	newsfp->hdr.i8count = 0;	/* NOTE(review): presumably selects short inumbers */
+	ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT);
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(newsfp, &ino, &newsfp->hdr.parent, ARCH_CONVERT);
+	oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	newsfep = XFS_DIR2_SF_FIRSTENTRY(newsfp);
+	while ((int)((char *)oldsfep - (char *)oldsfp) < oldsize) {
+		newsfep->namelen = oldsfep->namelen;
+		XFS_DIR2_SF_PUT_OFFSET_ARCH(newsfep,
+			XFS_DIR2_SF_GET_OFFSET_ARCH(oldsfep, ARCH_CONVERT), ARCH_CONVERT);
+		memmove(newsfep->name, oldsfep->name, newsfep->namelen);
+		ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
+			XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
+		XFS_DIR2_SF_PUT_INUMBER_ARCH(newsfp, &ino,
+			XFS_DIR2_SF_INUMBERP(newsfep), ARCH_CONVERT);
+		oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep);
+		newsfep = XFS_DIR2_SF_NEXTENTRY(newsfp, newsfep);
+	}
+	*next_sfep = newsfep;	/* first byte past the rewritten entries */
+	free(oldsfp);
+}
+
+/*
+ * Regenerate legal (minimal) offsets for the shortform directory.
+ */
+static void
+process_sf_dir2_fixoff(
+	xfs_dinode_t	*dip)
+{
+	int			i;
+	int			offset;
+	xfs_dir2_sf_entry_t	*sfep;
+	xfs_dir2_sf_t		*sfp;
+
+	sfp = &dip->di_u.di_dir2sf;
+	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	offset = XFS_DIR2_DATA_FIRST_OFFSET;	/* offset given to the first entry */
+	for (i = 0; i < INT_GET(sfp->hdr.count, ARCH_CONVERT); i++) {
+		XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
+		offset += XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+	}
+}
+
+/*
+ * this routine performs inode discovery and tries to fix things
+ * in place.  available redundancy -- inode data size should match
+ * used directory space in inode.
+ * a non-zero return value means the directory is bogus and should be blasted.
+ */
+/* ARGSUSED */
+static int
+process_sf_dir2(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,	/* out - 1 if dinode buffer dirty */
+	char		*dirname,	/* directory pathname */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry not exist */
+	int		*repair)	/* out - 1 if dir was fixed up */
+{
+	int			bad_offset;
+	int			bad_sfnamelen;
+	int			i;
+	int			i8;
+	__int64_t		ino_dir_size;
+	int			ino_off;
+	ino_tree_node_t		*irec_p;
+	int			junkit;
+	char			*junkreason = NULL;
+	xfs_ino_t		lino;
+	int			max_size;
+	char			name[MAXNAMELEN + 1];
+	int			namelen;
+	xfs_dir2_sf_entry_t	*next_sfep;
+	int			num_entries;
+	int			offset;
+	xfs_dir2_sf_t		*sfp;
+	xfs_dir2_sf_entry_t	*sfep;
+	int			tmp_elen;
+	int			tmp_len;
+	xfs_dir2_sf_entry_t	*tmp_sfep;
+	xfs_ino_t		zero = 0;
+
+	sfp = &dip->di_u.di_dir2sf;
+	max_size = XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT);
+	num_entries = INT_GET(sfp->hdr.count, ARCH_CONVERT);
+	ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+	offset = XFS_DIR2_DATA_FIRST_OFFSET;
+	i8 = bad_offset = *repair = 0;
+
+	ASSERT(ino_dir_size <= max_size);
+
+	/* 
+	 * check for bad entry count
+	 */
+	if (num_entries * XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, 1) +
+		    XFS_DIR2_SF_HDR_SIZE(0) > max_size ||
+	    num_entries == 0)
+		num_entries = 0xFF;
+
+	/*
+	 * run through entries, stop at first bad entry, don't need
+	 * to check for .. since that's encoded in its own field
+	 */
+	sfep = next_sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0;
+	     i < num_entries && ino_dir_size > (char *)next_sfep - (char *)sfp;
+	     i++) {
+		tmp_sfep = NULL;
+		sfep = next_sfep;
+		junkit = 0;
+		bad_sfnamelen = 0;
+		lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+		/*
+		 * if entry points to self, junk it since only '.' or '..'
+		 * should do that and shortform dirs don't contain either
+		 * entry.  if inode number is invalid, trash entry.
+		 * if entry points to special inodes, trash it.
+		 * if inode is unknown but number is valid,
+		 * add it to the list of uncertain inodes.  don't
+		 * have to worry about an entry pointing to a
+		 * deleted lost+found inode because the entry was
+		 * deleted at the same time that the inode was cleared.
+		 */
+		if (lino == ino) {
+			junkit = 1;
+			junkreason = "current";
+		} else if (verify_inum(mp, lino)) {
+			junkit = 1;
+			junkreason = "invalid";
+		} else if (lino == mp->m_sb.sb_rbmino)  {
+			junkit = 1;
+			junkreason = "realtime bitmap";
+		} else if (lino == mp->m_sb.sb_rsumino)  {
+			junkit = 1;
+			junkreason = "realtime summary";
+		} else if (lino == mp->m_sb.sb_uquotino)  {
+			junkit = 1;
+			junkreason = "user quota";
+		} else if (lino == mp->m_sb.sb_pquotino)  {
+			junkit = 1;
+			junkreason = "project quota";
+		} else if ((irec_p = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+					XFS_INO_TO_AGINO(mp, lino))) != NULL) {
+			/*
+			 * if inode is marked free and we're in inode
+			 * discovery mode, leave the entry alone for now.
+			 * if the inode turns out to be used, we'll figure
+			 * that out when we scan it.  If the inode really
+			 * is free, we'll hit this code again in phase 4
+			 * after we've finished inode discovery and blow
+			 * out the entry then.
+			 */
+			ino_off = XFS_INO_TO_AGINO(mp, lino) -
+				irec_p->ino_startnum;
+			ASSERT(is_inode_confirmed(irec_p, ino_off));
+			if (is_inode_free(irec_p, ino_off) && !ino_discovery) {
+				junkit = 1;
+				junkreason = "free";
+			}
+		} else if (ino_discovery) {
+			/*
+			 * put the inode on the uncertain list.  we'll
+			 * pull the inode off the list and check it later.
+			 * if the inode turns out be bogus, we'll delete
+			 * this entry in phase 6.
+			 */
+			add_inode_uncertain(mp, lino, 0);
+		} else  {
+			/*
+			 * blow the entry out.  we know about all
+			 * undiscovered entries now (past inode discovery
+			 * phase) so this is clearly a bogus entry.
+			 */
+			junkit = 1;
+			junkreason = "non-existent";
+		}
+		namelen = sfep->namelen;
+		if (junkit)
+			do_warn("entry \"%*.*s\" in shortform directory %llu "
+				"references %s inode %llu\n",
+				namelen, namelen, sfep->name, ino, junkreason,
+				lino);
+		if (namelen == 0)  {
+			/*
+			 * if we're really lucky, this is
+			 * the last entry in which case we
+			 * can use the dir size to set the
+			 * namelen value.  otherwise, forget
+			 * it because we're not going to be
+			 * able to find the next entry.
+			 */
+			bad_sfnamelen = 1;
+
+			if (i == num_entries - 1)  {
+				namelen = ino_dir_size -
+					((__psint_t) &sfep->name[0] -
+					 (__psint_t) sfp);
+				if (!no_modify)  {
+					do_warn("zero length entry in "
+						"shortform dir %llu, resetting "
+						"to %d\n",
+						ino, namelen);
+					sfep->namelen = namelen;
+				} else  {
+					do_warn("zero length entry in "
+						"shortform dir %llu, would set "
+						"to %d\n",
+						ino, namelen);
+				}
+			} else  {
+				do_warn("zero length entry in shortform dir "
+					"%llu",
+					ino);
+				if (!no_modify)
+					do_warn(", junking %d entries\n",
+						num_entries - i);
+				else
+					do_warn(", would junk %d entries\n",
+						num_entries - i);
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		} else if ((__psint_t) sfep - (__psint_t) sfp +
+				+ XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+				> ino_dir_size)  {
+			bad_sfnamelen = 1;
+
+			if (i == num_entries - 1)  {
+				namelen = ino_dir_size -
+					((__psint_t) &sfep->name[0] -
+					 (__psint_t) sfp);
+				do_warn("size of last entry overflows space "
+					"left in in shortform dir %llu, ",
+					ino);
+				if (!no_modify)  {
+					do_warn("resetting to %d\n",
+						namelen);
+					sfep->namelen = namelen;
+					*dino_dirty = 1;
+				} else  {
+					do_warn("would reset to %d\n",
+						namelen);
+				}
+			} else  {
+				do_warn("size of entry #%d overflows space "
+					"left in in shortform dir %llu\n",
+					i, ino);
+				if (!no_modify)  {
+					if (i == num_entries - 1)
+						do_warn("junking entry #%d\n",
+							i);
+					else
+						do_warn("junking %d entries\n",
+							num_entries - i);
+				} else  {
+					if (i == num_entries - 1)
+						do_warn("would junk entry "
+							"#%d\n",
+							i);
+					else
+						do_warn("would junk %d "
+							"entries\n",
+							num_entries - i);
+				}
+
+				break;
+			}
+		}
+
+		/*
+		 * check for illegal chars in name.
+		 * no need to check for bad length because
+		 * the length value is stored in a byte
+		 * so it can't be too big, it can only wrap
+		 */
+		if (namecheck((char *)&sfep->name[0], namelen))  {
+			/*
+			 * junk entry
+			 */
+			do_warn("entry contains illegal character in shortform "
+				"dir %llu\n",
+				ino);
+			junkit = 1;
+		}
+
+		if (XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) < offset) {
+			do_warn("entry contains offset out of order in "
+				"shortform dir %llu\n",
+				ino);
+			bad_offset = 1;
+		}
+		offset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) +
+			 XFS_DIR2_DATA_ENTSIZE(namelen);
+
+		/*
+		 * junk the entry by copying up the rest of the
+		 * fork over the current entry and decrementing
+		 * the entry count.  if we're in no_modify mode,
+		 * just issue the warning instead.  then continue
+		 * the loop with the next_sfep pointer set to the
+		 * correct place in the fork and other counters
+		 * properly set to reflect the deletion if it
+		 * happened.
+		 */
+		if (junkit)  {
+			bcopy(sfep->name, name, namelen);
+			name[namelen] = '\0';
+
+			if (!no_modify)  {
+				tmp_elen =
+					XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep);
+				INT_MOD(dip->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+				ino_dir_size -= tmp_elen;
+
+				tmp_sfep = (xfs_dir2_sf_entry_t *)
+					((__psint_t) sfep + tmp_elen);
+				tmp_len = max_size - ((__psint_t) tmp_sfep
+							- (__psint_t) sfp);
+
+				memmove(sfep, tmp_sfep, tmp_len);
+
+				INT_MOD(sfp->hdr.count, ARCH_CONVERT, -1);
+				num_entries--;
+				bzero((void *) ((__psint_t) sfep + tmp_len),
+					tmp_elen);
+
+				/*
+				 * reset the tmp value to the current
+				 * pointer so we'll process the entry
+				 * we just moved up
+				 */
+				tmp_sfep = sfep;
+
+				/*
+				 * WARNING:  drop the index i by one
+				 * so it matches the decremented count
+				 * for accurate comparisons later
+				 */
+				i--;
+
+				*dino_dirty = 1;
+				*repair = 1;
+
+				do_warn("junking entry \"%s\" in directory "
+					"inode %llu\n",
+					name, ino);
+			} else  {
+				do_warn("would have junked entry \"%s\" in "
+					"directory inode %llu\n",
+					name, ino);
+			}
+		} else if (lino > XFS_DIR2_MAX_SHORT_INUM)
+			i8++;
+		/*
+		 * go onto next entry unless we've just junked an
+		 * entry in which the current entry pointer points
+		 * to an unprocessed entry.  have to take zero-len
+		 * entries into account in no modify mode since we
+		 * calculate size based on next_sfep.
+		 */
+		next_sfep = (tmp_sfep == NULL)
+			? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
+				+ ((!bad_sfnamelen)
+					? XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,
+						sfep)
+					: XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,
+						namelen)))
+			: tmp_sfep;
+	}
+
+	/* sync up sizes and entry counts */
+
+	if (INT_GET(sfp->hdr.count, ARCH_CONVERT) != i) {
+		if (no_modify) {
+			do_warn("would have corrected entry count in directory "
+				"%llu from %d to %d\n",
+				ino, INT_GET(sfp->hdr.count, ARCH_CONVERT), i);
+		} else {
+			do_warn("corrected entry count in directory %llu, was "
+				"%d, now %d\n",
+				ino, INT_GET(sfp->hdr.count, ARCH_CONVERT), i);
+			INT_SET(sfp->hdr.count, ARCH_CONVERT, i);
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+
+	if (sfp->hdr.i8count != i8)  {
+		if (no_modify)  {
+			do_warn("would have corrected i8 count in directory "
+				"%llu from %d to %d\n",
+				ino, sfp->hdr.i8count, i8);
+		} else {
+			do_warn("corrected i8 count in directory %llu, was %d, "
+				"now %d\n",
+				ino, sfp->hdr.i8count, i8);
+			if (i8 == 0)
+				process_sf_dir2_fixi8(sfp, &next_sfep);
+			else
+				sfp->hdr.i8count = i8;
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+
+	if ((__psint_t) next_sfep - (__psint_t) sfp != ino_dir_size)  {
+		if (no_modify)  {
+			do_warn("would have corrected directory %llu size from "
+				"%lld to %lld\n",
+				ino, (__int64_t) ino_dir_size,
+				(__int64_t)((__psint_t)next_sfep -
+					    (__psint_t)sfp));
+		} else  {
+			do_warn("corrected directory %llu size, was %lld, now "
+				"%lld\n",
+				ino, (__int64_t) ino_dir_size,
+				(__int64_t)((__psint_t)next_sfep -
+					    (__psint_t)sfp));
+
+			INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)((__psint_t)next_sfep -
+					      (__psint_t)sfp));
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+	if (offset + (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * sizeof(xfs_dir2_leaf_entry_t) +
+	    sizeof(xfs_dir2_block_tail_t) > mp->m_dirblksize) {
+		do_warn("directory %llu offsets too high\n", ino);
+		bad_offset = 1;
+	}
+	if (bad_offset) {
+		if (no_modify) {
+			do_warn("would have corrected entry offsets in "
+				"directory %llu\n",
+				ino);
+		} else {
+			do_warn("corrected entry offsets in directory %llu\n",
+				ino);
+			process_sf_dir2_fixoff(dip);
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+
+	/*
+	 * check parent (..) entry
+	 */
+	*parent = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+
+	/*
+	 * if parent entry is bogus, null it out.  we'll fix it later.
+	 */
+	if (verify_inum(mp, *parent))  {
+
+		do_warn("bogus .. inode number (%llu) in directory inode "
+			"%llu, ",
+				*parent, ino);
+		*parent = NULLFSINO;
+		if (!no_modify)  {
+			do_warn("clearing inode number\n");
+
+			XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &zero, &sfp->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else  {
+			do_warn("would clear inode number\n");
+		}
+	} else if (ino == mp->m_sb.sb_rootino && ino != *parent) {
+		/*
+		 * root directories must have .. == .
+		 */
+		if (!no_modify)  {
+			do_warn("corrected root directory %llu .. entry, was "
+				"%llu, now %llu\n",
+				ino, *parent, ino);
+			*parent = ino;
+			XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, parent, &sfp->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else  {
+			do_warn("would have corrected root directory %llu .. "
+				"entry from %llu to %llu\n",
+				ino, *parent, ino);
+		}
+	} else if (ino == *parent && ino != mp->m_sb.sb_rootino)  {
+		/*
+		 * likewise, non-root directories can't have .. pointing
+		 * to .
+		 */
+		*parent = NULLFSINO;
+		do_warn("bad .. entry in directory inode %llu, points to "
+			"self,",
+			ino);
+		if (!no_modify)  {
+			do_warn(" clearing inode number\n");
+
+			XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &zero, &sfp->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else  {
+			do_warn(" would clear inode number\n");
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * Check one directory data block and its entries; returns 1 if corrupt.
+ */
+/* ARGSUSED */
+static int
+process_dir2_data(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,		/* directory inode number */
+	xfs_dinode_t	*dip,		/* not referenced in this routine */
+	int		ino_discovery,	/* 1 during inode discovery (phase 3) */
+	char		*dirname,	/* directory pathname */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry not exist */
+	xfs_dabuf_t	*bp,		/* buffer holding the block */
+	int		*dot,		/* out - 1 if there is a dot, else 0 */
+	int		*dotdot,	/* out - 1 if there's a dotdot, else 0 */
+	xfs_dablk_t	da_bno,		/* block number, for messages */
+	char		*endptr)	/* end of the entry area to scan */
+{
+	int			badbest;
+	xfs_dir2_data_free_t	*bf;
+	int			clearino;
+	char			*clearreason = NULL;
+	xfs_dir2_data_t		*d;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_free_t	*dfp;
+	xfs_dir2_data_unused_t	*dup;
+	int			freeseen;	/* bit i set: bestfree[i] seen */
+	int			i;
+	int			ino_off;
+	ino_tree_node_t		*irec_p;
+	int			junkit;
+	int			lastfree;	/* previous item was free space */
+	int			nm_illegal;
+	char			*ptr;
+
+	d = bp->data;
+	bf = d->hdr.bestfree;
+	ptr = (char *)d->u;
+	badbest = lastfree = freeseen = 0;
+	if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {
+		badbest |= INT_GET(bf[0].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 0;
+	}
+	if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+		badbest |= INT_GET(bf[1].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 1;
+	}
+	if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+		badbest |= INT_GET(bf[2].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 2;
+	}
+	badbest |= INT_GET(bf[0].length, ARCH_CONVERT) < INT_GET(bf[1].length, ARCH_CONVERT);
+	badbest |= INT_GET(bf[1].length, ARCH_CONVERT) < INT_GET(bf[2].length, ARCH_CONVERT);
+	while (ptr < endptr) {		/* pass 1: check the block layout */
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		/*
+		 * If it's unused, look for the space in the bestfree table.
+		 * If we find it, account for that, else make sure it doesn't
+		 * need to be there.
+		 */
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+			    (INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)))
+				break;
+			if (INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) !=
+			    (char *)dup - (char *)d)
+				break;
+			badbest |= lastfree != 0;
+			dfp = xfs_dir2_data_freefind(d, dup);
+			if (dfp) {
+				i = dfp - bf;
+				badbest |= (freeseen & (1 << i)) != 0;
+				freeseen |= 1 << i;
+			} else
+				badbest |= INT_GET(dup->length, ARCH_CONVERT) > INT_GET(bf[2].length, ARCH_CONVERT);
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			lastfree = 1;
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		if (ptr + XFS_DIR2_DATA_ENTSIZE(dep->namelen) > endptr)
+			break;
+		if (INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) != (char *)dep - (char *)d)
+			break;
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		lastfree = 0;
+	}
+	/*
+	 * Dropped out before we processed everything, give up.
+	 * Phase 6 will kill this block if we don't kill the inode.
+	 */
+	if (ptr != endptr) {
+		do_warn("corrupt block %u in directory inode %llu\n",
+			da_bno, ino);
+		if (!no_modify)
+			do_warn("\twill junk block\n");
+		else
+			do_warn("\twould junk block\n");
+		return 1;
+	}
+	ptr = (char *)d->u;
+	/*
+	 * Process the entries now.
+	 */
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		/*
+		 * We may have to blow out an entry because of bad inode
+		 * numbers.  Do NOT touch the name until after we've computed
+		 * the hashvalue and done a namecheck() on the name.
+		 */
+		if (!ino_discovery && INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO) {
+			/*
+			 * Don't do a damned thing.  We already found this
+			 * (or did it ourselves) during phase 3.
+			 */
+			clearino = 0;
+		} else if (verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT))) {
+			/*
+			 * Bad inode number.  Clear the inode number and the
+			 * entry will get removed later.  We don't trash the
+			 * directory since it's still structurally intact.
+			 */
+			clearino = 1;
+			clearreason = "invalid";
+		} else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_rbmino) {
+			clearino = 1;
+			clearreason = "realtime bitmap";
+		} else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_rsumino) {
+			clearino = 1;
+			clearreason = "realtime summary";
+		} else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_uquotino) {
+			clearino = 1;
+			clearreason = "user quota";
+		} else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_pquotino) {
+			clearino = 1;
+			clearreason = "project quota";
+		} else if (INT_GET(dep->inumber, ARCH_CONVERT) == old_orphanage_ino) {
+			/*
+			 * Do nothing, silently ignore it, entry has already
+			 * been marked TBD since old_orphanage_ino is set
+			 * non-zero.
+			 */
+			clearino = 0;
+		} else if ((irec_p = find_inode_rec(
+				XFS_INO_TO_AGNO(mp, INT_GET(dep->inumber, ARCH_CONVERT)),
+				XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)))) != NULL) {
+			/*
+			 * Inode recs should have only confirmed inodes in them.
+			 */
+			ino_off =
+				XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)) -
+				irec_p->ino_startnum;
+			ASSERT(is_inode_confirmed(irec_p, ino_off));
+			/*
+			 * If inode is marked free and we're in inode discovery
+			 * mode, leave the entry alone for now.  If the inode
+			 * turns out to be used, we'll figure that out when we
+			 * scan it.  If the inode really is free, we'll hit this
+			 * code again in phase 4 after we've finished inode
+			 * discovery and blow out the entry then.
+			 */
+			if (!ino_discovery && is_inode_free(irec_p, ino_off)) {
+				clearino = 1;
+				clearreason = "free";
+			} else
+				clearino = 0;
+		} else if (ino_discovery) {
+			add_inode_uncertain(mp, INT_GET(dep->inumber, ARCH_CONVERT), 0);
+			clearino = 0;
+		} else {
+			clearino = 1;
+			clearreason = "non-existent";
+		}
+		/* offsets are pointer differences; cast to int to match %d */
+		if (clearino)
+			do_warn("entry \"%*.*s\" at block %u offset %d in "
+				"directory inode %llu references %s inode "
+				"%llu\n",
+				dep->namelen, dep->namelen, dep->name,
+				da_bno, (int)((char *)ptr - (char *)d), ino,
+				clearreason, INT_GET(dep->inumber, ARCH_CONVERT));
+		/*
+		 * A zero name length is illegal: make it 1 and blast the entry.
+		 */
+		if (dep->namelen == 0) {
+			do_warn("entry at block %u offset %d in directory "
+				"inode %llu has 0 namelength\n",
+				da_bno, (int)((char *)ptr - (char *)d), ino);
+			if (!no_modify)
+				dep->namelen = 1;
+			clearino = 1;
+		}
+		/*
+		 * If needed to clear the inode number, do it now.
+		 */
+		if (clearino) {
+			if (!no_modify) {
+				do_warn("\tclearing inode number in entry at "
+					"offset %d...\n",
+					(int)((char *)ptr - (char *)d));
+				INT_SET(dep->inumber, ARCH_CONVERT, BADFSINO);
+				bp->dirty = 1;
+			} else {
+				do_warn("\twould clear inode number in entry "
+					"at offset %d...\n",
+					(int)((char *)ptr - (char *)d));
+			}
+		}
+		/*
+		 * Only complain about illegal names in phase 3 (when inode
+		 * discovery is turned on).  Otherwise, we'd complain a lot
+		 * during phase 4.
+		 */
+		junkit = INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO;
+		nm_illegal = namecheck((char *)dep->name, dep->namelen);
+		if (ino_discovery && nm_illegal) {
+			do_warn("entry at block %u offset %d in directory "
+				"inode %llu has illegal name \"%*.*s\": ",
+				da_bno, (int)((char *)ptr - (char *)d), ino,
+				dep->namelen, dep->namelen, dep->name);
+			junkit = 1;
+		}
+		/*
+		 * Now we can mark entries with BADFSINO's bad.
+		 */
+		if (!no_modify && INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO) {
+			dep->name[0] = '/';
+			bp->dirty = 1;
+			junkit = 0;
+		}
+		/*
+		 * Special .. entry processing.
+		 */
+		if (dep->namelen == 2 &&
+		    dep->name[0] == '.' && dep->name[1] == '.') {
+			if (!*dotdot) {
+				(*dotdot)++;
+				*parent = INT_GET(dep->inumber, ARCH_CONVERT);
+				/*
+				 * What if .. == .?  Legal only in the root
+				 * inode.  Blow out entry and set parent to
+				 * NULLFSINO otherwise.
+				 */
+				if (ino == INT_GET(dep->inumber, ARCH_CONVERT) &&
+				    ino != mp->m_sb.sb_rootino) {
+					*parent = NULLFSINO;
+					do_warn("bad .. entry in directory "
+						"inode %llu, points to self: ",
+						ino);
+					junkit = 1;
+				}
+				/*
+				 * We have to make sure that . == .. in the
+				 * root inode.
+				 */
+				else if (ino != INT_GET(dep->inumber, ARCH_CONVERT) &&
+					   ino == mp->m_sb.sb_rootino) {
+					do_warn("bad .. entry in root "
+						"directory inode %llu, was "
+						"%llu: ",
+						ino, INT_GET(dep->inumber, ARCH_CONVERT));
+					if (!no_modify) {
+						do_warn("correcting\n");
+						INT_SET(dep->inumber, ARCH_CONVERT, ino);
+						bp->dirty = 1;
+					} else {
+						do_warn("would correct\n");
+					}
+				}
+			}
+			/*
+			 * Can't fix the directory unless we know which ..
+			 * entry is the right one.  Both have valid inode
+			 * numbers or we wouldn't be here.  So since both
+			 * seem equally valid, trash this one.
+			 */
+			else {
+				do_warn("multiple .. entries in directory "
+					"inode %llu: ",
+					ino);
+				junkit = 1;
+			}
+		}
+		/*
+		 * Special . entry processing.
+		 */
+		else if (dep->namelen == 1 && dep->name[0] == '.') {
+			if (!*dot) {
+				(*dot)++;
+				if (INT_GET(dep->inumber, ARCH_CONVERT) != ino) {
+					do_warn("bad . entry in directory "
+						"inode %llu, was %llu: ",
+						ino, INT_GET(dep->inumber, ARCH_CONVERT));
+					if (!no_modify) {
+						do_warn("correcting\n");
+						INT_SET(dep->inumber, ARCH_CONVERT, ino);
+						bp->dirty = 1;
+					} else {
+						do_warn("would correct\n");
+					}
+				}
+			} else {
+				do_warn("multiple . entries in directory "
+					"inode %llu: ",
+					ino);
+				junkit = 1;
+			}
+		}
+		/*
+		 * All other entries -- make sure only . references self.
+		 */
+		else if (INT_GET(dep->inumber, ARCH_CONVERT) == ino) {
+			do_warn("entry \"%*.*s\" in directory inode %llu "
+				"points to self: ",
+				dep->namelen, dep->namelen, dep->name, ino);
+			junkit = 1;
+		}
+		/*
+		 * Clear junked entries.
+		 */
+		if (junkit) {
+			if (!no_modify) {
+				dep->name[0] = '/';
+				bp->dirty = 1;
+				do_warn("clearing entry\n");
+			} else {
+				do_warn("would clear entry\n");
+			}
+		}
+		/*
+		 * Advance to the next entry.
+		 */
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+	/*
+	 * Check the bestfree table.
+	 */
+	if (freeseen != 7 || badbest) {	/* 7: all 3 bestfree slots seen */
+		do_warn("bad bestfree table in block %u in directory inode "
+			"%llu: ",
+			da_bno, ino);
+		if (!no_modify) {
+			do_warn("repairing table\n");
+			libxfs_dir2_data_freescan(mp, d, &i, endptr);
+			bp->dirty = 1;
+		} else {
+			do_warn("would repair table\n");
+		}
+	}
+	return 0;
+}
+
+/*
+ * Process a block-format directory (one block); returns 1 if it is bad.
+ */
+/* ARGSUSED */
+static int
+process_block_dir2(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,		/* directory inode number */
+	xfs_dinode_t	*dip,
+	int		ino_discovery,	/* passed on to process_dir2_data */
+	int		*dino_dirty,	/* out - dinode dirty (never set here) */
+	char		*dirname,	/* directory pathname */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry not exist */
+	blkmap_t	*blkmap,
+	int		*dot,		/* out - 1 if there is a dot, else 0 */
+	int		*dotdot,	/* out - 1 if there's a dotdot, else 0 */
+	int		*repair)	/* out - 1 if something was fixed */
+{
+	xfs_dir2_block_t	*block;
+	xfs_dir2_leaf_entry_t	*blp;
+	bmap_ext_t		*bmp;
+	xfs_dabuf_t		*bp;
+	xfs_dir2_block_tail_t	*btp;
+	int			nex;
+	int			rval;
+
+	*repair = *dot = *dotdot = 0;
+	*parent = NULLFSINO;
+	nex = blkmap_getn(blkmap, mp->m_dirdatablk, mp->m_dirblkfsbs, &bmp);
+	if (nex == 0) {
+		do_warn("block %u for directory inode %llu is missing\n",
+			mp->m_dirdatablk, ino);
+		return 1;
+	}
+	bp = da_read_buf(mp, nex, bmp);
+	free(bmp);		/* extent list is not needed past the read */
+	if (bp == NULL) {
+		do_warn("can't read block %u for directory inode %llu\n",
+			mp->m_dirdatablk, ino);
+		return 1;
+	}
+	/*
+	 * Verify the block; a bad magic number is only reported here.
+	 */
+	block = bp->data;
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)
+		do_warn("bad directory block magic # %#x in block %u for "
+			"directory inode %llu\n",
+			INT_GET(block->hdr.magic, ARCH_CONVERT), mp->m_dirdatablk, ino);
+	/*
+	 * process the data area
+	 * this also checks & fixes the bestfree
+	 */
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	/*
+	 * Don't let this go past the end of the block.
+	 */
+	if ((char *)blp > (char *)btp)
+		blp = (xfs_dir2_leaf_entry_t *)btp;
+	rval = process_dir2_data(mp, ino, dip, ino_discovery, dirname, parent,
+		bp, dot, dotdot, mp->m_dirdatablk, (char *)blp);
+	if (bp->dirty && !no_modify) {	/* write back only real changes */
+		*repair = 1;
+		da_bwrite(mp, bp);
+	} else
+		da_brelse(bp);
+	return rval;
+}
+
+/*
+ * Sanity-check the entries of one leaf block of a node-format directory.
+ * The caller is responsible for the magic number and sibling pointers;
+ * here we only look for hash values that go backwards and for a stale
+ * count that disagrees with the header.  Returns 0 if ok, 1 if bad.
+ */
+static int
+process_leaf_block_dir2(
+	xfs_mount_t		*mp,
+	xfs_dir2_leaf_t		*leaf,
+	xfs_dablk_t		da_bno,
+	xfs_ino_t		ino,
+	xfs_dahash_t		last_hashval,
+	xfs_dahash_t		*next_hashval)	/* untouched if no entries */
+{
+	xfs_dir2_leaf_entry_t	*ent;
+	int			idx;
+	int			nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+	int			nstale = 0;
+
+	for (idx = 0; idx < nents; idx++) {
+		ent = &leaf->ents[idx];
+		if ((char *)ent >= (char *)leaf + mp->m_dirblksize) {
+			do_warn("bad entry count in block %u of directory inode %llu\n",
+				da_bno, ino);
+			return 1;
+		}
+		if (INT_GET(ent->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			nstale++;
+		else if (INT_GET(ent->hashval, ARCH_CONVERT) < last_hashval) {
+			do_warn("bad hash ordering in block %u of directory inode %llu\n",
+				da_bno, ino);
+			return 1;
+		}
+		last_hashval = *next_hashval = INT_GET(ent->hashval, ARCH_CONVERT);
+	}
+	if (nstale == INT_GET(leaf->hdr.stale, ARCH_CONVERT))
+		return 0;
+	do_warn("bad stale count in block %u of directory inode %llu\n",
+		da_bno, ino);
+	return 1;
+}
+
+/*
+ * Walk the leaf blocks of a node-format directory left to right, starting
+ * at the block in level 0 of the cursor and following the forw pointers.
+ * Returns 0 if the directory is ok, 1 if it has to be rebuilt.
+ */
+static int
+process_leaf_level_dir2(
+	xfs_mount_t		*mp,
+	dir2_bt_cursor_t	*da_cursor,
+	int			*repair)	/* out - set to 1 on a write */
+{
+	bmap_ext_t		*bmp;
+	xfs_dabuf_t		*bp;
+	int			buf_dirty;
+	xfs_dahash_t		current_hashval;
+	xfs_dablk_t		da_bno;
+	xfs_dahash_t		greatest_hashval;
+	xfs_ino_t		ino;
+	xfs_dir2_leaf_t		*leaf;
+	int			nex;
+	xfs_dablk_t		prev_bno;
+
+	da_bno = da_cursor->level[0].bno;
+	ino = da_cursor->ino;
+	prev_bno = 0;
+	bmp = NULL;
+	current_hashval = 0;
+	greatest_hashval = 0;	/* stays defined if a leaf has no entries */
+	buf_dirty = 0;
+
+	do {
+		nex = blkmap_getn(da_cursor->blkmap, da_bno, mp->m_dirblkfsbs,
+			&bmp);
+		/*
+		 * Directory code uses 0 as the NULL block pointer since 0
+		 * is the root block and no directory block pointer can point
+		 * to the root block of the btree.
+		 */
+		ASSERT(da_bno != 0);
+
+		if (nex == 0) {
+			do_warn("can't map block %u for directory inode %llu\n",
+				da_bno, ino);
+			goto error_out;
+		}
+		bp = da_read_buf(mp, nex, bmp);
+		free(bmp);
+		bmp = NULL;
+		if (bp == NULL) {
+			do_warn("can't read file block %u for directory inode "
+				"%llu\n",
+				da_bno, ino);
+			goto error_out;
+		}
+		leaf = bp->data;
+		/*
+		 * Check magic number for leaf directory btree block.
+		 */
+		if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) {
+			do_warn("bad directory leaf magic # %#x for directory "
+				"inode %llu block %u\n",
+				INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino, da_bno);
+			da_brelse(bp);
+			goto error_out;
+		}
+		buf_dirty = 0;
+		/*
+		 * For each block, process the block, verify its path,
+		 * then get next block.  Update cursor values along the way.
+		 */
+		if (process_leaf_block_dir2(mp, leaf, da_bno, ino,
+				current_hashval, &greatest_hashval)) {
+			da_brelse(bp);
+			goto error_out;
+		}
+		/*
+		 * Index can be set to hdr.count so match the indices of the
+		 * interior blocks -- which at the end of the block will point
+		 * to 1 after the final real entry in the block.
+		 */
+		da_cursor->level[0].hashval = greatest_hashval;
+		da_cursor->level[0].bp = bp;
+		da_cursor->level[0].bno = da_bno;
+		da_cursor->level[0].index = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		da_cursor->level[0].dirty = buf_dirty;
+
+		if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno) {
+			do_warn("bad sibling back pointer for block %u in "
+				"directory inode %llu\n",
+				da_bno, ino);
+			da_brelse(bp);
+			goto error_out;
+		}
+		prev_bno = da_bno;
+		da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+		if (da_bno != 0) {
+			if (verify_dir2_path(mp, da_cursor, 0)) {
+				da_brelse(bp);
+				goto error_out;
+			}
+		}
+		current_hashval = greatest_hashval;
+		ASSERT(buf_dirty == 0 || buf_dirty && !no_modify);
+		if (buf_dirty && !no_modify) {
+			*repair = 1;
+			da_bwrite(mp, bp);
+		} else
+			da_brelse(bp);
+	} while (da_bno != 0);
+	if (verify_final_dir2_path(mp, da_cursor, 0)) {
+		/*
+		 * Verify the final path up (right-hand-side) if still ok.
+		 */
+		do_warn("bad hash path in directory %llu\n", ino);
+		goto error_out;
+	}
+	/* Redundant but just for testing. */
+	release_dir2_cursor(mp, da_cursor, 0);
+	return 0;
+
+error_out:
+	/*
+	 * Release all buffers holding interior btree blocks.
+	 */
+	err_release_dir2_cursor(mp, da_cursor, 0);
+	free(bmp);		/* NULL unless a mapping is still held */
+	return 1;
+}
+
+/*
+ * Check the leaf/node space of a node-format directory.  Returns 1 if
+ * it is corrupted and needs to be rebuilt, 0 if it's ok.
+ */
+static int
+process_node_dir2(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*repair)
+{
+	dir2_bt_cursor_t	cursor;
+	xfs_dablk_t		start_bno;
+
+	/*
+	 * Build a fresh cursor for this directory.  The traversal below
+	 * fills it in as it walks down the left-hand side of the tree
+	 * to the left-most leaf block, and the leaf walk then uses the
+	 * cursor to verify each block against its parents.
+	 */
+	bzero(&cursor, sizeof(cursor));
+	cursor.blkmap = blkmap;
+	cursor.dip = dip;
+	cursor.ino = ino;
+
+	/*
+	 * Now process the interior node(s).  A zero return from the
+	 * traversal means it failed, in which case the leaf/node
+	 * space has to be rebuilt.
+	 */
+	if (!traverse_int_dir2block(mp, &cursor, &start_bno))
+		return 1;
+
+	/*
+	 * A block number of zero comes back for directories whose root
+	 * is marked XFS_DIR2_LEAFN_MAGIC; those are skipped.  Otherwise
+	 * hand the cursor to the leaf-level routine, which checks the
+	 * interior paths up to the root, including the final
+	 * right-most path.
+	 */
+	if (start_bno != 0)
+		return process_leaf_level_dir2(mp, &cursor, repair);
+
+	release_dir2_cursor(mp, &cursor, 0);
+	return 0;
+}
+
+/*
+ * Process leaf and node directories: run every data block below the
+ * leaf offset through process_dir2_data, then, for a node directory,
+ * check the leaf/node btree.  Returns 1 only if no data block was good.
+ */
+static int
+process_leaf_node_dir2(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,		/* directory inode number */
+	xfs_dinode_t	*dip,
+	int		ino_discovery,	/* passed on to process_dir2_data */
+	char		*dirname,	/* directory pathname */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry not exist */
+	blkmap_t	*blkmap,
+	int		*dot,		/* out - 1 if there is a dot, else 0 */
+	int		*dotdot,	/* out - 1 if there's a dotdot, else 0 */
+	int		*repair,	/* out - 1 if something was fixed */
+	int		isnode)		/* node directory not leaf */
+{
+	bmap_ext_t		*bmp;
+	xfs_dabuf_t		*bp;
+	xfs_dir2_data_t		*data;
+	xfs_dfiloff_t		dbno;
+	int			good;	/* data blocks that checked out */
+	int			i;
+	xfs_dfiloff_t		ndbno;
+	int			nex;
+	int			t;	/* scratch for blkmap_next_off */
+
+	*repair = *dot = *dotdot = good = 0;
+	*parent = NULLFSINO;
+	ndbno = NULLDFILOFF;
+	while ((dbno = blkmap_next_off(blkmap, ndbno, &t)) < mp->m_dirleafblk) {
+		nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp);
+		ndbno = dbno + mp->m_dirblkfsbs - 1;
+		if (nex == 0) {
+			do_warn("block %llu for directory inode %llu is "
+				"missing\n",
+				dbno, ino);
+			continue;
+		}
+		bp = da_read_buf(mp, nex, bmp);
+		free(bmp);
+		if (bp == NULL) {
+			do_warn("can't read block %llu for directory inode "
+				"%llu\n",
+				dbno, ino);
+			continue;
+		}
+		data = bp->data;
+		/* a bad magic number is reported but the block is still checked */
+		if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC)
+			do_warn("bad directory block magic # %#x in block %llu "
+				"for directory inode %llu\n",
+				INT_GET(data->hdr.magic, ARCH_CONVERT), dbno, ino);
+		i = process_dir2_data(mp, ino, dip, ino_discovery, dirname,
+			parent, bp, dot, dotdot, (xfs_dablk_t)dbno,
+			(char *)data + mp->m_dirblksize);
+		if (i == 0)
+			good++;
+		if (bp->dirty && !no_modify) {
+			*repair = 1;
+			da_bwrite(mp, bp);
+		} else
+			da_brelse(bp);
+	}
+	if (good == 0)		/* not a single usable data block */
+		return 1;
+	if (!isnode)
+		return 0;
+	if (dir2_is_badino(ino))
+		return 0;
+	if (process_node_dir2(mp, ino, dip, blkmap, repair))
+		dir2_add_badlist(ino);	/* recorded; we still return 0 */
+	return 0;
+
+}
+
+/*
+ * Check one directory inode, dispatching on shortform, block or
+ * leaf/node format.  Returns 1 if the directory needs to be junked,
+ * 0 if ok.  If ino_discovery is 1, unknown inodes go on the uncertain list.
+ */
+int
+process_dir2(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,		/* directory inode number */
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,	/* handed to shortform/block routines */
+	char		*dirname,
+	xfs_ino_t	*parent,	/* out - starts out as NULLFSINO */
+	blkmap_t	*blkmap)	/* may be NULL */
+{
+	int		dot;
+	int		dotdot;
+	xfs_dfiloff_t	last;
+	int		repair;		/* set by callees, not examined here */
+	int		res;
+
+	*parent = NULLFSINO;
+	dot = dotdot = 0;
+	last = 0;
+
+	/*
+	 * branch off depending on the type of inode.  This routine
+	 * is only called ONCE so all the subordinate routines will
+	 * fix '.' and junk '..' if they're bogus.
+	 */
+	if (blkmap)
+		last = blkmap_last_off(blkmap);
+	if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT) &&
+	    dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) {
+		dot = dotdot = 1;	/* keeps the "no ./.." warnings quiet */
+		res = process_sf_dir2(mp, ino, dip, ino_discovery, dino_dirty,
+			dirname, parent, &repair);
+	} else if (last == mp->m_dirblkfsbs &&
+		 (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS ||
+		  dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) {
+		res = process_block_dir2(mp, ino, dip, ino_discovery,
+			dino_dirty, dirname, parent, blkmap, &dot, &dotdot,
+			&repair);
+	} else if (last >= mp->m_dirleafblk + mp->m_dirblkfsbs &&
+		 (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS ||
+		  dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) {
+		res = process_leaf_node_dir2(mp, ino, dip, ino_discovery,
+			dirname, parent, blkmap, &dot, &dotdot, &repair,
+			last > mp->m_dirleafblk + mp->m_dirblkfsbs); /* isnode */
+	} else {
+		do_warn("bad size/format for directory %llu\n", ino);
+		return 1;
+	}
+	/*
+	 * bad . entries in all directories will be fixed up in phase 6
+	 */
+	if (dot == 0) {
+		do_warn("no . entry for directory %llu\n", ino);
+	}
+
+	/*
+	 * shortform dirs always have a .. entry.  .. for all longform
+	 * directories will get fixed in phase 6. .. for other shortform
+	 * dirs also get fixed there.  .. for a shortform root was
+	 * fixed in place since we know what it should be
+	 */
+	if (dotdot == 0 && ino != mp->m_sb.sb_rootino) {
+		do_warn("no .. entry for directory %llu\n", ino);
+	} else if (dotdot == 0 && ino == mp->m_sb.sb_rootino) {
+		do_warn("no .. entry for root directory %llu\n", ino);
+		need_root_dotdot = 1;
+	}
+	
+	ASSERT(ino != mp->m_sb.sb_rootino && ino != *parent ||
+		ino == mp->m_sb.sb_rootino &&
+			(ino == *parent || need_root_dotdot == 1));
+
+	return res;
+}
diff --git a/repair/dir2.h b/repair/dir2.h
new file mode 100644
index 000000000..9583447fe
--- /dev/null
+++ b/repair/dir2.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_DIR2_H
+#define	_XR_DIR2_H
+
+struct blkmap;
+struct bmap_ext;
+
+/*
+ * the cursor gets passed up and down the da btree processing
+ * routines.  The interior block processing routines use the
+ * cursor to determine if the pointers to and from the preceding
+ * and succeeding sibling blocks are ok and whether the values in
+ * the current block are consistent with the entries in the parent
+ * nodes.  When a block is traversed, a parent-verification routine
+ * is called to verify if the next logical entry in the next level up
+ * is consistent with the greatest hashval in the next block of the
+ * current level.  The verification routine is itself recursive and
+ * calls itself if it has to traverse an interior block to get
+ * the next logical entry.  The routine recurses upwards through
+ * the tree until it finds a block where it can simply step to
+ * the next entry.  The hashval in that entry should be equal to
+ * the hashval being passed to it (the greatest hashval in the block
+ * that the entry points to).  If that isn't true, then the tree
+ * is blown and we need to trash it, salvage and trash it, or fix it.
+ * Currently, we just trash it.
+ */
+typedef struct dir2_level_state  {
+	xfs_dabuf_t	*bp;		/* block bp */
+	xfs_dablk_t	bno;		/* file block number */
+	xfs_dahash_t	hashval;	/* last verified hashval */
+	int		index;		/* current index in block */
+	int		dirty;		/* is buffer dirty ? (1 == yes) */
+} dir2_level_state_t;
+
+typedef struct dir2_bt_cursor  {
+	int			active;	/* highest level in tree (# levels-1) */
+	int			type;	/* 0 if dir, 1 if attr */
+	xfs_ino_t		ino;	/* inode number of the directory */
+	xfs_dablk_t		greatest_bno;
+	xfs_dinode_t		*dip;	/* dinode of that directory */
+	dir2_level_state_t	level[XFS_DA_NODE_MAXDEPTH]; /* [0] is leaf level */
+	struct blkmap		*blkmap; /* used to map the dir's blocks */
+} dir2_bt_cursor_t;
+
+
+/* ROUTINES */
+
+void
+err_release_dir2_cursor(
+	xfs_mount_t		*mp,
+	dir2_bt_cursor_t	*cursor,
+	int			prev_level);
+
+xfs_dabuf_t *
+da_read_buf(
+	xfs_mount_t	*mp,
+	int		nex,
+	struct bmap_ext	*bmp);
+
+int
+da_bwrite(
+	xfs_mount_t	*mp,
+	xfs_dabuf_t	*bp);
+
+void
+da_brelse(
+	xfs_dabuf_t	*bp);
+
+int				/* 1 if the directory must be junked, else 0 */
+process_dir2(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,	/* same name as in the definition */
+	char		*dirname,
+	xfs_ino_t	*parent,
+	struct blkmap	*blkmap);
+
+void
+process_sf_dir2_fixi8(
+	xfs_dir2_sf_t		*sfp,
+	xfs_dir2_sf_entry_t	**next_sfep);
+
+void
+dir2_add_badlist(
+	xfs_ino_t	ino);
+
+int
+dir2_is_badino(
+	xfs_ino_t	ino);
+
+#endif	/* _XR_DIR2_H */
diff --git a/repair/dir_stack.c b/repair/dir_stack.c
new file mode 100644
index 000000000..1d0aae4af
--- /dev/null
+++ b/repair/dir_stack.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "dir_stack.h"
+#include "err_protos.h"
+
+/*
+ * a directory stack for holding directories while
+ * we traverse filesystem hierarchy subtrees.
+ * names are kind of misleading as this is really
+ * implemented as an inode stack.  so sue me...
+ */
+
+static dir_stack_t	dirstack_freelist;	/* spare elements for reuse */
+static int		dirstack_init = 0;	/* 1 once freelist is set up */
+
+/*
+ * reset stack to the empty state.  the first call also sets up
+ * the file-private freelist of spare stack elements.
+ */
+void
+dir_stack_init(dir_stack_t *stack)
+{
+	if (!dirstack_init)  {
+		dirstack_init = 1;
+		dirstack_freelist.cnt = 0;
+		dirstack_freelist.head = NULL;
+	}
+
+	stack->cnt = 0;
+	stack->head = NULL;
+}
+
+/* link elem in as the new head of stack and bump the element count */
+static void
+dir_stack_push(dir_stack_t *stack, dir_stack_elem_t *elem)
+{
+	/* an empty stack must not have a dangling head pointer */
+	ASSERT(stack->cnt > 0 || (stack->cnt == 0 && stack->head == NULL));
+
+	elem->next = stack->head;
+	stack->head = elem;
+	stack->cnt++;
+}
+
+/*
+ * unlink and return the head element of stack, or NULL if the
+ * stack is empty.  the returned element's next pointer is cleared.
+ */
+static dir_stack_elem_t *
+dir_stack_pop(dir_stack_t *stack)
+{
+	dir_stack_elem_t *top = stack->head;
+
+	if (stack->cnt == 0)  {
+		ASSERT(top == NULL);
+		return NULL;
+	}
+
+	ASSERT(top != NULL);
+	stack->head = top->next;
+	top->next = NULL;
+	stack->cnt--;
+	return top;
+}
+
+/*
+ * push ino onto stack.  the element is taken from the freelist
+ * when it has one, otherwise it is malloc'ed.
+ */
+void
+push_dir(dir_stack_t *stack, xfs_ino_t ino)
+{
+	dir_stack_elem_t *new_elem;
+
+	if (dirstack_freelist.cnt != 0)  {
+		new_elem = dir_stack_pop(&dirstack_freelist);
+	} else if ((new_elem = malloc(sizeof(*new_elem))) == NULL)  {
+		do_error(
+		"couldn't malloc dir stack element, try more swap\n");
+		exit(1);
+	}
+
+	new_elem->ino = ino;
+
+	dir_stack_push(stack, new_elem);
+}
+
+/*
+ * pop the top inode number off stack, or return NULLFSINO if the
+ * stack is empty.  the spent element is parked on the freelist.
+ */
+xfs_ino_t
+pop_dir(dir_stack_t *stack)
+{
+	dir_stack_elem_t *top = dir_stack_pop(stack);
+	xfs_ino_t popped_ino;
+
+	if (top == NULL)
+		return NULLFSINO;
+
+	popped_ino = top->ino;
+	top->ino = NULLFSINO;
+	dir_stack_push(&dirstack_freelist, top);
+	return popped_ino;
+}
diff --git a/repair/dir_stack.h b/repair/dir_stack.h
new file mode 100644
index 000000000..9a8305be8
--- /dev/null
+++ b/repair/dir_stack.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _XR_DIR_STACK_H
+#define _XR_DIR_STACK_H
+typedef struct dir_stack_elem  {
+	xfs_ino_t		ino;	/* inode number held in this slot */
+	struct dir_stack_elem	*next;	/* next (older) element, or NULL */
+} dir_stack_elem_t;
+
+typedef struct dir_stack  {
+	int			cnt;	/* number of elements on the stack */
+	dir_stack_elem_t	*head;	/* top of stack, NULL when empty */
+} dir_stack_t;
+
+void		dir_stack_init(dir_stack_t *stack);
+void		push_dir(dir_stack_t *stack, xfs_ino_t ino);
+xfs_ino_t	pop_dir(dir_stack_t *stack);
+#endif	/* _XR_DIR_STACK_H */
diff --git a/repair/err_protos.h b/repair/err_protos.h
new file mode 100644
index 000000000..7d5aa5c52
--- /dev/null
+++ b/repair/err_protos.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+void	do_abort(char const *, ...);		/* abort, internal error */
+void	do_error(char const *, ...);		/* abort, system error */
+void	do_warn(char const *, ...);		/* issue warning */
+void	do_log(char const *, ...);		/* issue log message */
diff --git a/repair/globals.c b/repair/globals.c
new file mode 100644
index 000000000..206d08410
--- /dev/null
+++ b/repair/globals.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+
+#define EXTERN
+#include "globals.h"
+
diff --git a/repair/globals.h b/repair/globals.h
new file mode 100644
index 000000000..5c33d5f6f
--- /dev/null
+++ b/repair/globals.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XFS_REPAIR_GLOBAL_H
+#define _XFS_REPAIR_GLOBAL_H
+
+#ifndef EXTERN
+#define EXTERN extern
+#endif
+
+/* useful macros */
+
+#define rounddown(x, y) (((x)/(y))*(y))
+
+/* error flags */
+
+#define XR_OK			0	/* good */
+#define XR_BAD_MAGIC		1	/* bad magic number */
+#define XR_BAD_BLOCKSIZE	2	/* bad block size */
+#define XR_BAD_BLOCKLOG		3	/* bad sb_blocklog field */
+#define XR_BAD_VERSION		4	/* bad version number */
+#define XR_BAD_INPROGRESS	5	/* in progress set */
+#define XR_BAD_FS_SIZE_DATA	6	/* ag sizes, number, fs size mismatch */
+#define XR_BAD_INO_SIZE_DATA	7	/* bad inode size or perblock fields */
+#define XR_BAD_SECT_SIZE_DATA	8	/* bad sector size info */
+#define XR_AGF_GEO_MISMATCH	9	/* agf info conflicts with sb */
+#define XR_AGI_GEO_MISMATCH	10	/* agi info conflicts with sb */
+#define XR_SB_GEO_MISMATCH	11	/* sb geo conflicts with fs sb geo */
+#define XR_EOF			12	/* seeked beyond EOF */
+#define XR_BAD_RT_GEO_DATA	13	/* realtime geometry inconsistent */
+#define XR_BAD_INO_MAX_PCT	14	/* max % of inodes > 100% */
+#define XR_BAD_INO_ALIGN	15	/* bad inode alignment value */
+#define XR_INSUFF_SEC_SB	16	/* not enough matching secondary sbs */
+#define XR_BAD_SB_UNIT		17	/* bad stripe unit */
+#define XR_BAD_SB_WIDTH		18	/* bad stripe width */
+#define XR_BAD_SVN		19	/* bad shared version number */
+#define XR_BAD_ERR_CODE		20	/* Bad error code */
+
+/* XFS filesystem (il)legal values */
+
+#define XR_LOG2BSIZE_MIN	9	/* min/max fs blocksize (log2) */
+#define XR_LOG2BSIZE_MAX	16	/* 2^XR_* == blocksize */
+
+#define	NUM_SBS			8	/* max # of sbs to verify */
+#define NUM_AGH_SECTS		4	/* # of components in an ag header */
+
+#define	MEM_ALIGN		128	/* I/O buf alignment - a cache line */
+
+/*
+ * secondary sb mask -- if the secondary sb feature bits have a
+ * partial sb mask bit set, then you can depend on the fields
+ * in it up to and including sb_inoalignmt but the unused part of the
+ * sector may have trash in it.  If the sb has any bits set that are in
+ * the good mask, then the entire sb and sector are good (was bzero'ed
+ * by mkfs).  The third mask is for filesystems made by pre-6.5 campus
+ * alpha mkfs's.  Those are rare so we'll check for those under
+ * a special option.
+ */
+#define XR_PART_SECSB_VNMASK	0x0F80	/* >= XFS_SB_VERSION_ALIGNBIT */
+#define XR_GOOD_SECSB_VNMASK	0x0F00	/* >= XFS_SB_VERSION_DALIGNBIT */
+#define XR_ALPHA_SECSB_VNMASK	0x0180	/* DALIGN|ALIGN bits */
+
+/* global variables for xfs_repair */
+
+/* arguments and argument flag variables */
+
+EXTERN char	*fs_name;		/* name of filesystem */
+EXTERN int	verbose;		/* verbose flag, mostly for debugging */
+
+
+/* for reading stuff in manually (bypassing libsim) */
+
+EXTERN char	*iobuf;			/* large buffer */
+EXTERN int	iobuf_size;
+EXTERN char	*smallbuf;		/* small (1-4 page) buffer */
+EXTERN int	smallbuf_size;
+EXTERN char	*sb_bufs[NUM_SBS];	/* superblock buffers */
+EXTERN int	sbbuf_size;
+
+/* direct I/O info */
+
+EXTERN int	minio_align;		/* min I/O size and alignment */
+EXTERN int	mem_align;		/* memory alignment */
+EXTERN int	max_iosize;		/* max I/O size */
+
+/* file descriptors */
+
+EXTERN int	fs_fd;			/* filesystem fd */
+
+/* command-line flags */
+
+EXTERN int	verbose;		/* also declared in the section above */
+EXTERN int	no_modify;
+EXTERN int	isa_file;
+EXTERN int	dumpcore;		/* abort, not exit on fatal errs */
+EXTERN int	delete_attr_ok;		/* can clear attrs w/o clearing files */
+EXTERN int	force_geo;		/* can set geo on low confidence info */
+EXTERN int	assume_xfs;		/* assume we have an xfs fs */
+EXTERN int	pre_65_beta;		/* fs was mkfs'ed by a version earlier than 6.5-beta */
+EXTERN char *log_name;			/* Name of log device */
+EXTERN int log_spec;			/* Log dev specified as option */
+
+/* misc status variables */
+
+EXTERN int		primary_sb_modified;
+EXTERN int		bad_ino_btree;
+EXTERN int		clear_sunit;
+EXTERN int		fs_is_dirty;
+
+/* for hunting down the root inode */
+
+EXTERN int		need_root_inode;
+EXTERN int		need_root_dotdot;
+
+EXTERN int		need_rbmino;
+EXTERN int		need_rsumino;
+
+EXTERN int		lost_quotas;
+EXTERN int		have_uquotino;
+EXTERN int		have_pquotino;
+EXTERN int		lost_uquotino;
+EXTERN int		lost_pquotino;
+
+EXTERN xfs_agino_t	first_prealloc_ino;
+EXTERN xfs_agino_t	last_prealloc_ino;
+EXTERN xfs_agblock_t	bnobt_root;
+EXTERN xfs_agblock_t	bcntbt_root;
+EXTERN xfs_agblock_t	inobt_root;
+
+/* configuration vars -- fs geometry dependent */
+
+EXTERN int		inodes_per_block;
+EXTERN int		inodes_per_cluster;	/* inodes per inode buffer */
+EXTERN unsigned int	glob_agcount;
+EXTERN int		chunks_pblock;	/* # of 64-ino chunks per allocation */
+EXTERN int		max_symlink_blocks;
+EXTERN __int64_t	fs_max_file_offset;
+
+/* block allocation bitmaps */
+
+EXTERN __uint64_t	**ba_bmap;	/* see incore.h */
+EXTERN __uint64_t	*rt_ba_bmap;	/* see incore.h */
+
+/* realtime info */
+
+EXTERN xfs_rtword_t	*btmcompute;
+EXTERN xfs_suminfo_t	*sumcompute;
+
+/* inode tree records have full or partial backptr fields ? */
+
+EXTERN int		full_backptrs;	/*
+					 * if 1, use backptrs_t component
+					 * of ino_un union, if 0, use
+					 * parent_list_t component.  see
+					 * incore.h for more details
+					 */
+
+#define ORPHANAGE	"lost+found"
+
+/* superblock counters */
+
+EXTERN __uint64_t	sb_icount;	/* allocated (made) inodes */
+EXTERN __uint64_t	sb_ifree;	/* free inodes */
+EXTERN __uint64_t	sb_fdblocks;	/* free data blocks */
+EXTERN __uint64_t	sb_frextents;	/* free realtime extents */
+
+EXTERN xfs_ino_t	orphanage_ino;
+EXTERN xfs_ino_t	old_orphanage_ino;
+
+/* superblock geometry info */
+
+EXTERN xfs_extlen_t	sb_inoalignmt;
+EXTERN __uint32_t	sb_unit;
+EXTERN __uint32_t	sb_width;
+
+#endif /* _XFS_REPAIR_GLOBAL_H */
diff --git a/repair/incore.c b/repair/incore.c
new file mode 100644
index 000000000..499854188
--- /dev/null
+++ b/repair/incore.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * push a block allocation record onto list.  assumes list
+ * is set to NULL if empty.
+ */
+void
+record_allocation(ba_rec_t *addr, ba_rec_t *list)
+{
+	addr->next = list;
+	list = addr;
+	/* NOTE(review): list is by value, so the caller's head is unchanged */
+	return;
+}
+
+/*
+ * free each ba_rec_t node on list.  the addr pointers are not freed.
+ */
+void
+free_allocations(ba_rec_t *list)
+{
+	ba_rec_t *next_rec;
+
+	for (; list != NULL; list = next_rec)  {
+		next_rec = list->next;
+		free(list);
+	}
+}
+
+/* ba bmap setupstuff.  setting/getting state is in incore.h  */
+
+/*
+ * allocate the per-AG and realtime block maps.  agno is really the
+ * number of AGs.  AG maps start zeroed, the rt map starts all free.
+ */
+void
+setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks)
+{
+	xfs_agnumber_t	i;
+	size_t		agsize;		/* bytes in one AG's map */
+	xfs_drfsbno_t	rtsize;		/* bytes in the realtime map */
+
+	ba_bmap = malloc(agno * sizeof(__uint64_t *));
+	if (!ba_bmap)  {
+		do_error("couldn't allocate block map pointers\n");
+		return;
+	}
+
+	/* sized in size_t so that big AGs cannot overflow an int */
+	agsize = roundup((size_t) numblocks * (NBBY/XR_BB),
+			sizeof(__uint64_t));
+	for (i = 0; i < agno; i++)  {
+		ba_bmap[i] = memalign(sizeof(__uint64_t), agsize);
+		if (!ba_bmap[i])  {
+			do_error("couldn't allocate block map, size = %u\n",
+				(unsigned int) numblocks);
+			return;
+		}
+		bzero(ba_bmap[i], agsize);
+	}
+
+	if (rtblocks == 0)  {
+		rt_ba_bmap = NULL;
+		return;
+	}
+
+	rtsize = roundup(rtblocks * (NBBY/XR_BB), sizeof(__uint64_t));
+	rt_ba_bmap = memalign(sizeof(__uint64_t), rtsize);
+	if (!rt_ba_bmap)  {
+		do_error(
+		"couldn't allocate real-time block map, size = %llu\n",
+			(unsigned long long) rtblocks);
+		return;
+	}
+
+	/* start all real-time as free blocks */
+	set_bmap_rt(rtblocks);
+}
+
+/*
+ * release the realtime block map, if there is one.  mp is unused.
+ */
+/* ARGSUSED */
+void
+teardown_rt_bmap(xfs_mount_t *mp)
+{
+	/* free(NULL) is a no-op, so no NULL check is needed */
+	free(rt_ba_bmap);
+	rt_ba_bmap = NULL;
+}
+
+/* release the block map of AG agno, which must still be allocated */
+/* ARGSUSED */
+void
+teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	__uint64_t **slot = &ba_bmap[agno];
+
+	ASSERT(*slot != NULL);
+	free(*slot);
+	*slot = NULL;
+}
+
+/* drop the array of per-AG map pointers; the maps are not freed here */
+/* ARGSUSED */
+void
+teardown_bmap_finish(xfs_mount_t *mp)
+{
+	__uint64_t **maps = ba_bmap;
+	ba_bmap = NULL;
+	free(maps);
+}
+
+/*
+ * free each AG's map, the realtime map, then the map pointer array.
+ */
+void
+teardown_bmap(xfs_mount_t *mp)
+{
+	xfs_agnumber_t agno;
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+		teardown_ag_bmap(mp, agno);
+
+	teardown_rt_bmap(mp);
+	teardown_bmap_finish(mp);
+}
+
+/*
+ * block map initialization routines -- realtime, log, fs
+ */
+/*
+ * mark the whole realtime block map as free: every 64-bit word
+ * covered by num is filled with XR_E_FREE (2) in each 4-bit
+ * record.  rt_ba_bmap must already be allocated.
+ */
+void
+set_bmap_rt(xfs_drtbno_t num)
+{
+	__uint64_t	*wordp = rt_ba_bmap;
+	xfs_drtbno_t	nwords;
+
+	/* number of 64-bit map words to fill */
+	nwords = howmany(num * (NBBY/XR_BB), sizeof(__uint64_t));
+
+	while (nwords-- > 0)
+		*wordp++ = 0x2222222222222222LL;
+}
+
+/*
+ * mark filesystem blocks sb_logstart up to (but not including)
+ * sb_logstart + sb_logblocks as XR_E_INUSE_FS.  no-op if sb_logstart is 0.
+ */
+void
+set_bmap_log(xfs_mount_t *mp)
+{
+	xfs_dfsbno_t	fsbno = mp->m_sb.sb_logstart;
+	xfs_dfsbno_t	logend;
+
+	if (fsbno == 0)
+		return;
+
+	logend = fsbno + mp->m_sb.sb_logblocks;
+	for (; fsbno < logend; fsbno++)
+		set_fsbno_state(mp, fsbno, XR_E_INUSE_FS);
+}
+
+/*
+ * mark the AG header blocks at the start of every AG as XR_E_INUSE_FS.
+ * the header is 4 sectors, rounded up to whole filesystem blocks.
+ */
+void
+set_bmap_fs(xfs_mount_t *mp)
+{
+	xfs_agnumber_t	agno;
+	xfs_agblock_t	agbno;
+	xfs_agblock_t	hdr_blocks;
+
+	hdr_blocks = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+		for (agbno = 0; agbno < hdr_blocks; agbno++)
+			set_agbno_state(mp, agno, agbno, XR_E_INUSE_FS);
+	}
+}
+
+#if 0	/* compiled out */
+void
+set_bmap_fs_bt(xfs_mount_t *mp)
+{
+	xfs_agnumber_t	i;
+	xfs_agblock_t	j;
+	xfs_agblock_t	begin;
+	xfs_agblock_t	end;
+
+	begin = bnobt_root;
+	end = inobt_root + 1;
+	/* i.e. agbnos bnobt_root through inobt_root, inclusive, in each AG */
+	for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+		/*
+		 * account for btree roots
+		 */
+		for (j = begin; j < end; j++)
+			set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+	}
+
+	return;
+}
+#endif
+
+/*
+ * geometry-dependent in-core setup: allocates the block maps, calls
+ * incore_ino_init() and incore_ext_init(), and sets inodes_per_block.
+ */
+void
+incore_init(xfs_mount_t *mp)
+{
+	extern void incore_ino_init(xfs_mount_t *);
+	extern void incore_ext_init(xfs_mount_t *);
+	int nags = mp->m_sb.sb_agcount;
+
+	/* block alloc bmap comes first, sized from the superblock */
+	setup_bmap(nags, mp->m_sb.sb_agblocks, mp->m_sb.sb_rextents);
+	incore_ino_init(mp);
+	incore_ext_init(mp);
+
+	/* globals that need the fs geometry to be known */
+	inodes_per_block = mp->m_sb.sb_inopblock;
+}
+
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+/* return the 4-bit state record of block ag_blockno in AG agno */
+int
+get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+		xfs_agblock_t ag_blockno)
+{
+	__uint64_t	word = ba_bmap[agno][ag_blockno / XR_BB_NUM];
+	int		shift = (ag_blockno % XR_BB_NUM) * XR_BB;
+
+	return (word >> shift) & XR_BB_MASK;
+}
+
+/* store state (unmasked) into the 4-bit record of ag_blockno in AG agno */
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+	xfs_agblock_t ag_blockno, int state)
+{
+	__uint64_t	*wordp = &ba_bmap[agno][ag_blockno / XR_BB_NUM];
+	int		shift = (ag_blockno % XR_BB_NUM) * XR_BB;
+	__uint64_t	cleared;
+
+	cleared = *wordp & ~((__uint64_t) XR_BB_MASK << shift);
+	*wordp = cleared | ((__uint64_t) state << shift);
+}
+
+int	/* state record of filesystem block blockno */
+get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno)
+{
+	xfs_agnumber_t	agno = XFS_FSB_TO_AGNO(mp, blockno);
+	return get_agbno_state(mp, agno, XFS_FSB_TO_AGBNO(mp, blockno));
+}
+
+/* set the state of fs block blockno via its (agno, agbno) pair */
+void
+set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state)
+{
+	xfs_agnumber_t	agno = XFS_FSB_TO_AGNO(mp, blockno);
+	xfs_agblock_t	agbno = XFS_FSB_TO_AGBNO(mp, blockno);
+	set_agbno_state(mp, agno, agbno, state);
+}
+#endif
diff --git a/repair/incore.h b/repair/incore.h
new file mode 100644
index 000000000..22ffdea4f
--- /dev/null
+++ b/repair/incore.h
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * contains definition information.  implementation (code)
+ * is spread out in separate files.
+ */
+
+/*
+ * block allocation lists
+ */
+typedef struct ba_rec  {
+	void		*addr;	/* NOTE(review): presumably the tracked block */
+	struct ba_rec	*next;	/* next record on the list, NULL at the end */
+} ba_rec_t;
+
+void			record_allocation(ba_rec_t *addr, ba_rec_t *list);
+void			free_allocations(ba_rec_t *list);
+
+/*
+ * block bit map defs -- track state of each filesystem block.
+ * ba_bmap is an array of bitstrings declared in the globals.h file.
+ * the bitstrings are broken up into 64-bit chunks.  one bitstring per AG.
+ */
+#define BA_BMAP_SIZE(x)		(howmany(x, 4))
+
+void			set_bmap_rt(xfs_drfsbno_t numblocks);
+void			set_bmap_log(xfs_mount_t *mp);
+void			set_bmap_fs(xfs_mount_t *mp);
+void			teardown_bmap(xfs_mount_t *mp);
+
+void			teardown_rt_bmap(xfs_mount_t *mp);
+void			teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno);
+void			teardown_bmap_finish(xfs_mount_t *mp);
+
+/* blocks are numbered from zero */
+
+/* block records fit into __uint64_t's units */
+
+#define XR_BB_UNIT	64			/* number of bits/unit */
+#define XR_BB		4			/* bits per block record */
+#define XR_BB_NUM	(XR_BB_UNIT/XR_BB)	/* number of records per unit */
+#define XR_BB_MASK	0xF			/* block record mask */
+
+/*
+ * bitstring ops -- set/get block states, either in filesystem
+ * bno's or in agbno's.  turns out that fsbno addressing is
+ * more convenient when dealing with bmap extracted addresses
+ * and agbno addressing is more convenient when dealing with
+ * meta-data extracted addresses.  So the fsbno versions use
+ * mtype (which can be one of the block map types above) to
+ * set the correct block map while the agbno versions assume
+ * you want to use the regular block map.
+ */
+
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+/*
+ * implemented as functions for debugging purposes
+ */
+int get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+	xfs_agblock_t ag_blockno);
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+	xfs_agblock_t ag_blockno, int state);
+
+int get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno);
+void set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state);
+#else
+/*
+ * implemented as macros for performance purposes.  beware that
+ * some of the arguments are evaluated more than once.
+ */
+#define get_agbno_state(mp, agno, ag_blockno) \
+			((int) (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) \
+				 >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \
+				& XR_BB_MASK)
+#define set_agbno_state(mp, agno, ag_blockno, state) \
+	*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \
+		((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \
+	  (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \
+	 (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)))
+/* fsbno variants split blockno into (agno, agbno) and use the above */
+#define get_fsbno_state(mp, blockno) \
+		get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+				XFS_FSB_TO_AGBNO(mp, (blockno)))
+#define set_fsbno_state(mp, blockno, state) \
+		set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+			XFS_FSB_TO_AGBNO(mp, (blockno)), (state))
+/* get_agbno_rec yields the whole 64-bit word holding the block's record */
+
+#define get_agbno_rec(mp, agno, ag_blockno) \
+			(*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM))
+#endif /* XR_BMAP_TRACE */
+
+/*
+ * these work in real-time extents (i.e. fsbno == rt extent number)
+ */
+#define get_rtbno_state(mp, fsbno) \
+			((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \
+			(((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK)
+#define set_rtbno_state(mp, fsbno, state) \
+	*(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \
+	 ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \
+	  (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \
+	 (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB)))
+
+
+/*
+ * extent tree definitions
+ * right now, there are 3 trees per AG, a bno tree, a bcnt tree
+ * and a tree for dup extents.  If the code is modified in the
+ * future to use an extent tree instead of a bitmask for tracking
+ * fs blocks, then we could lose the dup extent tree if we labelled
+ * each extent with the inode that owned it.
+ */
+
+typedef unsigned char extent_state_t;
+
+typedef struct extent_tree_node  {
+	avlnode_t		avl_node;
+	xfs_agblock_t		ex_startblock;	/* starting block (agbno) */
+	xfs_extlen_t		ex_blockcount;	/* number of blocks in extent */
+	extent_state_t		ex_state;	/* see state flags below */
+
+	struct extent_tree_node		*next;	/* for bcnt extent lists */
+#if 0
+	xfs_ino_t		ex_inode;	/* owner, NULL if free or  */
+						/*	multiply allocated */
+#endif
+} extent_tree_node_t;
+
+typedef struct rt_extent_tree_node  {
+	avlnode_t		avl_node;
+	xfs_drtbno_t		rt_startblock;	/* starting realtime block */
+	xfs_extlen_t		rt_blockcount;	/* number of blocks in extent */
+	extent_state_t		rt_state;	/* see state flags below */
+
+#if 0
+	xfs_ino_t		ex_inode;	/* owner, NULL if free or  */
+						/*	multiply allocated */
+#endif
+} rt_extent_tree_node_t;
+
+/* extent states, prefix with XR_ to avoid conflict with buffer cache defines */
+
+#define XR_E_UNKNOWN	0	/* unknown state */
+#define XR_E_FREE1	1	/* free block (marked by one fs space tree) */
+#define XR_E_FREE	2	/* free block (marked by both fs space trees) */
+#define XR_E_INUSE	3	/* extent used by file/dir data or metadata */
+#define XR_E_INUSE_FS	4	/* extent used by fs ag header or log */
+#define XR_E_MULT	5	/* extent is multiply referenced */
+#define XR_E_INO	6	/* extent used by inodes (inode blocks) */
+#define XR_E_FS_MAP	7	/* extent used by fs space/inode maps */
+#define XR_E_BAD_STATE	8
+
+/* separate state bit, OR'ed into high (4th) bit of ex_state field */
+/* good_state() range-checks the state with the written bit masked off */
+#define XR_E_WRITTEN	0x8	/* extent has been written out, can't reclaim */
+#define good_state(state)	(((state) & (~XR_E_WRITTEN)) >= XR_E_UNKNOWN && \
+				((state) & (~XR_E_WRITTEN)) < XR_E_BAD_STATE)
+#define written(state)		((state) & XR_E_WRITTEN)
+#define set_written(state)	((state) |= XR_E_WRITTEN)
+
+/*
+ * bno extent tree functions
+ */
+void
+add_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount);
+
+extent_tree_node_t *
+findfirst_bno_extent(xfs_agnumber_t agno);
+
+extent_tree_node_t *
+find_bno_extent(xfs_agnumber_t agno, xfs_agblock_t agbno);
+
+extent_tree_node_t *
+findfirst_bno_extent(xfs_agnumber_t agno);
+
+#define findnext_bno_extent(exent_ptr)	\
+		((extent_tree_node_t *) ((exent_ptr)->avl_node.avl_nextino))
+
+void
+get_bno_extent(xfs_agnumber_t agno, extent_tree_node_t *ext);
+
+/*
+ * bcnt tree functions
+ */
+void
+add_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount);
+
+extent_tree_node_t *
+findfirst_bcnt_extent(xfs_agnumber_t agno);
+
+extent_tree_node_t *
+find_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t agbno);
+
+extent_tree_node_t *
+findbiggest_bcnt_extent(xfs_agnumber_t agno);
+
+extent_tree_node_t *
+findnext_bcnt_extent(xfs_agnumber_t agno, extent_tree_node_t *ext);
+
+extent_tree_node_t *
+get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount);
+
+/*
+ * duplicate extent tree functions
+ */
+void		add_dup_extent(xfs_agnumber_t agno,
+				xfs_agblock_t startblock,
+				xfs_extlen_t blockcount);
+
+int		search_dup_extent(xfs_mount_t *mp,
+				xfs_agnumber_t agno,
+				xfs_agblock_t agbno);
+
+void		add_rt_dup_extent(xfs_drtbno_t	startblock,
+				xfs_extlen_t	blockcount);
+
+int		search_rt_dup_extent(xfs_mount_t	*mp,
+					xfs_drtbno_t	bno);
+
+/*
+ * extent/tree recycling and deletion routines
+ */
+
+/*
+ * return an extent node to the extent node free list
+ */
+void		release_extent_tree_node(extent_tree_node_t *node);
+
+/*
+ * recycle all the nodes in the per-AG tree
+ */
+void		release_dup_extent_tree(xfs_agnumber_t agno);
+void		release_agbno_extent_tree(xfs_agnumber_t agno);
+void		release_agbcnt_extent_tree(xfs_agnumber_t agno);
+
+/*
+ * realtime duplicate extent tree - this one actually frees the memory
+ */
+void		free_rt_dup_extent_tree(xfs_mount_t *mp);
+
+/*
+ * per-AG extent trees shutdown routine -- all (bno, bcnt and dup)
+ * at once.  this one actually frees the memory instead of just recycling
+ * the nodes.
+ */
+void		incore_ext_teardown(xfs_mount_t *mp);
+
+/*
+ * inode definitions
+ */
+
+/* inode types */
+
+#define XR_INO_UNKNOWN	0		/* unknown */
+#define XR_INO_DIR	1		/* directory */
+#define XR_INO_RTDATA	2		/* realtime file */
+#define XR_INO_RTBITMAP	3		/* realtime bitmap inode */
+#define XR_INO_RTSUM	4		/* realtime summary inode */
+#define XR_INO_DATA	5		/* regular file */
+#define XR_INO_SYMLINK	6		/* symlink */
+#define XR_INO_CHRDEV	7		/* character device */
+#define XR_INO_BLKDEV	8		/* block device */
+#define XR_INO_SOCK	9		/* socket */
+#define XR_INO_FIFO	10		/* fifo */
+#define XR_INO_MOUNTPOINT 11		/* mountpoint */
+
+/* inode allocation tree */
+
+/*
+ * Inodes in the inode allocation trees are allocated in chunks.
+ * Those groups can be easily duplicated in our trees.
+ * Disconnected inodes are harder.  We can do one of two
+ * things in that case:  if we know the inode allocation btrees
+ * are good, then we can disallow directory references to unknown
+ * inode chunks.  If the inode allocation trees have been trashed or
+ * we feel like being aggressive, then as we hit unknown inodes,
+ * we can search on the disk for all contiguous inodes and see if
+ * they fit into chunks.  Before putting them into the inode tree,
+ * we can scan each inode starting at the earliest inode to see which
+ * ones are good.  This protects us from the pathological case of
+ * inodes appearing in user-data.  We still may have to mark the
+ * inodes as "possibly fake" so that if a file claims the blocks,
+ * we decide to believe the inodes, especially if they're not
+ * connected.
+ */
+
+#define PLIST_CHUNK_SIZE	4
+
+typedef xfs_ino_t parent_entry_t;
+
+typedef struct parent_list  {
+	__uint64_t		pmask;		/* 64-bit mask; presumably one bit per inode in the chunk -- confirm */
+	parent_entry_t		*pentries;	/* parent entries (parent_entry_t is an xfs_ino_t) */
+#ifdef DEBUG
+	short			cnt;		/* exists in DEBUG builds only */
+#endif
+} parent_list_t;
+
+typedef struct backptrs  {
+	__uint64_t		ino_reached;	/* bit == 1 if reached */
+	__uint64_t		ino_processed;	/* reference checked bit mask */
+	__uint32_t		nlinks[XFS_INODES_PER_CHUNK];	/* one slot per inode in the chunk */
+	parent_list_t		*parents;	/* parent list for the chunk */
+} backptrs_t;
+
+typedef struct ino_tree_node  {
+	avlnode_t		avl_node;	/* tree linkage; avl links are cast back to this type */
+	xfs_agino_t		ino_startnum;	/* starting inode # */
+	xfs_inofree_t		ir_free;	/* inode free bit mask */
+	__uint64_t		ino_confirmed;	/* confirmed bitmask */
+	__uint64_t		ino_isa_dir;	/* bit == 1 if a directory */
+	union  {			/* one pointer slot, read as either member */
+		backptrs_t	*backptrs;
+		parent_list_t	*plist;
+	} ino_un;
+} ino_tree_node_t;
+
+#define INOS_PER_IREC		(sizeof(__uint64_t) * NBBY)
+void				add_ino_backptrs(xfs_mount_t *mp);
+
+/*
+ * return an inode record to the free inode record pool
+ */
+void		free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec);
+
+/*
+ * get pulls the inode record from the good inode tree
+ */
+void		get_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec);
+
+ino_tree_node_t *findfirst_inode_rec(xfs_agnumber_t agno);
+ino_tree_node_t *find_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino);
+void		find_inode_rec_range(xfs_agnumber_t agno,
+			xfs_agino_t start_ino, xfs_agino_t end_ino,
+			ino_tree_node_t **first, ino_tree_node_t **last);
+
+/*
+ * set inode states -- setting an inode to used or free also
+ * automatically marks it as "existing".  Note -- all the inode
+ * add/set/get routines assume a valid inode number.
+ */
+ino_tree_node_t	*set_inode_used_alloc(xfs_agnumber_t agno, xfs_agino_t ino);
+ino_tree_node_t	*set_inode_free_alloc(xfs_agnumber_t agno, xfs_agino_t ino);
+
+void		print_inode_list(xfs_agnumber_t agno);
+void		print_uncertain_inode_list(xfs_agnumber_t agno);
+
+/*
+ * separate trees for uncertain inodes (they may not exist).
+ */
+ino_tree_node_t		*findfirst_uncertain_inode_rec(xfs_agnumber_t agno);
+void			add_inode_uncertain(xfs_mount_t *mp,
+						xfs_ino_t ino, int free);
+void			add_aginode_uncertain(xfs_agnumber_t agno,
+						xfs_agino_t agino, int free);
+void			get_uncertain_inode_rec(xfs_agnumber_t agno,
+						ino_tree_node_t *ino_rec);
+void			clear_uncertain_ino_cache(xfs_agnumber_t agno);
+
+/*
+ * return next in-order inode tree node.  takes an "ino_tree_node_t *"
+ */
+#define next_ino_rec(ino_node_ptr)	\
+		((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_nextino))
+/*
+ * return the next linked inode (forward avl tree link)-- meant to be used
+ * by linked list routines (uncertain inode routines/records)
+ */
+#define next_link_rec(ino_node_ptr)	\
+		((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_forw))
+
+/*
+ * Bit masks for ino_processed; MASKN shifts in 64 bits (needs n < 64)
+ */
+#define	XFS_INOPROC_MASK(i)	((__uint64_t)1 << (i))
+#define	XFS_INOPROC_MASKN(i,n)	((((__uint64_t)1 << (n)) - 1) << (i))
+
+#define	XFS_INOPROC_IS_PROC(rp, i) \
+	(((rp)->ino_un.backptrs->ino_processed & XFS_INOPROC_MASK((i))) == 0LL \
+		? 0 : 1)
+#define	XFS_INOPROC_SET_PROC(rp, i) \
+	((rp)->ino_un.backptrs->ino_processed |= XFS_INOPROC_MASK((i)))
+/*
+#define	XFS_INOPROC_CLR_PROC(rp, i) \
+	((rp)->ino_un.backptrs->ino_processed &= ~XFS_INOPROC_MASK((i)))
+*/
+
+/*
+ * same for ino_confirmed; MASKN shifts in 64 bits (needs n < 64)
+ */
+#define	XFS_INOCF_MASK(i)	((__uint64_t)1 << (i))
+#define	XFS_INOCF_MASKN(i,n)	((((__uint64_t)1 << (n)) - 1) << (i))
+
+#define	XFS_INOCF_IS_CF(rp, i) \
+		(((rp)->ino_confirmed & XFS_INOCF_MASK((i))) == 0LL \
+			? 0 : 1)
+#define	XFS_INOCF_SET_CF(rp, i) \
+			((rp)->ino_confirmed |= XFS_INOCF_MASK((i)))
+#define	XFS_INOCF_CLR_CF(rp, i) \
+			((rp)->ino_confirmed &= ~XFS_INOCF_MASK((i)))
+
+/*
+ * same for backptr->ino_reached
+ */
+#define	XFS_INO_RCHD_MASK(i)	((__uint64_t)1 << (i))
+
+#define	XFS_INO_RCHD_IS_RCHD(rp, i) \
+	(((rp)->ino_un.backptrs->ino_reached & XFS_INO_RCHD_MASK((i))) == 0LL \
+		? 0 : 1)
+#define	XFS_INO_RCHD_SET_RCHD(rp, i) \
+		((rp)->ino_un.backptrs->ino_reached |= XFS_INO_RCHD_MASK((i)))
+#define	XFS_INO_RCHD_CLR_RCHD(rp, i) \
+		((rp)->ino_un.backptrs->ino_reached &= ~XFS_INO_RCHD_MASK((i)))
+/*
+ * set/clear/test is inode a directory inode
+ */
+#define	XFS_INO_ISADIR_MASK(i)	((__uint64_t)1 << (i))
+
+#define inode_isadir(ino_rec, ino_offset) \
+	(((ino_rec)->ino_isa_dir & XFS_INO_ISADIR_MASK((ino_offset))) == 0LL \
+		? 0 : 1)
+#define set_inode_isadir(ino_rec, ino_offset) \
+		((ino_rec)->ino_isa_dir |= XFS_INO_ISADIR_MASK((ino_offset)))
+#define clear_inode_isadir(ino_rec, ino_offset) \
+		((ino_rec)->ino_isa_dir &= ~XFS_INO_ISADIR_MASK((ino_offset)))
+
+
+/*
+ * set/clear/test is inode known to be valid (although perhaps corrupt)
+ */
+#define clear_inode_confirmed(ino_rec, ino_offset) \
+			XFS_INOCF_CLR_CF((ino_rec), (ino_offset))
+
+#define set_inode_confirmed(ino_rec, ino_offset) \
+			XFS_INOCF_SET_CF((ino_rec), (ino_offset))
+
+#define is_inode_confirmed(ino_rec, ino_offset) \
+			XFS_INOCF_IS_CF(ino_rec, ino_offset)
+
+/*
+ * set/clear/test is inode free or used (each expands to one expression)
+ */
+#define set_inode_free(ino_rec, ino_offset) \
+	(XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \
+	XFS_INOBT_SET_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+#define set_inode_used(ino_rec, ino_offset) \
+	(XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \
+	XFS_INOBT_CLR_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+#define is_inode_used(ino_rec, ino_offset)	\
+	(!XFS_INOBT_IS_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+#define is_inode_free(ino_rec, ino_offset)	\
+	(XFS_INOBT_IS_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+/*
+ * add_inode_reached() is set on inode I only if I has been reached
+ * by an inode P claiming to be the parent and if I is a directory,
+ * the .. link in the I says that P is I's parent.
+ *
+ * add_inode_ref() is called every time a link to an inode is
+ * detected and drop_inode_ref() is called every time a link to
+ * an inode that we've counted is removed.
+ */
+
+void		add_inode_reached(ino_tree_node_t *ino_rec, int ino_offset);
+void		add_inode_ref(ino_tree_node_t *ino_rec, int ino_offset);
+void		drop_inode_ref(ino_tree_node_t *ino_rec, int ino_offset);
+int		is_inode_reached(ino_tree_node_t *ino_rec, int ino_offset);
+int		is_inode_referenced(ino_tree_node_t *ino_rec, int ino_offset);
+__uint32_t	num_inode_references(ino_tree_node_t *ino_rec, int ino_offset);
+
+/*
+ * has an inode been processed for phase 6 (reference count checking)?
+ * add_inode_refchecked() is set on an inode when it gets traversed
+ * during the reference count phase (6).  It's set so that if the inode
+ * is a directory, it's traversed (and its links counted) only once.
+ */
+#ifndef XR_INO_REF_DEBUG
+#define add_inode_refchecked(ino, ino_rec, ino_offset) \
+		XFS_INOPROC_SET_PROC((ino_rec), (ino_offset))
+#define is_inode_refchecked(ino, ino_rec, ino_offset) \
+		(XFS_INOPROC_IS_PROC(ino_rec, ino_offset) == 0LL ? 0 : 1)
+#else
+void add_inode_refchecked(xfs_ino_t ino,
+			ino_tree_node_t *ino_rec, int ino_offset);
+int is_inode_refchecked(xfs_ino_t ino,
+			ino_tree_node_t *ino_rec, int ino_offset);
+#endif /* XR_INO_REF_DEBUG */
+
+/*
+ * set/get inode number of parent -- works for directory inodes only
+ */
+void		set_inode_parent(ino_tree_node_t *irec, int ino_offset,
+					xfs_ino_t ino);
+#if 0
+void		clear_inode_parent(ino_tree_node_t *irec, int offset);
+#endif
+xfs_ino_t	get_inode_parent(ino_tree_node_t *irec, int ino_offset);
+
+/*
+ * bmap cursor for tracking and fixing bmap btrees.  All xfs btrees number
+ * the levels with 0 being the leaf and every level up being 1 greater.
+ */
+
+#define XR_MAX_BMLEVELS		10	/* XXX - rcc need to verify number */
+
+typedef struct bm_level_state  {
+	xfs_dfsbno_t		fsbno;
+	xfs_dfsbno_t		left_fsbno;
+	xfs_dfsbno_t		right_fsbno;
+	__uint64_t		first_key;
+	__uint64_t		last_key;
+/*
+	int			level;
+	__uint64_t		prev_last_key;
+	xfs_buf_t		*bp;
+	xfs_bmbt_block_t	*block;
+*/
+} bm_level_state_t;
+
+typedef struct bm_cursor  {
+	int			num_levels;
+	xfs_ino_t		ino;
+	xfs_dinode_t		*dip;
+	bm_level_state_t	level[XR_MAX_BMLEVELS];
+} bmap_cursor_t;
+/* put a bmap cursor into its empty state and record num_levels in it */
+void init_bm_cursor(bmap_cursor_t *cursor, int num_levels);
diff --git a/repair/incore_bmc.c b/repair/incore_bmc.c
new file mode 100644
index 000000000..89111feb5
--- /dev/null
+++ b/repair/incore_bmc.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+void
+init_bm_cursor(bmap_cursor_t *cursor, int num_levels)
+{
+	bm_level_state_t *lvl;
+
+	/* start from an all-zero cursor, then stamp in the "null" markers */
+	bzero(cursor, sizeof(bmap_cursor_t));
+	cursor->num_levels = num_levels;
+	cursor->ino = NULLFSINO;
+	for (lvl = cursor->level; lvl < &cursor->level[XR_MAX_BMLEVELS]; lvl++)  {
+		lvl->fsbno = NULLDFSBNO;
+		lvl->left_fsbno = NULLDFSBNO;
+		lvl->right_fsbno = NULLDFSBNO;
+		lvl->first_key = NULLDFILOFF;
+		lvl->last_key = NULLDFILOFF;
+	}
+}
diff --git a/repair/incore_ext.c b/repair/incore_ext.c
new file mode 100644
index 000000000..5c3708b66
--- /dev/null
+++ b/repair/incore_ext.c
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "avl64.h"
+#define ALLOC_NUM_EXTS		100
+
+/*
+ * paranoia -- account for any weird padding, 64/32-bit alignment, etc.
+ */
+typedef struct extent_alloc_rec  {
+	ba_rec_t		alloc_rec;
+	extent_tree_node_t	extents[ALLOC_NUM_EXTS];
+} extent_alloc_rec_t;
+
+typedef struct rt_extent_alloc_rec  {
+	ba_rec_t		alloc_rec;
+	rt_extent_tree_node_t	extents[ALLOC_NUM_EXTS];
+} rt_extent_alloc_rec_t;
+
+/*
+ * note:  there are 4 sets of incore things handled here:
+ * block bitmaps, extent trees, uncertain inode list,
+ * and inode tree.  The tree-based code uses the AVL
+ * tree package used by the IRIX kernel VM code
+ * (sys/avl.h).  The inode list code uses the same records
+ * as the inode tree code for convenience.  The bitmaps
+ * and bitmap operators are mostly macros defined in incore.h.
+ * There are one of everything per AG except for extent
+ * trees.  There's one duplicate extent tree, one bno and
+ * one bcnt extent tree per AG.  Not all of the above exist
+ * through all phases.  The duplicate extent tree gets trashed
+ * at the end of phase 4.  The bno/bcnt trees don't appear until
+ * phase 5.  The uncertain inode list goes away at the end of
+ * phase 3.  The inode tree and bno/bcnt trees go away after phase 5.
+ */
+typedef struct ext_flist_s  {
+	extent_tree_node_t	*list;
+	int			cnt;
+} ext_flist_t;
+
+static ext_flist_t ext_flist;
+
+typedef struct rt_ext_flist_s  {
+	rt_extent_tree_node_t	*list;
+	int			cnt;
+} rt_ext_flist_t;
+
+static rt_ext_flist_t rt_ext_flist;
+
+static avl64tree_desc_t	*rt_ext_tree_ptr;	/* dup extent tree for rt */
+
+static avltree_desc_t	**extent_tree_ptrs;	/* array of extent tree ptrs */
+						/* one per ag for dups */
+static avltree_desc_t	**extent_bno_ptrs;	/*
+						 * array of extent tree ptrs
+						 * one per ag for free extents
+						 * sorted by starting block
+						 * number
+						 */
+static avltree_desc_t	**extent_bcnt_ptrs;	/*
+						 * array of extent tree ptrs
+						 * one per ag for free extents
+						 * sorted by size
+						 */
+
+/*
+ * list of allocated "blocks" for easy freeing later
+ */
+static ba_rec_t		*ba_list;
+static ba_rec_t		*rt_ba_list;
+
+/*
+ * extent tree stuff is avl trees of duplicate extents,
+ * sorted in order by block number.  there is one tree per ag.
+ */
+
+static extent_tree_node_t *
+mk_extent_tree_nodes(xfs_agblock_t new_startblock,
+	xfs_extlen_t new_blockcount, extent_state_t new_state)
+{
+	extent_alloc_rec_t	*chunk;
+	extent_tree_node_t	*node;
+	int			n;
+
+	/*
+	 * free list empty -- carve a fresh chunk of nodes onto it
+	 */
+	if (ext_flist.cnt == 0)  {
+		ASSERT(ext_flist.list == NULL);
+
+		chunk = malloc(sizeof(extent_alloc_rec_t));
+		if (chunk == NULL)
+			do_error("couldn't allocate new extent descriptors.\n");
+
+		record_allocation(&chunk->alloc_rec, ba_list);
+
+		for (n = 0; n < ALLOC_NUM_EXTS; n++)  {
+			node = &chunk->extents[n];
+			node->avl_node.avl_nextino =
+					(avlnode_t *) ext_flist.list;
+			ext_flist.list = node;
+			ext_flist.cnt++;
+		}
+	}
+
+	ASSERT(ext_flist.list != NULL);
+
+	/* pop the head of the free list and fill it in */
+	node = ext_flist.list;
+	ext_flist.list = (extent_tree_node_t *) node->avl_node.avl_nextino;
+	ext_flist.cnt--;
+	node->avl_node.avl_nextino = NULL;
+	node->ex_startblock = new_startblock;
+	node->ex_blockcount = new_blockcount;
+	node->ex_state = new_state;
+	node->next = NULL;
+
+	return(node);
+}
+
+void
+release_extent_tree_node(extent_tree_node_t *node)
+{
+	/* hand the node back: it becomes the new head of the
+	 * free list, linked through its avl_nextino pointer */
+	node->avl_node.avl_nextino = (avlnode_t *) ext_flist.list;
+	ext_flist.list = node;
+	ext_flist.cnt++;
+}
+
+/*
+ * routines to recycle all nodes in a tree.  it walks the tree
+ * and puts all nodes back on the free list so the nodes can be
+ * reused.  the duplicate and bno/bcnt extent trees for each AG
+ * are recycled after they're no longer needed to save memory
+ */
+void
+release_extent_tree(avltree_desc_t *tree)
+{
+	extent_tree_node_t	*node;
+	extent_tree_node_t	*next_node;
+	extent_tree_node_t	*chained;
+	extent_tree_node_t	*next_chained;
+
+	/* nothing to recycle in an empty tree */
+	if (tree->avl_firstino == NULL)
+		return;
+
+	/*
+	 * walk the nodes in order via the avl_nextino links.  each
+	 * successor is fetched before the node is released because
+	 * releasing a node reuses its avl_nextino as the free-list link.
+	 */
+	for (node = (extent_tree_node_t *) tree->avl_firstino;
+			node != NULL;
+			node = next_node)  {
+		next_node = (extent_tree_node_t *) node->avl_node.avl_nextino;
+
+		/*
+		 * only bcnt trees hang extra nodes off ->next; for
+		 * every other tree this inner loop does nothing
+		 */
+		for (chained = node->next; chained != NULL;
+				chained = next_chained)  {
+			next_chained = chained->next;
+			release_extent_tree_node(chained);
+		}
+		release_extent_tree_node(node);
+	}
+
+	tree->avl_root = tree->avl_firstino = NULL;
+}
+
+/*
+ * top-level (visible) routines
+ */
+void
+release_dup_extent_tree(xfs_agnumber_t agno)
+{
+	/* recycle every node of this AG's duplicate
+	 * extent tree back onto the node free list */
+	release_extent_tree(extent_tree_ptrs[agno]);
+}
+
+void
+release_agbno_extent_tree(xfs_agnumber_t agno)
+{
+	/* recycle every node of this AG's by-block-number
+	 * free extent tree back onto the node free list */
+	release_extent_tree(extent_bno_ptrs[agno]);
+}
+
+void
+release_agbcnt_extent_tree(xfs_agnumber_t agno)
+{
+	/* recycle every node of this AG's by-size free
+	 * extent tree back onto the node free list */
+	release_extent_tree(extent_bcnt_ptrs[agno]);
+}
+
+/*
+ * the next 4 routines manage the trees of free extents -- 2 trees
+ * per AG.  The first tree is sorted by block number.  The second
+ * tree is sorted by extent size.  This is the bno tree.
+ */
+void
+add_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount)
+{
+	avltree_desc_t		*bno_tree;
+	extent_tree_node_t	*node;
+
+	ASSERT(extent_bno_ptrs != NULL);
+	ASSERT(extent_bno_ptrs[agno] != NULL);
+	bno_tree = extent_bno_ptrs[agno];
+	/* new node is tagged XR_E_FREE; a refused insert goes to do_error */
+	node = mk_extent_tree_nodes(startblock, blockcount, XR_E_FREE);
+	if (avl_insert(bno_tree, (avlnode_t *) node) == NULL)
+		do_error("xfs_repair:  duplicate bno extent range\n");
+}
+
+extent_tree_node_t *
+findfirst_bno_extent(xfs_agnumber_t agno)
+{
+	ASSERT(extent_bno_ptrs != NULL);
+	ASSERT(extent_bno_ptrs[agno] != NULL);
+	/* head of the in-order chain of this AG's by-block-number tree */
+	return((extent_tree_node_t *) (extent_bno_ptrs[agno]->avl_firstino));
+}
+
+extent_tree_node_t *
+find_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock)
+{
+	avlnode_t	*hit;
+	ASSERT(extent_bno_ptrs != NULL);
+	ASSERT(extent_bno_ptrs[agno] != NULL);
+	hit = avl_find(extent_bno_ptrs[agno], startblock);	/* NULL: no match */
+	return((extent_tree_node_t *) hit);
+}
+
+/*
+ * delete a node that's in the tree (pointer obtained by a find routine)
+ */
+void
+get_bno_extent(xfs_agnumber_t agno, extent_tree_node_t *ext)
+{
+	avltree_desc_t	*bno_tree;
+	ASSERT(extent_bno_ptrs != NULL);
+	ASSERT(extent_bno_ptrs[agno] != NULL);
+	bno_tree = extent_bno_ptrs[agno];
+	/* unlink only -- the node is not put back on the free list here */
+	avl_delete(bno_tree, &ext->avl_node);
+}
+
+/*
+ * normalizing constant for bcnt size -> address conversion (bigger count,
+ * smaller key).  NOTE(review): the bcnt avl ops in this file have their
+ * BCNT_ADDR calls commented out and key on the raw count instead.
+ */
+#define MAXBCNT		0xFFFFFFFF
+#define BCNT_ADDR(cnt)	((unsigned int) MAXBCNT - (cnt))
+
+/*
+ * the next 4 routines manage the trees of free extents -- 2 trees
+ * per AG.  The first tree is sorted by block number.  The second
+ * tree is sorted by extent size.  This is the bcnt tree.
+ */
+void
+add_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount)
+{
+	extent_tree_node_t *ext, *prev, *current, *top;
+	xfs_agblock_t		tmp_startblock;
+	xfs_extlen_t		tmp_blockcount;
+	extent_state_t		tmp_state;
+
+	ASSERT(extent_bcnt_ptrs != NULL);
+	ASSERT(extent_bcnt_ptrs[agno] != NULL);
+
+	ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_FREE);
+
+	ASSERT(ext->next == NULL);
+
+#ifdef XR_BCNT_TRACE
+	fprintf(stderr, "adding bcnt: agno = %u, start = %u, count = %u\n",
+			agno, startblock, blockcount);
+#endif
+	if ((current = (extent_tree_node_t *) avl_find(extent_bcnt_ptrs[agno],
+							blockcount)) != NULL)  {
+		/*
+		 * avl tree code doesn't handle dups so chain the node off
+		 * the tree node of that size, in increasing startblock order
+		 */
+		top = prev = current;
+		while (current != NULL &&
+				startblock > current->ex_startblock)  {
+			prev = current;
+			current = current->next;
+		}
+
+		if (top == current)  {
+			ASSERT(top == prev);
+			/*
+			 * the new extent sorts before the head.  the head
+			 * is the node linked into the tree, so swap the
+			 * values of the new element and the head, then
+			 * insert as the 2nd element on the list.
+			 * see the comment in get_bcnt_extent()
+			 * as to why we have to do this.
+			 */
+			tmp_startblock = top->ex_startblock;
+			tmp_blockcount = top->ex_blockcount;
+			tmp_state = top->ex_state;
+
+			top->ex_startblock = ext->ex_startblock;
+			top->ex_blockcount = ext->ex_blockcount;
+			top->ex_state = ext->ex_state;
+
+			ext->ex_startblock = tmp_startblock;
+			ext->ex_blockcount = tmp_blockcount;
+			ext->ex_state = tmp_state;
+
+			current = top->next;
+			prev = top;
+		}
+
+		prev->next = ext;
+		ext->next = current;
+
+		return;
+	}
+	/* first extent of this size -- it goes into the tree itself */
+	if (avl_insert(extent_bcnt_ptrs[agno], (avlnode_t *) ext) == NULL)  {
+		do_error("xfs_repair:  duplicate bcnt extent range\n");
+	}
+
+	return;
+}
+
+extent_tree_node_t *
+findfirst_bcnt_extent(xfs_agnumber_t agno)
+{
+	ASSERT(extent_bcnt_ptrs != NULL);
+	ASSERT(extent_bcnt_ptrs[agno] != NULL);
+	/* head of the in-order chain of this AG's by-size tree */
+	return((extent_tree_node_t *) (extent_bcnt_ptrs[agno]->avl_firstino));
+}
+
+extent_tree_node_t *
+findbiggest_bcnt_extent(xfs_agnumber_t agno)
+{
+	extern avlnode_t *avl_lastino(avlnode_t *root);
+	avlnode_t	*last;
+
+	ASSERT(extent_bcnt_ptrs != NULL);
+	ASSERT(extent_bcnt_ptrs[agno] != NULL);
+	last = avl_lastino(extent_bcnt_ptrs[agno]->avl_root);
+	return((extent_tree_node_t *) last);
+}
+
+extent_tree_node_t *
+findnext_bcnt_extent(xfs_agnumber_t agno, extent_tree_node_t *ext)
+{
+	avlnode_t		*head;
+	extent_tree_node_t	*succ;
+
+	/* still inside a chain of equal-sized extents: step along it */
+	if (ext->next != NULL)  {
+		ASSERT(ext->ex_blockcount == ext->next->ex_blockcount);
+		ASSERT(ext->ex_startblock < ext->next->ex_startblock);
+		return(ext->next);
+	}
+
+	/*
+	 * end of the chain (or a lone node).  look the size up again
+	 * to reach the node at the top of the list, because that is
+	 * the one whose avl_nextino the AVL code maintains.
+	 */
+	head = avl_find(extent_bcnt_ptrs[agno], ext->ex_blockcount);
+	ASSERT(head != NULL);
+	succ = (extent_tree_node_t *) head->avl_nextino;
+	ASSERT(succ == NULL || ext->ex_blockcount < succ->ex_blockcount);
+	return(succ);
+}
+
+/*
+ * this is meant to be called after you walk the bno tree to
+ * determine exactly which extent you want (so you'll know the
+ * desired value for startblock when you call this routine).
+ */
+extent_tree_node_t *
+get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount)
+{
+	extent_tree_node_t	*ext, *prev, *top;
+	xfs_agblock_t		tmp_startblock;
+	xfs_extlen_t		tmp_blockcount;
+	extent_state_t		tmp_state;
+
+	prev = NULL;
+	ASSERT(extent_bcnt_ptrs != NULL);
+	ASSERT(extent_bcnt_ptrs[agno] != NULL);
+
+	if ((ext = (extent_tree_node_t *) avl_find(extent_bcnt_ptrs[agno],
+							blockcount)) == NULL)
+		return(NULL);
+
+	top = ext;
+
+	if (ext->next != NULL)  {
+		/*
+		 * pull it off the list
+		 */
+		while (ext != NULL && startblock != ext->ex_startblock)  {
+			prev = ext;
+			ext = ext->next;
+		}
+		/* not on the list: fail even if ASSERT is compiled out */
+		ASSERT(ext != NULL);
+		if (ext == NULL)
+			return(NULL);
+		if (ext == top)  {
+			/*
+			 * this node is linked into the tree so we
+			 * swap the core values so we can delete
+			 * the next item on the list instead of
+			 * the head of the list.  the rest of the
+			 * tree has pointers to the head's memory,
+			 * so unlinking the head itself would be a
+			 * bad idea (and a tree delete plus a tree
+			 * insert of the next node costs more).
+			 */
+			tmp_startblock = ext->next->ex_startblock;
+			tmp_blockcount = ext->next->ex_blockcount;
+			tmp_state = ext->next->ex_state;
+			ext->next->ex_startblock = ext->ex_startblock;
+			ext->next->ex_blockcount = ext->ex_blockcount;
+			ext->next->ex_state = ext->ex_state;
+			ext->ex_startblock = tmp_startblock;
+			ext->ex_blockcount = tmp_blockcount;
+			ext->ex_state = tmp_state;
+
+			ext = ext->next;
+			prev = top;
+		}
+		/*
+		 * now, a simple list deletion
+		 */
+		prev->next = ext->next;
+		ext->next = NULL;
+	} else  {
+		/*
+		 * no list, just one node.  delete it from the tree, but
+		 * only if it really is the extent that was asked for.
+		 */
+		ASSERT(ext->ex_startblock == startblock);
+		if (ext->ex_startblock != startblock)
+			return(NULL);
+		avl_delete(extent_bcnt_ptrs[agno], &ext->avl_node);
+	}
+
+	ASSERT(ext->ex_startblock == startblock);
+	ASSERT(ext->ex_blockcount == blockcount);
+	return(ext);
+}
+
+/*
+ * the next 2 routines manage the trees of duplicate extents -- 1 tree
+ * per AG
+ */
+void
+add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount)
+{
+	extent_tree_node_t *first, *last, *ext, *next_ext, *stop;
+	xfs_agblock_t new_startblock;
+	xfs_agblock_t new_endblock;	/* first block past the merged range */
+
+	ASSERT(agno < glob_agcount);
+
+#ifdef XR_DUP_TRACE
+	fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock, blockcount);
+#endif
+	/*
+	 * widen the search by one block on each side so extents that merely
+	 * touch the new one are found too; don't wrap below block 0.
+	 */
+	avl_findranges(extent_tree_ptrs[agno],
+		startblock > 0 ? startblock - 1 : 0,
+		startblock + blockcount + 1,
+		(avlnode_t **) &first, (avlnode_t **) &last);
+	if (first == NULL && last == NULL)  {
+		/* nothing, just make and insert new extent */
+		ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_MULT);
+		if (avl_insert(extent_tree_ptrs[agno],
+				(avlnode_t *) ext) == NULL)  {
+			do_error("xfs_repair:  duplicate extent range\n");
+		}
+
+		return;
+	}
+
+	ASSERT(first != NULL && last != NULL);
+
+	/*
+	 * find the new composite range, delete old extent nodes as we go.
+	 * the end of the walk is latched first because "last" itself may
+	 * be deleted inside the loop.
+	 */
+	new_startblock = startblock;
+	new_endblock = startblock + blockcount;
+	stop = (extent_tree_node_t *) last->avl_node.avl_nextino;
+
+	for (ext = first; ext != stop; ext = next_ext)  {
+		/*
+		 * preserve the next inorder node
+		 */
+		next_ext = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+		/*
+		 * just bail if the new extent is contained within an old
+		 * one -- compare both ends, not the raw lengths
+		 */
+		if (ext->ex_startblock <= startblock &&
+				ext->ex_startblock + ext->ex_blockcount >=
+				startblock + blockcount)
+			return;
+		/*
+		 * merge extents that overlap or touch the new one; both
+		 * tests must hold, and the composite keeps the lowest
+		 * start and the highest end seen so far
+		 */
+		if (ext->ex_startblock + ext->ex_blockcount >= startblock
+			&& ext->ex_startblock <= startblock + blockcount)  {
+			if (ext->ex_startblock < new_startblock)
+				new_startblock = ext->ex_startblock;
+
+			if (ext->ex_startblock + ext->ex_blockcount >
+					new_endblock)
+				new_endblock = ext->ex_startblock +
+							ext->ex_blockcount;
+
+			avl_delete(extent_tree_ptrs[agno], (avlnode_t *) ext);
+		}
+	}
+
+	ext = mk_extent_tree_nodes(new_startblock,
+			new_endblock - new_startblock, XR_E_MULT);
+
+	if (avl_insert(extent_tree_ptrs[agno], (avlnode_t *) ext) == NULL)  {
+		do_error("xfs_repair:  duplicate extent range\n");
+	}
+}
+
+/*
+ * returns 1 if block is a dup, 0 if not
+ */
+/* ARGSUSED */
+int
+search_dup_extent(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
+{
+	ASSERT(agno < glob_agcount);
+
+	/*
+	 * a hit means agbno falls in a recorded duplicate extent
+	 */
+	return(avl_findrange(extent_tree_ptrs[agno], agbno) != NULL ? 1 : 0);
+}
+
+static __psunsigned_t
+avl_ext_start(avlnode_t *node)
+{
+	extent_tree_node_t	*ext = (extent_tree_node_t *) node;
+	return((__psunsigned_t) ext->ex_startblock);
+}
+
+static __psunsigned_t
+avl_ext_end(avlnode_t *node)
+{
+	extent_tree_node_t	*ext = (extent_tree_node_t *) node;
+	/* start + count, i.e. one past the last block of the extent */
+	return((__psunsigned_t) (ext->ex_startblock + ext->ex_blockcount));
+}
+
+/*
+ * AVL key for the bcnt tree.  The size-inverting BCNT_ADDR form below is
+ * disabled; the key is the raw block count, so bigger extents sort later.
+ */
+static __psunsigned_t
+avl_ext_bcnt_start(avlnode_t *node)
+{
+/*
+	return((__psunsigned_t) (BCNT_ADDR(((extent_tree_node_t *)
+						node)->ex_blockcount)));
+*/
+	return((__psunsigned_t) ((extent_tree_node_t *)node)->ex_blockcount);
+}
+
+static __psunsigned_t
+avl_ext_bcnt_end(avlnode_t *node)
+{
+/* disabled size-inverting form; the end key is the raw block count too
+	return((__psunsigned_t) (BCNT_ADDR(((extent_tree_node_t *)
+						node)->ex_blockcount)));
+*/
+	return((__psunsigned_t) ((extent_tree_node_t *)node)->ex_blockcount);
+}
+
+avlops_t avl_extent_bcnt_tree_ops = {
+	avl_ext_bcnt_start,	/* key start: the extent's block count */
+	avl_ext_bcnt_end	/* key end: the same block count */
+};
+
+avlops_t avl_extent_tree_ops = {
+	avl_ext_start,		/* key start: ex_startblock */
+	avl_ext_end		/* key end: ex_startblock + ex_blockcount */
+};
+
+/*
+ * for real-time extents -- have to dup code since realtime extent
+ * startblocks can be 64-bit values.
+ */
+static rt_extent_tree_node_t *
+mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock,
+	xfs_extlen_t new_blockcount, extent_state_t new_state)
+{
+	rt_extent_alloc_rec_t	*chunk;
+	rt_extent_tree_node_t	*node;
+	int			n;
+
+	/*
+	 * free list empty -- carve a fresh chunk of nodes onto it
+	 */
+	if (rt_ext_flist.cnt == 0)  {
+		ASSERT(rt_ext_flist.list == NULL);
+
+		chunk = malloc(sizeof(rt_extent_alloc_rec_t));
+		if (chunk == NULL)
+			do_error("couldn't allocate new extent descriptors.\n");
+
+		record_allocation(&chunk->alloc_rec, rt_ba_list);
+
+		for (n = 0; n < ALLOC_NUM_EXTS; n++)  {
+			node = &chunk->extents[n];
+			node->avl_node.avl_nextino =
+					(avlnode_t *) rt_ext_flist.list;
+			rt_ext_flist.list = node;
+			rt_ext_flist.cnt++;
+		}
+	}
+
+	ASSERT(rt_ext_flist.list != NULL);
+
+	/* pop the head of the free list and fill it in */
+	node = rt_ext_flist.list;
+	rt_ext_flist.list = (rt_extent_tree_node_t *) node->avl_node.avl_nextino;
+	rt_ext_flist.cnt--;
+	node->avl_node.avl_nextino = NULL;
+	node->rt_startblock = new_startblock;
+	node->rt_blockcount = new_blockcount;
+	node->rt_state = new_state;
+
+	return(node);
+}
+
+#if 0
+void
+release_rt_extent_tree_node(rt_extent_tree_node_t *node)
+{
+	node->avl_node.avl_nextino = (avlnode_t *) rt_ext_flist.list;
+	rt_ext_flist.list = node;
+	rt_ext_flist.cnt++;
+
+	return;
+}
+
+void
+release_rt_extent_tree()
+{
+	extent_tree_node_t	*ext;
+	extent_tree_node_t	*tmp;
+	extent_tree_node_t	*lext;
+	extent_tree_node_t	*ltmp;
+	avl64tree_desc_t	*tree;
+
+	tree = rt_extent_tree_ptr;
+
+	if (tree->avl_firstino == NULL)
+		return;
+
+	ext = (extent_tree_node_t *) tree->avl_firstino;
+
+	while (ext != NULL)  {
+		tmp = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+		release_rt_extent_tree_node(ext);
+		ext = tmp;
+	}
+
+	tree->avl_root = tree->avl_firstino = NULL;
+
+	return;
+}
+#endif
+
+/*
+ * don't need release functions for realtime tree teardown
+ * since we only have one tree, not one per AG
+ */
+/* ARGSUSED */
+void
+free_rt_dup_extent_tree(xfs_mount_t *mp)
+{
+	ASSERT(mp->m_sb.sb_rblocks != 0);
+
+	free_allocations(rt_ba_list);
+	free(rt_ext_tree_ptr);
+	rt_ba_list = NULL;
+	rt_ext_tree_ptr = NULL;
+	/* the node free list pointed into the chunks just freed -- forget it */
+	rt_ext_flist.list = NULL;
+	rt_ext_flist.cnt = 0;
+}
+
+/*
+ * add a duplicate real-time extent
+ */
+void
+add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
+{
+	rt_extent_tree_node_t *first, *last, *ext, *next_ext, *stop;
+	xfs_drtbno_t new_startblock;
+	xfs_drtbno_t new_endblock;	/* first block past the merged range */
+
+	/*
+	 * widen the search by one block on each side so extents that merely
+	 * touch the new one are found too; don't wrap below block 0.
+	 */
+	avl64_findranges(rt_ext_tree_ptr,
+		startblock > 0 ? startblock - 1 : 0,
+		startblock + blockcount + 1,
+		(avl64node_t **) &first, (avl64node_t **) &last);
+	if (first == NULL && last == NULL)  {
+		/* nothing, just make and insert new extent */
+		ext = mk_rt_extent_tree_nodes(startblock,
+				blockcount, XR_E_MULT);
+
+		if (avl64_insert(rt_ext_tree_ptr,
+				(avl64node_t *) ext) == NULL)  {
+			do_error("xfs_repair:  duplicate extent range\n");
+		}
+
+		return;
+	}
+
+	ASSERT(first != NULL && last != NULL);
+
+	/*
+	 * find the new composite range, delete old extent nodes as we go.
+	 * the end of the walk is latched first because "last" itself may
+	 * be deleted inside the loop.
+	 */
+	new_startblock = startblock;
+	new_endblock = startblock + blockcount;
+	stop = (rt_extent_tree_node_t *) last->avl_node.avl_nextino;
+
+	for (ext = first; ext != stop; ext = next_ext)  {
+		/*
+		 * preserve the next inorder node
+		 */
+		next_ext = (rt_extent_tree_node_t *) ext->avl_node.avl_nextino;
+		/*
+		 * just bail if the new extent is contained within an old
+		 * one -- compare both ends, not the raw lengths
+		 */
+		if (ext->rt_startblock <= startblock &&
+				ext->rt_startblock + ext->rt_blockcount >=
+				startblock + blockcount)
+			return;
+		/*
+		 * merge extents that overlap or touch the new one; both
+		 * tests must hold, and the composite keeps the lowest
+		 * start and the highest end seen so far
+		 */
+		if (ext->rt_startblock + ext->rt_blockcount >= startblock
+			&& ext->rt_startblock <= startblock + blockcount)  {
+			if (ext->rt_startblock < new_startblock)
+				new_startblock = ext->rt_startblock;
+
+			if (ext->rt_startblock + ext->rt_blockcount >
+					new_endblock)
+				new_endblock = ext->rt_startblock +
+							ext->rt_blockcount;
+
+			avl64_delete(rt_ext_tree_ptr, (avl64node_t *) ext);
+		}
+	}
+
+	ext = mk_rt_extent_tree_nodes(new_startblock,
+			new_endblock - new_startblock, XR_E_MULT);
+
+	if (avl64_insert(rt_ext_tree_ptr, (avl64node_t *) ext) == NULL)  {
+		do_error("xfs_repair:  duplicate extent range\n");
+	}
+}
+
+/*
+ * returns 1 if block is a dup, 0 if not -- i.e. whether realtime
+ * block bno lies inside an extent recorded in the rt duplicate
+ * extent tree.  mp is not used.
+ */
+/* ARGSUSED */
+int
+search_rt_dup_extent(xfs_mount_t *mp, xfs_drtbno_t bno)
+{
+	/* a non-NULL result from avl64_findrange() counts as a dup */
+	return(avl64_findrange(rt_ext_tree_ptr, bno) != NULL ? 1 : 0);
+}
+
+/* avl64 key callback: first realtime block of the node's extent */
+static __uint64_t
+avl64_rt_ext_start(avl64node_t *node)
+{
+	rt_extent_tree_node_t *rt = (rt_extent_tree_node_t *) node;
+	return(rt->rt_startblock);
+}
+
+/* avl64 key callback: start block plus block count of the extent */
+static __uint64_t
+avl64_ext_end(avl64node_t *node)
+{
+	rt_extent_tree_node_t *rt = (rt_extent_tree_node_t *) node;
+	return(rt->rt_startblock + rt->rt_blockcount);
+}
+
+avl64ops_t avl64_extent_tree_ops = { avl64_rt_ext_start, avl64_ext_end };
+
+/*
+ * allocate and initialize the per-AG extent tree descriptors (dup,
+ * free-by-bno, free-by-bcnt), the one realtime dup extent tree and
+ * both extent node free lists.  malloc failures go to do_error().
+ */
+void
+incore_ext_init(xfs_mount_t *mp)
+{
+	xfs_agnumber_t i;
+	xfs_agnumber_t agcount = mp->m_sb.sb_agcount;
+
+	ba_list = NULL;
+	rt_ba_list = NULL;
+
+	if ((extent_tree_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc dup extent tree descriptor table\n");
+	if ((extent_bno_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc free by-bno extent tree descriptor table\n");
+	if ((extent_bcnt_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc free by-bcnt extent tree descriptor table\n");
+
+	for (i = 0; i < agcount; i++)  {
+		if ((extent_tree_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc dup extent tree descriptor\n");
+		if ((extent_bno_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc bno extent tree descriptor\n");
+		if ((extent_bcnt_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc bcnt extent tree descriptor\n");
+		avl_init_tree(extent_tree_ptrs[i], &avl_extent_tree_ops);
+		avl_init_tree(extent_bno_ptrs[i], &avl_extent_tree_ops);
+		avl_init_tree(extent_bcnt_ptrs[i], &avl_extent_bcnt_tree_ops);
+	}
+
+	/* size from the pointer so it matches what avl64_init_tree() gets */
+	if ((rt_ext_tree_ptr = malloc(sizeof(*rt_ext_tree_ptr))) == NULL)
+		do_error("couldn't malloc dup rt extent tree descriptor\n");
+	avl64_init_tree(rt_ext_tree_ptr, &avl64_extent_tree_ops);
+
+	ext_flist.cnt = 0;
+	ext_flist.list = NULL;
+	rt_ext_flist.cnt = 0;
+	rt_ext_flist.list = NULL;
+}
+
+/*
+ * this routine actually frees all the memory used to track per-AG
+ * trees: what ba_list holds, each AG's three descriptors, the tables
+ */
+void
+incore_ext_teardown(xfs_mount_t *mp)
+{
+	xfs_agnumber_t agno;
+	xfs_agnumber_t nags = mp->m_sb.sb_agcount;
+
+	free_allocations(ba_list);
+	for (agno = 0; agno < nags; agno++)  {
+		free(extent_tree_ptrs[agno]);
+		free(extent_bno_ptrs[agno]);
+		free(extent_bcnt_ptrs[agno]);
+	}
+
+	free(extent_bcnt_ptrs);
+	extent_bcnt_ptrs = NULL;
+	free(extent_bno_ptrs);
+	extent_bno_ptrs = NULL;
+	free(extent_tree_ptrs);
+	extent_tree_ptrs = NULL;
+}
+
+/*
+ * count the nodes of an extent tree, starting at avl_firstino.  A
+ * non-zero whichtree steps with findnext_bcnt_extent(), zero with
+ * findnext_bno_extent().
+ */
+int
+count_extents(xfs_agnumber_t agno, avltree_desc_t *tree, int whichtree)
+{
+	extent_tree_node_t *cur = (extent_tree_node_t *) tree->avl_firstino;
+	int total;
+
+	for (total = 0; cur != NULL; total++)  {
+		cur = whichtree ? findnext_bcnt_extent(agno, cur)
+				: findnext_bno_extent(cur);
+	}
+
+	return(total);
+}
+
+/*
+ * walk AG agno's by-bno extent tree; store the summed block count
+ * in *numblocks and return the number of extents seen
+ */
+int
+count_bno_extents_blocks(xfs_agnumber_t agno, uint *numblocks)
+{
+	extent_tree_node_t *cur;
+	__uint64_t sum = 0;
+	int nextents = 0;
+
+	ASSERT(agno < glob_agcount);
+
+	for (cur = (extent_tree_node_t *) extent_bno_ptrs[agno]->avl_firstino;
+			cur != NULL; cur = findnext_bno_extent(cur))  {
+		sum += cur->ex_blockcount;
+		nextents++;
+	}
+	/* the 64-bit running total is narrowed to uint on assignment */
+	*numblocks = sum;
+	return(nextents);
+}
+
+int
+count_bno_extents(xfs_agnumber_t agno)
+{
+	ASSERT(agno < glob_agcount);	/* agno indexes the per-AG table below */
+	return(count_extents(agno, extent_bno_ptrs[agno], 0));	/* 0: step by bno */
+}
+
+int
+count_bcnt_extents(xfs_agnumber_t agno)
+{
+	ASSERT(agno < glob_agcount);	/* agno indexes the per-AG table below */
+	return(count_extents(agno, extent_bcnt_ptrs[agno], 1));	/* 1: step by bcnt */
+}
diff --git a/repair/incore_ino.c b/repair/incore_ino.c
new file mode 100644
index 000000000..bb14a370f
--- /dev/null
+++ b/repair/incore_ino.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+extern avlnode_t	*avl_firstino(avlnode_t *root);
+
+/*
+ * array of inode tree ptrs, one per ag
+ */
+static avltree_desc_t	**inode_tree_ptrs;
+
+/*
+ * ditto for uncertain inodes
+ */
+static avltree_desc_t	**inode_uncertain_tree_ptrs;
+
+#define ALLOC_NUM_INOS		100
+
+/* free list of inode tree nodes (inode records) */
+
+typedef struct ino_flist_s  {
+	ino_tree_node_t		*list;	/* head; chained through avl_node.avl_nextino */
+	ino_tree_node_t		*last;	/* never read or written in this file */
+	long long		cnt;	/* number of records on the list */
+} ino_flist_t;
+
+static ino_flist_t ino_flist;	/* reset by incore_ino_init() before use */
+
+/*
+ * next is the uncertain inode list -- a sorted (in ascending order)
+ * list of inode records sorted on the starting inode number.  There
+ * is one list per ag.
+ */
+
+/*
+ * hand out one inode record for use by the inode trees.  Records come
+ * from ino_flist; when that is empty a batch of ALLOC_NUM_INOS records
+ * is malloc'ed and threaded onto it first.  starting_ino is not used
+ * here -- the callers fill in ino_startnum themselves.
+ *
+ * IMPORTANT:  all inodes (inode records) start off as free and
+ *		unconfirmed.
+ */
+/* ARGSUSED */
+static ino_tree_node_t *
+mk_ino_tree_nodes(xfs_agino_t starting_ino)
+{
+	ino_tree_node_t *rec;
+	ino_tree_node_t *batch;
+	int n;
+
+	if (ino_flist.cnt == 0)  {
+		ASSERT(ino_flist.list == NULL);
+		batch = malloc(ALLOC_NUM_INOS * sizeof(ino_tree_node_t));
+		if (batch == NULL)
+			do_error("inode map malloc failed\n");
+		/* push each new record onto the head of the free list */
+		for (n = 0; n < ALLOC_NUM_INOS; n++)  {
+			batch[n].avl_node.avl_nextino =
+				(avlnode_t *) ino_flist.list;
+			ino_flist.list = &batch[n];
+			ino_flist.cnt++;
+		}
+	}
+
+	ASSERT(ino_flist.list != NULL);
+
+	/* pop the head record and clear its tree links */
+	rec = ino_flist.list;
+	ino_flist.list = (ino_tree_node_t *) rec->avl_node.avl_nextino;
+	ino_flist.cnt--;
+	rec->avl_node.avl_nextino = NULL;
+	rec->avl_node.avl_forw = NULL;
+	rec->avl_node.avl_back = NULL;
+
+	/* all free bits set, nothing confirmed, no parent/backptr data */
+	rec->ino_startnum = 0;
+	rec->ino_confirmed = 0;
+	rec->ino_isa_dir = 0;
+	rec->ir_free = (xfs_inofree_t) - 1;
+	rec->ino_un.backptrs = NULL;
+	return(rec);
+}
+
+/*
+ * return inode record to free list, will be initialized when
+ * it gets pulled off list.  Parent list / back pointer data hanging
+ * off the record is freed here, including the pentries array.
+ */
+static void
+free_ino_tree_node(ino_tree_node_t *ino_rec)
+{
+	parent_list_t *plist;
+
+	ino_rec->avl_node.avl_forw = NULL;
+	ino_rec->avl_node.avl_back = NULL;
+
+	/* push onto the head of the free list (head is NULL when empty) */
+	ASSERT(ino_flist.list != NULL ? ino_flist.cnt > 0 : ino_flist.cnt == 0);
+	ino_rec->avl_node.avl_nextino = (avlnode_t *) ino_flist.list;
+	ino_flist.list = ino_rec;
+	ino_flist.cnt++;
+
+	if (ino_rec->ino_un.backptrs == NULL)
+		return;
+	/* ino_un holds a plist until add_ino_backptrs(), backptrs after */
+	plist = full_backptrs ? ino_rec->ino_un.backptrs->parents
+			      : ino_rec->ino_un.plist;
+	if (plist != NULL)  {
+		free(plist->pentries);
+		free(plist);
+	}
+	if (full_backptrs)
+		free(ino_rec->ino_un.backptrs);
+}
+
+/*
+ * last referenced cache for uncertain inodes
+ */
+static ino_tree_node_t **last_rec;
+
+/*
+ * ok, the uncertain inodes are a set of trees just like the
+ * good inodes but all starting inode records are (arbitrarily)
+ * aligned on XFS_INODES_PER_CHUNK boundaries to prevent overlaps.
+ * this means we may have partial records in the tree (e.g. records
+ * without 64 confirmed uncertain inodes).  Tough.
+ *
+ * free is set to 1 if the inode is thought to be free, 0 if used
+ */
+void
+add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free)
+{
+	ino_tree_node_t		*rec;
+	xfs_agino_t		chunk_start;
+	int			bit;
+
+	ASSERT(agno < glob_agcount);
+	ASSERT(last_rec != NULL);
+
+	chunk_start = rounddown(ino, XFS_INODES_PER_CHUNK);
+	bit = ino - chunk_start;
+
+	/*
+	 * try the per-AG cache of the last record touched; it is only
+	 * good if it describes the chunk holding this inode
+	 */
+	rec = last_rec[agno];
+	if (rec == NULL || rec->ino_startnum != chunk_start)  {
+		/*
+		 * cache miss -- look the chunk up in the uncertain tree
+		 * and add a fresh record for it if it isn't there yet
+		 */
+		rec = (ino_tree_node_t *) avl_findrange(
+				inode_uncertain_tree_ptrs[agno], chunk_start);
+
+		if (rec == NULL)  {
+			rec = mk_ino_tree_nodes(chunk_start);
+			rec->ino_startnum = chunk_start;
+
+			if (avl_insert(inode_uncertain_tree_ptrs[agno],
+					(avlnode_t *) rec) == NULL)  {
+				do_error(
+				"xfs_repair:  duplicate inode range\n");
+			}
+		}
+
+		/*
+		 * remember the record for the next call
+		 */
+		last_rec[agno] = rec;
+	}
+
+	/*
+	 * record the caller's belief about this one inode
+	 */
+	if (free)
+		set_inode_free(rec, bit);
+	else
+		set_inode_used(rec, bit);
+
+	return;
+}
+
+/*
+ * same as add_aginode_uncertain(), but takes a filesystem-wide inode
+ * number and uses mp to split it into AG number and AG-relative inode
+ */
+void
+add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free)
+{
+	xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ino);
+	add_aginode_uncertain(agno, XFS_INO_TO_AGINO(mp, ino), free);
+}
+
+/*
+ * pull the indicated inode record out of the uncertain inode tree
+ */
+void
+get_uncertain_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+	/* check the tree we actually delete from, not the good-inode one */
+	ASSERT(inode_uncertain_tree_ptrs != NULL);
+	ASSERT(inode_uncertain_tree_ptrs[agno] != NULL);
+	avl_delete(inode_uncertain_tree_ptrs[agno], &ino_rec->avl_node);
+
+	ino_rec->avl_node.avl_nextino = NULL;
+	ino_rec->avl_node.avl_forw = NULL;
+	ino_rec->avl_node.avl_back = NULL;
+}
+
+ino_tree_node_t *
+findfirst_uncertain_inode_rec(xfs_agnumber_t agno)
+{
+	/* avl_firstino of AG agno's uncertain inode tree */
+	return((ino_tree_node_t *) inode_uncertain_tree_ptrs[agno]->avl_firstino);
+}
+
+/* forget the cached last-touched uncertain inode record of AG agno */
+void
+clear_uncertain_ino_cache(xfs_agnumber_t agno)
+{
+	last_rec[agno] = NULL;
+	return;
+}
+
+
+/*
+ * next comes the inode trees.  One per ag.  AVL trees
+ * of inode records, each inode record tracking 64 inodes
+ */
+/*
+ * set up an inode tree record for a group of inodes that will
+ * include the requested inode.
+ *
+ * does NOT look for an existing record first.  Caller is
+ * responsible for that; an insert avl_insert() refuses is fatal.
+ *
+ * ino must be the start of an XFS_INODES_PER_CHUNK (64) inode chunk
+ *
+ * Each inode resides in a 64-inode chunk which can be part of
+ * one or more chunks (MAX(64, inodes-per-block)).  The fs allocates
+ * in chunks (as opposed to 1 chunk) when a block can hold more than
+ * one chunk (inodes per block > 64).  Allocating in one chunk pieces
+ * causes us problems when it takes more than one fs block to contain
+ * an inode chunk because the chunks can start on *any* block boundary.
+ * So we assume that the caller has a clue because at this level, we
+ * don't.
+ */
+static ino_tree_node_t *
+add_inode(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	ino_tree_node_t *rec;
+
+	/* take a blank record and make it describe the chunk at ino */
+	rec = mk_ino_tree_nodes(ino);
+	rec->ino_startnum = ino;
+
+	/*
+	 * a NULL return from avl_insert() is treated as a duplicate
+	 */
+	if (avl_insert(inode_tree_ptrs[agno], &rec->avl_node) == NULL)
+		do_error("xfs_repair:  duplicate inode range\n");
+
+	return(rec);
+}
+
+/*
+ * unlink the given record from AG agno's inode tree and clear its
+ * tree pointers; the record itself is not freed here
+ */
+void
+get_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+	avlnode_t *node = &ino_rec->avl_node;
+
+	ASSERT(inode_tree_ptrs != NULL);
+	ASSERT(inode_tree_ptrs[agno] != NULL);
+
+	avl_delete(inode_tree_ptrs[agno], node);
+	node->avl_nextino = node->avl_forw = node->avl_back = NULL;
+}
+
+/*
+ * free the designated inode record (return it to the free pool).
+ * Thin public wrapper around free_ino_tree_node(); agno is accepted
+ * but not used.
+ */
+/* ARGSUSED */
+void
+free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+	free_ino_tree_node(ino_rec);
+}
+
+/*
+ * look up the record containing inode ino in AG agno's inode tree.
+ * returns NULL if inode doesn't exist.  The tree-based find
+ * routines do NOT pull records out of the tree.
+ */
+ino_tree_node_t *
+find_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	avltree_desc_t *tree = inode_tree_ptrs[agno];
+	return((ino_tree_node_t *) avl_findrange(tree, ino));
+}
+
+/* *first / *last: preset to NULL, then filled in by avl_findranges() */
+void
+find_inode_rec_range(xfs_agnumber_t agno, xfs_agino_t start_ino,
+			xfs_agino_t end_ino, ino_tree_node_t **first,
+			ino_tree_node_t **last)
+{
+	*first = NULL;
+	*last = NULL;
+	avl_findranges(inode_tree_ptrs[agno], start_ino,
+		end_ino, (avlnode_t **) first, (avlnode_t **) last);
+}
+
+/*
+ * if ino doesn't exist, it must be properly aligned -- on a
+ * filesystem block boundary or XFS_INODES_PER_CHUNK boundary,
+ * whichever alignment is larger.
+ */
+ino_tree_node_t *
+set_inode_used_alloc(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	ino_tree_node_t *rec;
+	xfs_agino_t first;
+
+	/*
+	 * check alignment -- the only way to detect this
+	 * is to see if the chunk overlaps another chunk
+	 * already in the tree
+	 */
+	rec = add_inode(agno, ino);
+	ASSERT(rec != NULL);
+
+	first = rec->ino_startnum;
+	ASSERT(ino >= first && ino - first < XFS_INODES_PER_CHUNK);
+
+	set_inode_used(rec, ino - first);
+	return(rec);
+}
+
+ino_tree_node_t *
+set_inode_free_alloc(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	ino_tree_node_t *rec = add_inode(agno, ino);
+	xfs_agino_t first;
+
+	ASSERT(rec != NULL);
+
+	first = rec->ino_startnum;
+	ASSERT(ino >= first && ino - first < XFS_INODES_PER_CHUNK);
+
+	/* flag the inode as free in the record just added for its chunk */
+	set_inode_free(rec, ino - first);
+	return(rec);
+}
+
+ino_tree_node_t *		/* avl_firstino of AG agno's inode tree */
+findfirst_inode_rec(xfs_agnumber_t agno)
+{
+	return((ino_tree_node_t *) inode_tree_ptrs[agno]->avl_firstino);
+}
+
+/*
+ * debugging aid: dump AG agno's good inode list (or the uncertain
+ * one if uncertain is set), one record per line, all on stderr
+ */
+void
+print_inode_list_int(xfs_agnumber_t agno, int uncertain)
+{
+	ino_tree_node_t *ino_rec;
+
+	if (!uncertain)  {
+		fprintf(stderr, "good inode list is --\n");
+		ino_rec = findfirst_inode_rec(agno);
+	} else  {
+		fprintf(stderr, "uncertain inode list is --\n");
+		ino_rec = findfirst_uncertain_inode_rec(agno);
+	}
+
+	if (ino_rec == NULL)  {
+		fprintf(stderr, "agno %d -- no inodes\n", agno);
+		return;
+	}
+
+	fprintf(stderr, "agno %d\n", agno);
+	for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec))  {
+		fprintf(stderr,
+	"\tptr = %p, start = 0x%x, free = 0x%llx, confirmed = 0x%llx\n",
+			(void *) ino_rec,
+			ino_rec->ino_startnum,
+			(unsigned long long) ino_rec->ir_free,
+			(unsigned long long) ino_rec->ino_confirmed);
+	}
+}
+
+void
+print_inode_list(xfs_agnumber_t agno)
+{
+	print_inode_list_int(agno, 0);	/* 0 selects the good inode list */
+}
+
+void
+print_uncertain_inode_list(xfs_agnumber_t agno)
+{
+	print_inode_list_int(agno, 1);	/* 1 selects the uncertain inode list */
+}
+
+/*
+ * set parent -- use a bitmask and a packed array.  The bitmask
+ * indicates which inodes have an entry in the array.  An inode that
+ * is the Nth bit set in the mask is stored in the Nth location in
+ * the array where N starts at 0.
+ */
+void
+set_inode_parent(ino_tree_node_t *irec, int offset, xfs_ino_t parent)
+{
+	int		i;
+	int		cnt;
+	int		target;
+	__uint64_t	bitmask;
+	__uint64_t	inobit;
+	parent_list_t	*plist;
+	parent_entry_t	*tmp;
+
+	ASSERT(full_backptrs == 0);
+	ASSERT(offset >= 0 && offset < XFS_INODES_PER_CHUNK);
+
+	/* unsigned one: a signed 1LL shifted left by 63 is undefined */
+	inobit = (__uint64_t) 1 << offset;
+	plist = irec->ino_un.plist;
+
+	if (plist == NULL)  {
+		/* first parent for this record: start a one-entry table */
+		plist = (parent_list_t *) malloc(sizeof(parent_list_t));
+		if (!plist)
+			do_error("couldn't malloc parent list table\n");
+
+		plist->pmask = inobit;
+		plist->pentries = (xfs_ino_t *) memalign(sizeof(xfs_ino_t),
+							sizeof(xfs_ino_t));
+		if (!plist->pentries)
+			do_error("couldn't memalign pentries table\n");
+#ifdef DEBUG
+		plist->cnt = 1;
+#endif
+		plist->pentries[0] = parent;
+		irec->ino_un.plist = plist;
+
+		return;
+	}
+
+	/*
+	 * count the entries in the table (cnt) and how many of them
+	 * belong to inodes below offset (target, this inode's slot)
+	 */
+	bitmask = 1;
+	cnt = target = 0;
+
+	for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+		if (plist->pmask & bitmask)  {
+			cnt++;
+			if (i < offset)
+				target++;
+		}
+
+		bitmask <<= 1;
+	}
+
+#ifdef DEBUG
+	ASSERT(cnt == plist->cnt);
+#endif
+	ASSERT(cnt >= target);
+
+	if (plist->pmask & inobit)  {
+		/* the inode already has a slot -- overwrite it in place */
+		ASSERT(target < cnt);
+		plist->pentries[target] = parent;
+
+		return;
+	}
+
+	/* grow the table by one, leaving a hole at target */
+	tmp = (xfs_ino_t *) memalign(sizeof(xfs_ino_t),
+					(cnt + 1) * sizeof(xfs_ino_t));
+	if (!tmp)
+		do_error("couldn't memalign pentries table\n");
+
+	(void) bcopy(plist->pentries, tmp, target * sizeof(parent_entry_t));
+
+	if (cnt > target)
+		(void) bcopy(plist->pentries + target, tmp + target + 1,
+				(cnt - target) * sizeof(parent_entry_t));
+
+	free(plist->pentries);
+	plist->pentries = tmp;
+
+#ifdef DEBUG
+	plist->cnt++;
+#endif
+	plist->pentries[target] = parent;
+	plist->pmask |= inobit;
+}
+
+#if 0
+/*
+ * compiled-out stub that only checks its preconditions.  not needed
+ * for now since we don't set the parent info until phase 4 -- at which
+ * point we know that the directory inode won't be going away -- so we
+ * won't ever need to clear directory parent data that we set.
+ */
+void
+clear_inode_parent(ino_tree_node_t *irec, int offset)
+{
+	ASSERT(full_backptrs == 0);
+	ASSERT(irec->ino_un.plist != NULL);
+
+	return;
+}
+#endif
+
+/* parent recorded for inode offset of irec, or 0 if none was set */
+xfs_ino_t
+get_inode_parent(ino_tree_node_t *irec, int offset)
+{
+	__uint64_t	bitmask = 1;
+	__uint64_t	inobit = (__uint64_t) 1 << offset;
+	parent_list_t	*ptbl;
+	int		i;
+	int		target = 0;
+
+	if (full_backptrs)
+		ptbl = irec->ino_un.backptrs->parents;
+	else
+		ptbl = irec->ino_un.plist;
+
+	/* no table yet for this record, or no entry for this inode */
+	if (ptbl == NULL || !(ptbl->pmask & inobit))
+		return(0LL);
+
+	/* the slot index is the number of mask bits set below offset */
+	for (i = 0; i < offset; i++)  {
+		if (ptbl->pmask & bitmask)
+			target++;
+		bitmask <<= 1;
+	}
+#ifdef DEBUG
+	ASSERT(target < ptbl->cnt);
+#endif
+	return(ptbl->pentries[target]);
+}
+
+/*
+ * code that deals with the inode descriptor appendages -- the back
+ * pointers, link counts and reached bits for phase 6 and phase 7.
+ */
+
+void
+add_inode_reached(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	backptrs_t *bp = ino_rec->ino_un.backptrs;
+
+	ASSERT(bp != NULL);
+
+	/* one more link to the inode, and it is now marked reached */
+	bp->nlinks[ino_offset]++;
+	XFS_INO_RCHD_SET_RCHD(ino_rec, ino_offset);
+	ASSERT(is_inode_reached(ino_rec, ino_offset));
+}
+
+int
+is_inode_reached(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);	/* back pointer data must be attached */
+	return(XFS_INO_RCHD_IS_RCHD(ino_rec, ino_offset));
+}
+
+/* add one to the link count kept for inode ino_offset of the record */
+void
+add_inode_ref(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	backptrs_t *bp = ino_rec->ino_un.backptrs;
+
+	ASSERT(bp != NULL);
+	bp->nlinks[ino_offset]++;
+}
+
+/* drop one link; the reached bit is cleared along with the last one */
+void
+drop_inode_ref(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	backptrs_t *bp = ino_rec->ino_un.backptrs;
+
+	ASSERT(bp != NULL);
+	ASSERT(bp->nlinks[ino_offset] > 0);
+	if (--bp->nlinks[ino_offset] == 0)
+		XFS_INO_RCHD_CLR_RCHD(ino_rec, ino_offset);
+}
+
+int
+is_inode_referenced(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);	/* back pointer data must be attached */
+	return(ino_rec->ino_un.backptrs->nlinks[ino_offset] > 0);	/* 1 if any link counted */
+}
+
+__uint32_t
+num_inode_references(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);	/* back pointer data must be attached */
+	return(ino_rec->ino_un.backptrs->nlinks[ino_offset]);	/* current link count */
+}
+
+#if 0	/* pooled variant, compiled out in favour of the malloc-per-call one below */
+static backptrs_t	*bptrs;
+static int		bptrs_index;	/* NOTE(review): starts at 0, so the first call skips the malloc */
+#define BPTR_ALLOC_NUM	1000	/* backptrs_t structs per malloc'ed batch */
+
+backptrs_t *
+get_backptr(void)
+{
+	backptrs_t *bptr;
+
+	if (bptrs_index == BPTR_ALLOC_NUM)  {
+		ASSERT(bptrs == NULL);
+
+		if ((bptrs = malloc(sizeof(backptrs_t[BPTR_ALLOC_NUM])))
+				== NULL)  {
+			do_error("couldn't malloc ino rec backptrs.\n");
+		}
+
+		bptrs_index = 0;
+	}
+
+	ASSERT(bptrs != NULL);
+
+	bptr = &bptrs[bptrs_index];
+	bptrs_index++;
+
+	if (bptrs_index == BPTR_ALLOC_NUM)
+		bptrs = NULL;	/* batch used up; the next call allocates a new one */
+
+	bzero(bptr, sizeof(backptrs_t));
+
+	return(bptr);
+}
+#endif
+
+/* allocate one zero-filled back pointer table; do_error() on failure */
+backptrs_t *
+get_backptr(void)
+{
+	backptrs_t *bp = malloc(sizeof(*bp));
+
+	if (bp == NULL)
+		do_error("could not malloc back pointer table\n");
+
+	bzero(bp, sizeof(*bp));
+	return(bp);
+}
+
+void
+add_ino_backptrs(xfs_mount_t *mp)
+{
+#ifdef XR_BCKPTR_DBG
+	xfs_ino_t ino;
+	int j, k;
+#endif /* XR_BCKPTR_DBG */
+	ino_tree_node_t *ino_rec;
+	parent_list_t *tmp;
+	xfs_agnumber_t i;
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++)  {	/* every AG's good inode tree */
+		ino_rec = findfirst_inode_rec(i);
+
+		while (ino_rec != NULL)  {
+			tmp = ino_rec->ino_un.plist;	/* save the parent list before ino_un is reused */
+			ino_rec->ino_un.backptrs = get_backptr(); 
+			ino_rec->ino_un.backptrs->parents = tmp;	/* hang the saved list off the new table */
+
+#ifdef XR_BCKPTR_DBG
+			if (tmp != NULL)  {
+				k = 0;	/* NOTE(review): loop below indexes pentries by inode offset j -- confirm vs packed layout */
+				for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+					ino = XFS_AGINO_TO_INO(mp, i,
+						ino_rec->ino_startnum + j);
+					if (ino == 25165846)  {
+						do_warn("THERE 1 !!!\n");
+					}
+					if (tmp->pentries[j] != 0)  {
+						k++;
+						do_warn(
+						"inode %llu - parent %llu\n",
+							ino,
+							tmp->pentries[j]);
+						if (ino == 25165846)  {
+							do_warn("THERE!!!\n");
+						}
+					}
+				}
+
+				if (k != tmp->cnt)  {
+					do_warn(
+					"ERROR - count = %d, counted %d\n",
+						tmp->cnt, k);
+				}
+			}
+#endif /* XR_BCKPTR_DBG */
+			ino_rec = next_ino_rec(ino_rec);
+		}
+	}
+
+	full_backptrs = 1;	/* from here on ino_un is read as backptrs, not plist */
+
+	return;
+}
+
+/* avl key callback: first inode number covered by the record */
+static __psunsigned_t
+avl_ino_start(avlnode_t *node)
+{
+	ino_tree_node_t *rec = (ino_tree_node_t *) node;
+	return((__psunsigned_t) rec->ino_startnum);
+}
+
+/* avl key callback: start inode number plus XFS_INODES_PER_CHUNK */
+static __psunsigned_t
+avl_ino_end(avlnode_t *node)
+{
+	ino_tree_node_t *rec = (ino_tree_node_t *) node;
+	return((__psunsigned_t) (rec->ino_startnum + XFS_INODES_PER_CHUNK));
+}
+
+/* start/end accessors passed to avl_init_tree() for the inode trees */
+avlops_t avl_ino_tree_ops = { avl_ino_start, avl_ino_end };
+
+/* set up the per-AG inode trees, the record free list and the lookup cache */
+void
+incore_ino_init(xfs_mount_t *mp)
+{
+	int agcount = mp->m_sb.sb_agcount;
+	int agno;
+
+	inode_tree_ptrs = malloc(agcount * sizeof(avltree_desc_t *));
+	if (inode_tree_ptrs == NULL)
+		do_error("couldn't malloc inode tree descriptor table\n");
+	inode_uncertain_tree_ptrs = malloc(agcount * sizeof(avltree_desc_t *));
+	if (inode_uncertain_tree_ptrs == NULL)
+		do_error("couldn't malloc uncertain ino tree descriptor table\n");
+
+	for (agno = 0; agno < agcount; agno++)  {
+		inode_tree_ptrs[agno] = malloc(sizeof(avltree_desc_t));
+		if (inode_tree_ptrs[agno] == NULL)
+			do_error("couldn't malloc inode tree descriptor\n");
+		inode_uncertain_tree_ptrs[agno] = malloc(sizeof(avltree_desc_t));
+		if (inode_uncertain_tree_ptrs[agno] == NULL)
+			do_error(
+			"couldn't malloc uncertain ino tree descriptor\n");
+	}
+
+	for (agno = 0; agno < agcount; agno++)  {
+		avl_init_tree(inode_tree_ptrs[agno], &avl_ino_tree_ops);
+		avl_init_tree(inode_uncertain_tree_ptrs[agno], &avl_ino_tree_ops);
+	}
+
+	ino_flist.cnt = 0;
+	ino_flist.list = NULL;
+
+	last_rec = malloc(sizeof(ino_tree_node_t *) * agcount);
+	if (last_rec == NULL)
+		do_error("couldn't malloc uncertain inode cache area\n");
+	bzero(last_rec, sizeof(ino_tree_node_t *) * agcount);
+
+	full_backptrs = 0;
+}
+
+#ifdef XR_INO_REF_DEBUG
+void
+add_inode_refchecked(xfs_ino_t ino, ino_tree_node_t *ino_rec, int ino_offset)
+{
+	XFS_INOPROC_SET_PROC((ino_rec), (ino_offset));	/* ino is only passed on to the ASSERT below */
+
+	ASSERT(is_inode_refchecked(ino, ino_rec, ino_offset));
+
+	return;
+}
+
+int
+is_inode_refchecked(xfs_ino_t ino, ino_tree_node_t *ino_rec, int ino_offset)
+{
+	return(XFS_INOPROC_IS_PROC(ino_rec, ino_offset) == 0LL ? 0 : 1);	/* normalize to 0 or 1; ino unused */
+}
+#endif /* XR_INO_REF_DEBUG */
diff --git a/repair/init.c b/repair/init.c
new file mode 100644
index 000000000..cc61650c7
--- /dev/null
+++ b/repair/init.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * fill in the libxfs_init_t from the repair globals (fs_name,
+ * isa_file, log_spec/log_name, no_modify) and start up libxfs;
+ * a failed libxfs_init() is reported through do_error()
+ */
+void
+xfs_init(libxfs_init_t *args)
+{
+	memset(args, 0, sizeof(libxfs_init_t));
+
+	/* fs_name goes in dname for a plain file, in volname otherwise */
+	args->disfile = isa_file ? 1 : 0;
+	args->dname = isa_file ? fs_name : NULL;
+	args->volname = isa_file ? NULL : fs_name;
+
+	if (log_spec)  {	/* External log specified */
+		args->logname = log_name;
+		args->lisfile = (isa_file?1:0);
+		/* XXX assume data file also means log file */
+		/* REVISIT: Need to do fs sanity / log validity checking */
+	}
+
+	args->notvolmsg = "you should never get this message - %s";
+	args->notvolok = 1;
+
+	if (no_modify)
+		args->isreadonly = (LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE);
+
+	if (!libxfs_init(args))
+		do_error("couldn't initialize XFS library\n");
+}
diff --git a/repair/io.c b/repair/io.c
new file mode 100644
index 000000000..0b400ceef
--- /dev/null
+++ b/repair/io.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <fcntl.h>
+#include <libxfs.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/* open fs_name read/write into fs_fd and allocate the global i/o buffers */
+void
+io_init(void)
+{
+	int sb;
+
+	ASSERT(fs_name != NULL && *fs_name != '\0');
+
+	fs_fd = open(fs_name, O_RDWR);
+	if (fs_fd < 0)  {
+		do_error("couldn't open filesystem \"%s\"\n", fs_name);
+	}
+
+	iobuf_size = 1000 * 1024;
+	smallbuf_size = 4 * 4096;	/* enough for an ag */
+
+	/*
+	 * sbbuf_size must be < XFS_MIN_AG_BLOCKS (64) * smallest block size,
+	 * otherwise you might get an EOF when reading in the sb/agf from
+	 * the last ag if that ag is small
+	 */
+	sbbuf_size = 2 * 4096;		/* 2 * max sector size */
+
+	iobuf = malloc(iobuf_size);
+	if (iobuf == NULL)
+		do_error("couldn't malloc io buffer\n");
+
+	smallbuf = malloc(smallbuf_size);
+	if (smallbuf == NULL)
+		do_error("couldn't malloc secondary io buffer\n");
+
+	for (sb = 0; sb < NUM_SBS; sb++)  {
+		sb_bufs[sb] = malloc(sbbuf_size);
+		if (sb_bufs[sb] == NULL)
+			do_error("couldn't malloc sb io buffers\n");
+	}
+}
diff --git a/repair/phase1.c b/repair/phase1.c
new file mode 100644
index 000000000..a3bc895a9
--- /dev/null
+++ b/repair/phase1.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+void
+no_sb(void)	/* give up: warn that no secondary sb was found, then exit */
+{
+	do_warn("Sorry, could not find valid secondary superblock\n");
+	do_warn("Exiting now.\n");
+	exit(1);	/* does not return; exit status is 1 */
+}
+
+char *
+alloc_ag_buf(int size)
+{
+	/* grab size bytes aligned to MEM_ALIGN for an ag header buffer */
+	char	*ag_buf = (char *)memalign(MEM_ALIGN, size);
+
+	if (ag_buf == NULL)
+		do_error("could not allocate ag header buffer (%d bytes)\n",
+			size);
+	return ag_buf;
+}
+
+/*
+ * this has got to be big enough to hold 4 sectors
+ */
+#define MAX_SECTSIZE		(512 * 1024)
+
+/* ARGSUSED */
+void
+phase1(xfs_mount_t *mp)
+{
+	char			*ag_buf;
+	xfs_sb_t		*sbp;
+	int			status;
+
+	io_init();
+
+	do_log("Phase 1 - find and verify superblock...\n");
+
+	/* reset the global repair state flags */
+	primary_sb_modified = 0;
+	need_root_inode = 0;
+	need_root_dotdot = 0;
+	need_rbmino = 0;
+	need_rsumino = 0;
+	lost_quotas = 0;
+	old_orphanage_ino = (xfs_ino_t) 0;
+
+	/* get AG 0 into a freshly allocated ag header buf */
+	ag_buf = alloc_ag_buf(MAX_SECTSIZE);
+	sbp = (xfs_sb_t *) ag_buf;
+
+	if (get_sb(sbp, 0LL, MAX_SECTSIZE, 0) == XR_EOF)
+		do_error("error reading primary superblock\n");
+
+	/*
+	 * Is this really an sb?  Check internal consistency first, then
+	 * verify_set_primary_sb(); if either fails, try find_secondary_sb()
+	 * and give up through no_sb() if that comes back empty.
+	 */
+	status = verify_sb(sbp, 1);
+	if (status != XR_OK)  {
+		do_warn("bad primary superblock - %s !!!\n",
+			err_string(status));
+		if (!find_secondary_sb(sbp))
+			no_sb();
+		primary_sb_modified = 1;
+	} else  {
+		status = verify_set_primary_sb(sbp, 0, &primary_sb_modified);
+		if (status != XR_OK)  {
+			do_warn("couldn't verify primary superblock - %s !!!\n",
+				err_string(status));
+			if (!find_secondary_sb(sbp))
+				no_sb();
+			primary_sb_modified = 1;
+		}
+	}
+
+	/* a modified primary sb is written out unless no_modify is set */
+	if (primary_sb_modified && no_modify)
+		do_warn("would write modified primary superblock\n");
+	else if (primary_sb_modified)  {
+		do_warn("writing modified primary superblock\n");
+		write_primary_sb(sbp, sbp->sb_sectsize);
+	}
+
+	/* misc. global var initialization */
+	sb_ifree = sb_icount = sb_fdblocks = sb_frextents = 0;
+
+	free(ag_buf);
+}
diff --git a/repair/phase2.c b/repair/phase2.c
new file mode 100644
index 000000000..a906892b1
--- /dev/null
+++ b/repair/phase2.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "incore.h"
+
+void	set_mp(xfs_mount_t *mpp);
+void	scan_ag(xfs_agnumber_t agno);
+
+static void
+zero_log(xfs_mount_t *mp, libxfs_init_t *args)
+{
+	int	dev;	/* device passed to libxfs_log_clear() */
+
+	/* sb_logstart == 0 selects args->logdev, otherwise args->ddev */
+	dev = (mp->m_sb.sb_logstart != 0) ? args->ddev : args->logdev;
+	libxfs_log_clear(dev, XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
+		(xfs_extlen_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
+		&mp->m_sb.sb_uuid, XLOG_FMT);
+}
+
+/*
+ * ok, at this point, the fs is mounted but the root inode may be
+ * trashed and the ag headers haven't been checked.  So we have
+ * a valid xfs_mount_t and superblock but that's about it.  That
+ * means we can use macros that use mount/sb fields in calculations
+ * but I/O or btree routines that depend on space maps or inode maps
+ * being correct are verboten.
+ */
+
+void
+phase2(xfs_mount_t *mp, libxfs_init_t *args)
+{
+	ino_tree_node_t		*root_rec;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		blk;
+	int			slot;
+
+	/* now we can start using the buffer cache routines */
+	set_mp(mp);
+
+	/*
+	 * Report where the log lives.  sb_logstart == 0 is treated as an
+	 * external log, and then a log device name must have been given.
+	 */
+	if (mp->m_sb.sb_logstart != 0)  {
+		fprintf (stderr, "Phase 2 - using internal log\n");
+	} else  {
+		if (!args->logname)  {
+			fprintf (stderr,
+				"This filesystem has an external log.  "
+				"Specify log device with the -l option.\n");
+			exit (1);
+		}
+		fprintf (stderr, "Phase 2 - using external log on %s\n",
+			 args->logname);
+	}
+
+	/* the log is only zeroed when no_modify is not set */
+	if (!no_modify)  {
+		do_log("        - zero log...\n");
+		zero_log(mp, args);
+	}
+
+	do_log("        - scan filesystem freespace and inode maps...\n");
+
+	/* account for space used by ag headers and log if internal */
+	set_bmap_log(mp);
+	set_bmap_fs(mp);
+
+	bad_ino_btree = 0;
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+		scan_ag(agno);
+#ifdef XR_INODE_TRACE
+		print_inode_list(agno);
+#endif
+	}
+
+	/* make sure we know about the root inode chunk */
+	root_rec = find_inode_rec(0, mp->m_sb.sb_rootino);
+	if (root_rec != NULL)  {
+		do_log("        - found root inode chunk\n");
+
+		/*
+		 * blocks are marked, just make sure the first three
+		 * inodes of the chunk (root, realtime bitmap and
+		 * realtime summary) are recorded as in use
+		 */
+		if (is_inode_free(root_rec, 0))  {
+			do_warn("root inode marked free, ");
+			set_inode_used(root_rec, 0);
+			if (no_modify)
+				do_warn("would correct\n");
+			else
+				do_warn("correcting\n");
+		}
+
+		if (is_inode_free(root_rec, 1))  {
+			do_warn("realtime bitmap inode marked free, ");
+			set_inode_used(root_rec, 1);
+			if (no_modify)
+				do_warn("would correct\n");
+			else
+				do_warn("correcting\n");
+		}
+
+		if (is_inode_free(root_rec, 2))  {
+			do_warn("realtime summary inode marked free, ");
+			set_inode_used(root_rec, 2);
+			if (no_modify)
+				do_warn("would correct\n");
+			else
+				do_warn("correcting\n");
+		}
+
+		return;
+	}
+
+	/* no record of the root inode chunk: build one by hand */
+	ASSERT(mp->m_sb.sb_rbmino == mp->m_sb.sb_rootino + 1 &&
+		mp->m_sb.sb_rsumino == mp->m_sb.sb_rootino + 2);
+	do_warn("root inode chunk not found\n");
+
+	/* mark the first 3 used, the rest are free */
+	root_rec = set_inode_used_alloc(0, (xfs_agino_t) mp->m_sb.sb_rootino);
+	set_inode_used(root_rec, 1);
+	set_inode_used(root_rec, 2);
+
+	for (slot = 3; slot < XFS_INODES_PER_CHUNK; slot++)
+		set_inode_free(root_rec, slot);
+
+	/* also mark blocks */
+	for (blk = 0; blk < mp->m_ialloc_blks; blk++)  {
+		set_agbno_state(mp, 0,
+			blk + XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
+			XR_E_INO);
+	}
+}
diff --git a/repair/phase3.c b/repair/phase3.c
new file mode 100644
index 000000000..e9f7af53d
--- /dev/null
+++ b/repair/phase3.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+
+/*
+ * walks an unlinked list starting at start_ino.  returns 1 if an inode
+ * number fails verify_aginum() or its buffer can't be read, 0 otherwise
+ * (an inode that fails verify_dinode() just ends the walk).
+ */
+int
+walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
+{
+	xfs_buf_t *bp;
+	xfs_dinode_t *dip;
+	xfs_agino_t current_ino = start_ino;
+	xfs_agblock_t agbno;
+	int state;
+
+	while (current_ino != NULLAGINO)  {
+		if (!verify_aginum(mp, agno, current_ino))
+			return(1);
+		if ((bp = get_agino_buf(mp, agno, current_ino, &dip)) == NULL)
+			return(1);
+		/*
+		 * if this looks like a decent inode, then continue
+		 * following the unlinked pointers.  If not, bail.
+		 */
+		if (verify_dinode(mp, dip, agno, current_ino) == 0)  {
+			/*
+			 * check if the unlinked list points to an unknown
+			 * inode.  if so, put it on the uncertain inode list
+			 * and set block map appropriately.
+			 */
+			if (find_inode_rec(agno, current_ino) == NULL)  {
+				add_aginode_uncertain(agno, current_ino, 1);
+				agbno = XFS_AGINO_TO_AGBNO(mp, current_ino);
+
+				switch (state = get_agbno_state(mp, agno, agbno))  {
+				case XR_E_UNKNOWN:
+				case XR_E_FREE:
+				case XR_E_FREE1:
+					set_agbno_state(mp, agno, agbno, XR_E_INO);
+					break;
+				case XR_E_BAD_STATE:
+					do_error("bad state in block map %d\n",
+						state);
+					abort();
+					break;
+				default:
+					/*
+					 * the block looks like inodes
+					 * so be conservative and try
+					 * to scavenge what's in there.
+					 * if what's there is completely
+					 * bogus, it'll show up later
+					 * and the inode will be trashed
+					 * anyway, hopefully without
+					 * losing too much other data
+					 */
+					set_agbno_state(mp, agno, agbno,
+						XR_E_INO);
+					break;
+				}
+			}
+			/* di_next_unlinked is an on-disk field: convert it */
+			current_ino = INT_GET(dip->di_next_unlinked,
+						ARCH_CONVERT);
+		} else  {
+			current_ino = NULLAGINO;
+		}
+		libxfs_putbuf(bp);
+	}
+
+	return(0);
+}
+
+void
+process_agi_unlinked(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	xfs_buf_t	*agi_bp;
+	xfs_agi_t	*agi;
+	xfs_agnumber_t	bucket;
+	xfs_agino_t	head;
+	int		failures = 0;
+	int		modified = 0;
+
+	/* read sb_sectsize bytes at this ag's agi header address */
+	agi_bp = libxfs_readbuf(mp->m_dev,
+			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR),
+			mp->m_sb.sb_sectsize/BBSIZE, 0);
+	if (agi_bp == NULL)  {
+		do_error("cannot read agi block %lld for ag %u\n",
+			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), agno);
+		exit(1);
+	}
+
+	agi = XFS_BUF_TO_AGI(agi_bp);
+	ASSERT(no_modify || INT_GET(agi->agi_seqno, ARCH_CONVERT) == agno);
+
+	/* walk every non-empty bucket, then clear it unless no_modify */
+	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)  {
+		head = INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT);
+		if (head == NULLAGINO)
+			continue;
+		failures += walk_unlinked_list(mp, agno, head);
+		if (no_modify)
+			continue;
+		INT_SET(agi->agi_unlinked[bucket], ARCH_CONVERT, NULLAGINO);
+		modified = 1;
+	}
+
+	if (failures != 0)
+		do_warn("error following ag %d unlinked list\n", agno);
+
+	ASSERT(modified == 0 || (modified && !no_modify));
+
+	if (modified && !no_modify)
+		libxfs_writebuf(agi_bp, 0);
+	else
+		libxfs_putbuf(agi_bp);
+}
+
+void
+phase3(xfs_mount_t *mp)
+{
+	int	agno;
+	int	pending;	/* sum of process_uncertain_aginodes() results */
+
+	printf("Phase 3 - for each AG...\n");
+	if (no_modify)
+		printf("        - scan (but don't clear) agi unlinked lists...\n");
+	else
+		printf("        - scan and clear agi unlinked lists...\n");
+
+	/*
+	 * first, let's look at the possibly bogus inodes: walking the
+	 * unlinked lists adds more potential inodes to the list
+	 */
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+		process_agi_unlinked(mp, agno);
+		check_uncertain_aginodes(mp, agno);
+	}
+
+	/* ok, now that the tree's ok, let's take a good look */
+	printf(
+	    "        - process known inodes and perform inode discovery...\n");
+
+	agno = 0;
+	while (agno < mp->m_sb.sb_agcount)  {
+		do_log("        - agno = %d\n", agno);
+		/*
+		 * turn on directory processing (inode discovery) and
+		 * attribute processing (extra_attr_check)
+		 */
+		process_aginodes(mp, agno, 1, 0, 1);
+		agno++;
+	}
+
+	/*
+	 * process newly discovered inode chunks; keep making passes
+	 * over all the ags until no ag has any uncertain inodes
+	 */
+	printf("        - process newly discovered inodes...\n");
+	for (;;)  {
+		pending = 0;
+		for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+			pending += process_uncertain_aginodes(mp, agno);
+#ifdef XR_INODE_TRACE
+			fprintf(stderr,
+				"\t\t phase 3 - process_uncertain_inodes returns %d\n",
+				pending);
+#endif
+		}
+		if (pending == 0)
+			break;
+	}
+}
+
diff --git a/repair/phase4.c b/repair/phase4.c
new file mode 100644
index 000000000..d3e0bd10d
--- /dev/null
+++ b/repair/phase4.c
@@ -0,0 +1,1337 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "bmap.h"
+#include "versions.h"
+#include "dir2.h"
+
+
+/*
+ * Scan one directory leaf block (xfs_dir_leafblock_t) for entries named
+ * ORPHANAGE.  For each match: record the inode number in
+ * old_orphanage_ino, clear and free the inode if it is in the incore
+ * inode tree, and overwrite the first byte of the name with '/' so the
+ * entry gets junked later.  *dirty is set if the leaf block changed,
+ * *rbuf_dirty if an inode in rootino_bp changed.  Returns the number
+ * of entries marked.
+ */
+/* ARGSUSED */
+int
+lf_block_delete_orphanage(xfs_mount_t		*mp,
+			xfs_ino_t		ino,
+			xfs_dir_leafblock_t	*leaf,
+			int			*dirty,
+			xfs_buf_t		*rootino_bp,
+			int			*rbuf_dirty)
+{
+	xfs_dir_leaf_entry_t	*entry;
+	xfs_dinode_t		*dino;
+	xfs_buf_t		*bp;
+	ino_tree_node_t		*irec;
+	xfs_ino_t		lino;
+	xfs_dir_leaf_name_t	*namest;
+	xfs_agino_t		agino;
+	xfs_agnumber_t		agno;
+	xfs_agino_t		root_agino;
+	xfs_agnumber_t		root_agno;
+	int			i;
+	int			ino_offset;
+	int			ino_dirty;
+	int			use_rbuf;
+	int			len;
+	char			fname[MAXNAMELEN + 1];
+	int			res;
+
+	entry = &leaf->entries[0];
+	*dirty = 0;
+	res = 0;
+	root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+	root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+			INT_GET(entry->nameidx, ARCH_CONVERT));
+		XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+		bcopy(namest->name, fname, entry->namelen);
+		fname[entry->namelen] = '\0';
+
+		if (fname[0] != '/' && !strcmp(fname, ORPHANAGE))  {
+			agino = XFS_INO_TO_AGINO(mp, lino);
+			agno = XFS_INO_TO_AGNO(mp, lino);
+			old_orphanage_ino = lino;
+			irec = find_inode_rec(agno, agino);
+
+			/*
+			 * if the orphanage inode is in the tree,
+			 * get it, clear it, and mark it free.
+			 * the inodes in the orphanage will get
+			 * reattached to the new orphanage.
+			 */
+			if (irec != NULL)  {
+				ino_offset = agino - irec->ino_startnum;
+
+				/*
+				 * use the root inode buffer or read one in
+				 * ourselves?  the root inode is always the
+				 * first inode of its chunk, so lost+found
+				 * can share its buffer in two cases: a
+				 * large block filesystem where the inodes
+				 * are in different chunks but in the same
+				 * block (multiple chunks per block), or
+				 * (one or more blocks per chunk) where both
+				 * are in the same chunk.  inodes are
+				 * allocated on disk in units of
+				 * MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+				 *
+				 * decide this afresh for every matching
+				 * entry: a duplicate entry may point at an
+				 * inode in another buffer, and a buffer we
+				 * read ourselves must be released below.
+				 */
+				use_rbuf = (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+						== XFS_INO_TO_FSB(mp, lino) ||
+				    (agno == root_agno &&
+				     agino < root_agino + XFS_INODES_PER_CHUNK));
+				if (use_rbuf)  {
+					bp = rootino_bp;
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - root_agino);
+				} else {
+					len = (int)XFS_FSB_TO_BB(mp,
+						MAX(1, XFS_INODES_PER_CHUNK/
+							inodes_per_block));
+					bp = libxfs_readbuf(mp->m_dev,
+						XFS_AGB_TO_DADDR(mp, agno,
+							XFS_AGINO_TO_AGBNO(mp,
+								irec->ino_startnum)),
+						len, 0);
+					if (!bp)
+						do_error("couldn't read %s inode %llu\n",
+							ORPHANAGE, lino);
+
+					/*
+					 * the agbno of the first inode in the
+					 * chunk is where the buffer starts:
+					 * in multi-block chunks that is the
+					 * beginning of a properly aligned
+					 * buffer, in multi-chunk blocks it is
+					 * the correct block number.  turning
+					 * it back into an agino gives the
+					 * first agino in the buffer; the iptr
+					 * offset is relative to that.
+					 */
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - XFS_OFFBNO_TO_AGINO(mp,
+							XFS_AGINO_TO_AGBNO(mp,
+							irec->ino_startnum),
+							0));
+				}
+
+				do_warn("        - clearing existing \"%s\" inode\n",
+					ORPHANAGE);
+
+				ino_dirty = clear_dinode(mp, dino, lino);
+
+				if (!use_rbuf)  {
+					ASSERT(!ino_dirty || !no_modify);
+
+					if (ino_dirty && !no_modify)
+						libxfs_writebuf(bp, 0);
+					else
+						libxfs_putbuf(bp);
+				} else if (ino_dirty)  {
+					*rbuf_dirty = 1;
+				}
+
+				if (inode_isadir(irec, ino_offset))
+					clear_inode_isadir(irec, ino_offset);
+
+				set_inode_free(irec, ino_offset);
+			}
+
+			/*
+			 * regardless of whether the inode num is good or
+			 * bad, mark the entry to be junked so the
+			 * createname in phase 6 will succeed.
+			 */
+			namest->name[0] = '/';
+			*dirty = 1;
+			do_warn("        - marking entry \"%s\" to be deleted\n", fname);
+			res++;
+		}
+	}
+
+	return(res);
+}
+
+int
+longform_delete_orphanage(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_dinode_t	*dino,
+			xfs_buf_t	*rootino_bp,
+			int		*rbuf_dirty)
+{
+	xfs_dir_leafblock_t	*lfblk;
+	xfs_buf_t		*blk_bp;
+	xfs_dfsbno_t		blk_fsbno;
+	xfs_dablk_t		blk_dabno = 0;
+	int			blk_dirty;
+	int			ndeleted = 0;
+
+	*rbuf_dirty = 0;
+
+	blk_fsbno = get_first_dblock_fsbno(mp, ino, dino);
+	if (blk_fsbno == NULLDFSBNO)  {
+		do_error("couldn't map first leaf block of directory inode %llu\n", ino);
+		exit(1);
+	}
+
+	/*
+	 * walk the whole directory, one leaf block at a time, and
+	 * delete every "lost+found" entry, duplicates included.
+	 *
+	 * We could probably speed this up by doing a smarter lookup
+	 * to get us to the first block that contains the hashvalue
+	 * of "lost+found" but what the heck.  that would require a
+	 * double lookup for each level.  and how big can '/' get???
+	 * It's probably not worth it.
+	 */
+	for (;;)  {
+		ASSERT(blk_fsbno != NULLDFSBNO);
+		blk_bp = libxfs_readbuf(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, blk_fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+		if (blk_bp == NULL)  {
+			do_error("can't read block %u (fsbno %llu) for directory inode "
+				"%llu\n", blk_dabno, blk_fsbno, ino);
+			exit(1);
+		}
+
+		lfblk = (xfs_dir_leafblock_t *)XFS_BUF_PTR(blk_bp);
+		if (INT_GET(lfblk->hdr.info.magic, ARCH_CONVERT) !=
+				XFS_DIR_LEAF_MAGIC)  {
+			do_error("bad magic # (0x%x) for directory leaf block "
+				"(bno %u fsbno %llu)\n",
+				INT_GET(lfblk->hdr.info.magic, ARCH_CONVERT),
+				blk_dabno, blk_fsbno);
+			exit(1);
+		}
+
+		/* pick up the forward link before the buffer is released */
+		blk_dabno = INT_GET(lfblk->hdr.info.forw, ARCH_CONVERT);
+
+		ndeleted += lf_block_delete_orphanage(mp, ino, lfblk,
+					&blk_dirty, rootino_bp, rbuf_dirty);
+		ASSERT(blk_dirty == 0 || (blk_dirty && !no_modify));
+
+		if (blk_dirty && !no_modify)
+			libxfs_writebuf(blk_bp, 0);
+		else
+			libxfs_putbuf(blk_bp);
+
+		/* a forward link of 0 ends the walk */
+		if (blk_dabno == 0)
+			break;
+		blk_fsbno = get_bmapi(mp, dino, ino, blk_dabno, XFS_DATA_FORK);
+	}
+
+	return ndeleted;
+}
+
+/*
+ * Delete every ORPHANAGE entry from the shortform root directory held
+ * in root_dino.  If the inode an entry points to is in the incore inode
+ * tree, that inode is cleared and marked free as well.  *ino_dirty is
+ * set to 1 if any entry was removed, else 0.
+ *
+ * returns the number of entries deleted (0 if none were found).
+ */
+/* ARGSUSED */
+int
+shortform_delete_orphanage(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_dinode_t	*root_dino,
+			xfs_buf_t	*rootino_bp,
+			int		*ino_dirty)
+{
+	xfs_dir_shortform_t	*sf;
+	xfs_dinode_t		*dino;
+	xfs_dir_sf_entry_t	*sf_entry, *next_sfe, *tmp_sfe;
+	xfs_buf_t		*bp;
+	xfs_ino_t		lino;
+	xfs_agino_t		agino;
+	xfs_agino_t		root_agino;
+	int			max_size;
+	xfs_agnumber_t		agno;
+	xfs_agnumber_t		root_agno;
+	int			ino_dir_size;
+	ino_tree_node_t		*irec;
+	int			ino_offset;
+	int			i;
+	int			dirty;
+	int			tmp_len;
+	int			tmp_elen;
+	int			len;
+	int			use_rbuf;
+	char			fname[MAXNAMELEN + 1];
+	int			res;
+
+	sf = &root_dino->di_u.di_dirsf;
+	*ino_dirty = 0;
+	res = 0;
+	irec = NULL;
+	ino_dir_size = INT_GET(root_dino->di_core.di_size, ARCH_CONVERT);
+	max_size = XFS_DFORK_DSIZE_ARCH(root_dino, mp, ARCH_CONVERT);
+	root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+	root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+	/*
+	 * run through entries looking for "lost+found".
+	 */
+	sf_entry = next_sfe = &sf->list[0];
+	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && ino_dir_size >
+			(__psint_t)next_sfe - (__psint_t)sf; i++)  {
+		tmp_sfe = NULL;
+		sf_entry = next_sfe;
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+		bcopy(sf_entry->name, fname, sf_entry->namelen);
+		fname[sf_entry->namelen] = '\0';
+
+		if (!strcmp(ORPHANAGE, fname))  {
+			agno = XFS_INO_TO_AGNO(mp, lino);
+			agino = XFS_INO_TO_AGINO(mp, lino);
+
+			irec = find_inode_rec(agno, agino);
+
+			/*
+			 * if the orphanage inode is in the tree,
+			 * get it, clear it, and mark it free.
+			 * the inodes in the orphanage will get
+			 * reattached to the new orphanage.
+			 */
+			if (irec != NULL) {
+				do_warn("        - clearing existing \"%s\" inode\n",
+					ORPHANAGE);
+
+				ino_offset = agino - irec->ino_startnum;
+
+				/*
+				 * use the root inode buffer or read one in
+				 * ourselves?  the root inode is always the
+				 * first inode of its chunk, so lost+found
+				 * can share its buffer in two cases: a
+				 * large block filesystem where the inodes
+				 * are in different chunks but in the same
+				 * block (multiple chunks per block), or
+				 * (one or more blocks per chunk) where both
+				 * are in the same chunk.  inodes are
+				 * allocated on disk in units of
+				 * MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+				 *
+				 * decide this afresh for every matching
+				 * entry: a duplicate entry may point at an
+				 * inode in another buffer, and a buffer we
+				 * read ourselves must be released below.
+				 */
+				use_rbuf = (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+						== XFS_INO_TO_FSB(mp, lino) ||
+				    (agno == root_agno &&
+				     agino < root_agino + XFS_INODES_PER_CHUNK));
+				if (use_rbuf)  {
+					bp = rootino_bp;
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - root_agino);
+				} else {
+					len = (int)XFS_FSB_TO_BB(mp,
+						MAX(1, XFS_INODES_PER_CHUNK/
+							inodes_per_block));
+					bp = libxfs_readbuf(mp->m_dev,
+						XFS_AGB_TO_DADDR(mp, agno,
+							XFS_AGINO_TO_AGBNO(mp,
+								irec->ino_startnum)),
+						len, 0);
+					if (!bp)
+						do_error("could not read %s inode "
+							"%llu\n", ORPHANAGE, lino);
+					/*
+					 * the agbno of the first inode in the
+					 * chunk is where the buffer starts:
+					 * in multi-block chunks that is the
+					 * beginning of a properly aligned
+					 * buffer, in multi-chunk blocks it is
+					 * the correct block number.  turning
+					 * it back into an agino gives the
+					 * first agino in the buffer; the iptr
+					 * offset is relative to that.
+					 */
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - XFS_OFFBNO_TO_AGINO(mp,
+							XFS_AGINO_TO_AGBNO(mp,
+							irec->ino_startnum),
+							0));
+				}
+
+				dirty = clear_dinode(mp, dino, lino);
+
+				ASSERT(dirty == 0 || (dirty && !no_modify));
+
+				/*
+				 * if we read the lost+found inode in to
+				 * it, get rid of it here.  if the lost+found
+				 * inode is in the root inode buffer, the
+				 * buffer will be marked dirty anyway since
+				 * the lost+found entry in the root inode is
+				 * also being deleted which makes the root
+				 * inode buffer automatically dirty.
+				 */
+				if (!use_rbuf)  {
+					dino = NULL;
+					if (dirty && !no_modify)
+						libxfs_writebuf(bp, 0);
+					else
+						libxfs_putbuf(bp);
+				}
+
+				if (inode_isadir(irec, ino_offset))
+					clear_inode_isadir(irec, ino_offset);
+
+				set_inode_free(irec, ino_offset);
+			}
+
+			do_warn("        - deleting existing \"%s\" entry\n",
+				ORPHANAGE);
+
+			/*
+			 * note -- exactly the same deletion code as in
+			 * process_shortform_dir()
+			 */
+			tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+			INT_MOD(root_dino->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+
+			tmp_sfe = (xfs_dir_sf_entry_t *)
+				((__psint_t) sf_entry + tmp_elen);
+			tmp_len = max_size - ((__psint_t) tmp_sfe
+					- (__psint_t) sf);
+
+			memmove(sf_entry, tmp_sfe, tmp_len);
+
+			INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+
+			bzero((void *) ((__psint_t) sf_entry + tmp_len),
+				tmp_elen);
+
+			/*
+			 * set the tmp value to the current
+			 * pointer so we'll process the entry
+			 * we just moved up
+			 */
+			tmp_sfe = sf_entry;
+
+			/*
+			 * WARNING:  drop the index i by one
+			 * so it matches the decremented count for
+			 * accurate comparisons in the loop test.
+			 * mark root inode as dirty to make deletion
+			 * permanent.
+			 */
+			i--;
+
+			*ino_dirty = 1;
+			res++;
+
+		}
+		next_sfe = (tmp_sfe == NULL)
+			? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry +
+				XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry))
+			: tmp_sfe;
+	}
+
+	return(res);
+}
+
+/*
+ * Scan one dir2 data block (xfs_dir2_data_t) for entries named ORPHANAGE,
+ * skipping unused space.  For each match: record the inode number in
+ * old_orphanage_ino, clear and free the inode if it is in the incore
+ * inode tree, and overwrite the first byte of the name with '/' so the
+ * entry gets junked later.  *dirty is set if the block changed,
+ * *rbuf_dirty if an inode in rootino_bp changed.  Returns the number
+ * of entries marked.
+ */
+/* ARGSUSED */
+int
+lf2_block_delete_orphanage(xfs_mount_t		*mp,
+			xfs_ino_t		ino,
+			xfs_dir2_data_t		*data,
+			int			*dirty,
+			xfs_buf_t		*rootino_bp,
+			int			*rbuf_dirty)
+{
+	xfs_dinode_t		*dino;
+	xfs_buf_t		*bp;
+	ino_tree_node_t		*irec;
+	xfs_ino_t		lino;
+	xfs_agino_t		agino;
+	xfs_agnumber_t		agno;
+	xfs_agino_t		root_agino;
+	xfs_agnumber_t		root_agno;
+	int			ino_offset;
+	int			ino_dirty;
+	int			use_rbuf;
+	int			len;
+	char			fname[MAXNAMELEN + 1];
+	int			res;
+	char			*ptr;
+	char			*endptr;
+	xfs_dir2_block_tail_t	*btp;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_unused_t	*dup;
+
+	ptr = (char *)data->u;
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)data);
+		endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	} else
+		endptr = (char *)data + mp->m_dirblksize;
+	*dirty = 0;
+	res = 0;
+	root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+	root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr ||
+				INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+				(INT_GET(dup->length, ARCH_CONVERT) &
+						(XFS_DIR2_DATA_ALIGN - 1)))
+				break;
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		lino = INT_GET(dep->inumber, ARCH_CONVERT);
+		bcopy(dep->name, fname, dep->namelen);
+		fname[dep->namelen] = '\0';
+
+		if (fname[0] != '/' && !strcmp(fname, ORPHANAGE))  {
+			agino = XFS_INO_TO_AGINO(mp, lino);
+			agno = XFS_INO_TO_AGNO(mp, lino);
+			old_orphanage_ino = lino;
+			irec = find_inode_rec(agno, agino);
+
+			/*
+			 * if the orphanage inode is in the tree,
+			 * get it, clear it, and mark it free.
+			 * the inodes in the orphanage will get
+			 * reattached to the new orphanage.
+			 */
+			if (irec != NULL)  {
+				ino_offset = agino - irec->ino_startnum;
+
+				/*
+				 * use the root inode buffer or read one in
+				 * ourselves?  the root inode is always the
+				 * first inode of its chunk, so lost+found
+				 * can share its buffer in two cases: a
+				 * large block filesystem where the inodes
+				 * are in different chunks but in the same
+				 * block (multiple chunks per block), or
+				 * (one or more blocks per chunk) where both
+				 * are in the same chunk.  inodes are
+				 * allocated on disk in units of
+				 * MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+				 *
+				 * decide this afresh for every matching
+				 * entry: a duplicate entry may point at an
+				 * inode in another buffer, and a buffer we
+				 * read ourselves must be released below.
+				 */
+				use_rbuf = (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+						== XFS_INO_TO_FSB(mp, lino) ||
+				    (agno == root_agno &&
+				     agino < root_agino + XFS_INODES_PER_CHUNK));
+				if (use_rbuf)  {
+					bp = rootino_bp;
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - root_agino);
+				} else  {
+					len = (int)XFS_FSB_TO_BB(mp,
+						MAX(1, XFS_INODES_PER_CHUNK/
+							inodes_per_block));
+					bp = libxfs_readbuf(mp->m_dev,
+						XFS_AGB_TO_DADDR(mp, agno,
+							XFS_AGINO_TO_AGBNO(mp,
+								irec->ino_startnum)),
+						len, 0);
+					if (!bp)
+						do_error("couldn't read %s inode %llu\n",
+							ORPHANAGE, lino);
+
+					/*
+					 * the agbno of the first inode in the
+					 * chunk is where the buffer starts:
+					 * in multi-block chunks that is the
+					 * beginning of a properly aligned
+					 * buffer, in multi-chunk blocks it is
+					 * the correct block number.  turning
+					 * it back into an agino gives the
+					 * first agino in the buffer; the iptr
+					 * offset is relative to that.
+					 */
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - XFS_OFFBNO_TO_AGINO(mp,
+							XFS_AGINO_TO_AGBNO(mp,
+							irec->ino_startnum),
+							0));
+				}
+
+				do_warn("        - clearing existing \"%s\" inode\n",
+					ORPHANAGE);
+
+				ino_dirty = clear_dinode(mp, dino, lino);
+
+				if (!use_rbuf) {
+					ASSERT(!ino_dirty || !no_modify);
+
+					if (ino_dirty && !no_modify)
+						libxfs_writebuf(bp, 0);
+					else
+						libxfs_putbuf(bp);
+				} else if (ino_dirty)  {
+					*rbuf_dirty = 1;
+				}
+
+				if (inode_isadir(irec, ino_offset))
+					clear_inode_isadir(irec, ino_offset);
+
+				set_inode_free(irec, ino_offset);
+			}
+
+			/*
+			 * regardless of whether the inode num is good or
+			 * bad, mark the entry to be junked so the
+			 * createname in phase 6 will succeed.
+			 */
+			dep->name[0] = '/';
+			*dirty = 1;
+			do_warn(
+			"        - marking entry \"%s\" to be deleted\n",
+						fname);
+			res++;
+		}
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+
+	return(res);
+}
+
+/*
+ * delete every "lost+found" entry (duplicates included) from a long-form
+ * version 2 root directory.  ino/dino identify the root directory inode
+ * and rootino_bp is the buffer holding it.  *rbuf_dirty is zeroed here
+ * and then handed to lf2_block_delete_orphanage() for each block scanned.
+ * returns the sum of the lf2_block_delete_orphanage() return values.
+ */
+int
+longform2_delete_orphanage(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_dinode_t	*dino,
+			xfs_buf_t	*rootino_bp,
+			int		*rbuf_dirty)
+{
+	xfs_dir2_data_t		*data;
+	xfs_dabuf_t		*bp;
+	xfs_dfsbno_t		fsbno;
+	xfs_dablk_t		da_bno;
+	int			dirty;
+	int			res;
+	bmap_ext_t		*bmp;
+	int			i;
+
+	da_bno = 0;
+	*rbuf_dirty = 0;
+	fsbno = NULLDFSBNO;
+	bmp = malloc(mp->m_dirblkfsbs * sizeof(*bmp));
+	if (!bmp) {
+		do_error(	/* %u takes an unsigned int, not a size_t */
+	"malloc failed (%u bytes) in longform2_delete_orphanage, ino %llu\n",
+			(unsigned int)(mp->m_dirblkfsbs * sizeof(*bmp)), ino);
+		exit(1);
+	}
+
+	/*
+	 * walk the directory one directory block (m_dirblkfsbs fs blocks)
+	 * at a time; a smarter hash lookup isn't worth it for '/'.
+	 */
+	res = 0;
+
+	for (da_bno = 0;
+	     da_bno < XFS_B_TO_FSB(mp, INT_GET(dino->di_core.di_size, ARCH_CONVERT));
+	     da_bno += mp->m_dirblkfsbs) {
+		for (i = 0; i < mp->m_dirblkfsbs; i++) {
+			fsbno = get_bmapi(mp, dino, ino, da_bno + i,
+					  XFS_DATA_FORK);
+			if (fsbno == NULLDFSBNO)
+				break;
+			bmp[i].startoff = da_bno + i;
+			bmp[i].startblock = fsbno;
+			bmp[i].blockcount = 1;
+			bmp[i].flag = 0;
+		}
+		if (fsbno == NULLDFSBNO)	/* part of this dir block is unmapped */
+			continue;
+		bp = da_read_buf(mp, mp->m_dirblkfsbs, bmp);
+		if (bp == NULL) {
+			do_error(
+		"can't read block %u (fsbno %llu) for directory inode %llu\n",
+					da_bno, bmp[0].startblock, ino);
+			exit(1);
+		}
+
+		data = (xfs_dir2_data_t *)bp->data;
+
+		if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC &&
+		    INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)  {
+			do_error(
+	"bad magic # (0x%x) for directory data block (bno %u fsbno %llu)\n",
+				INT_GET(data->hdr.magic, ARCH_CONVERT), da_bno, bmp[0].startblock);
+			exit(1);
+		}
+
+		res += lf2_block_delete_orphanage(mp, ino, data, &dirty,
+					rootino_bp, rbuf_dirty);
+
+		ASSERT(dirty == 0 || dirty && !no_modify);
+
+		if (dirty && !no_modify)
+			da_bwrite(mp, bp);
+		else
+			da_brelse(bp);
+	}
+	free(bmp);
+
+	return(res);
+}
+
+/*
+ * delete every "lost+found" entry from a shortform version 2 root
+ * directory, clearing and freeing the inode each entry points to when
+ * that inode has an incore record.  root_dino is the on-disk root inode
+ * and rootino_bp the buffer holding it; *ino_dirty is set to 1 if the
+ * root inode was modified.  returns the number of entries deleted
+ * (0 if none), not just a 0/1 flag.
+ */
+/* ARGSUSED */
+int
+shortform2_delete_orphanage(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_dinode_t	*root_dino,
+			xfs_buf_t	*rootino_bp,
+			int		*ino_dirty)
+{
+	xfs_dir2_sf_t		*sf;
+	xfs_dinode_t		*dino;
+	xfs_dir2_sf_entry_t	*sf_entry, *next_sfe, *tmp_sfe;
+	xfs_buf_t		*bp;
+	xfs_ino_t		lino;
+	xfs_agino_t		agino;
+	xfs_agino_t		root_agino;
+	int			max_size;
+	xfs_agnumber_t		agno;
+	xfs_agnumber_t		root_agno;
+	int			ino_dir_size;
+	ino_tree_node_t		*irec;
+	int			ino_offset;
+	int			i;
+	int			dirty;
+	int			tmp_len;
+	int			tmp_elen;
+	int			len;
+	int			use_rbuf;
+	char			fname[MAXNAMELEN + 1];
+	int			res;
+
+	sf = &root_dino->di_u.di_dir2sf;
+	*ino_dirty = 0;
+	irec = NULL;
+	ino_dir_size = INT_GET(root_dino->di_core.di_size, ARCH_CONVERT);
+	max_size = XFS_DFORK_DSIZE_ARCH(root_dino, mp, ARCH_CONVERT);
+	use_rbuf = 0;
+	res = 0;
+	root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+	root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+	/*
+	 * run through entries looking for "lost+found".
+	 */
+	sf_entry = next_sfe = XFS_DIR2_SF_FIRSTENTRY(sf);
+	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && ino_dir_size >
+			(__psint_t)next_sfe - (__psint_t)sf; i++)  {
+		tmp_sfe = NULL;
+		sf_entry = next_sfe;
+		lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf,
+			XFS_DIR2_SF_INUMBERP(sf_entry), ARCH_CONVERT);
+		bcopy(sf_entry->name, fname, sf_entry->namelen);
+		fname[sf_entry->namelen] = '\0';
+
+		if (!strcmp(ORPHANAGE, fname))  {
+			agno = XFS_INO_TO_AGNO(mp, lino);
+			agino = XFS_INO_TO_AGINO(mp, lino);
+
+			irec = find_inode_rec(agno, agino);
+
+			/*
+			 * if the orphanage inode is in the tree,
+			 * get it, clear it, and mark it free.
+			 * the inodes in the orphanage will get
+			 * reattached to the new orphanage.
+			 */
+			if (irec != NULL)  {
+				do_warn("        - clearing existing \"%s\" inode\n",
+					ORPHANAGE);
+
+				ino_offset = agino - irec->ino_startnum;
+
+				/*
+				 * use the root inode buffer or read one in
+				 * ourselves?  the root inode is always the
+				 * first inode of its chunk, so lost+found
+				 * shares its buffer in two cases: a large
+				 * block filesystem where the two inodes sit
+				 * in different chunks of the same block, or
+				 * (one or more blocks per chunk) where both
+				 * are in the same chunk.  inodes are allocated
+				 * on disk in units of
+				 * MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+				 */
+				if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+						== XFS_INO_TO_FSB(mp, lino) ||
+				    (agno == root_agno &&
+				     agino < root_agino + XFS_INODES_PER_CHUNK)) {
+					use_rbuf = 1;
+					bp = rootino_bp;
+
+					dino = XFS_MAKE_IPTR(mp, bp, agino -
+						XFS_INO_TO_AGINO(mp,
+							mp->m_sb.sb_rootino));
+				} else  {
+					use_rbuf = 0;	/* may be 1 from an earlier entry */
+					len = (int)XFS_FSB_TO_BB(mp,
+						MAX(1, XFS_INODES_PER_CHUNK/
+							inodes_per_block));
+					bp = libxfs_readbuf(mp->m_dev,
+						XFS_AGB_TO_DADDR(mp, agno,
+							XFS_AGINO_TO_AGBNO(mp,
+								irec->ino_startnum)),
+						len, 0);
+					if (!bp)
+						do_error("could not read %s inode "
+							"%llu\n", ORPHANAGE, lino);
+					/*
+					 * get the agbno containing the first
+					 * inode in the chunk.  In multi-block
+					 * chunks, this gets us the offset
+					 * relative to the beginning of a
+					 * properly aligned buffer.  In
+					 * multi-chunk blocks, this gets us
+					 * the correct block number.  Then
+					 * turn the block number back into
+					 * an agino and calculate the offset
+					 * from there to feed to make the iptr.
+					 * the last term in effect rounds down
+					 * to the first agino in the buffer.
+					 */
+					dino = XFS_MAKE_IPTR(mp, bp,
+						agino - XFS_OFFBNO_TO_AGINO(mp,
+							XFS_AGINO_TO_AGBNO(mp,
+							irec->ino_startnum),
+							0));
+				}
+
+				dirty = clear_dinode(mp, dino, lino);
+
+				ASSERT(dirty == 0 || dirty && !no_modify);
+
+				/*
+				 * if we read the lost+found inode in to
+				 * it, get rid of it here.  if the lost+found
+				 * inode is in the root inode buffer, the
+				 * buffer will be marked dirty anyway since
+				 * the lost+found entry in the root inode is
+				 * also being deleted which makes the root
+				 * inode buffer automatically dirty.
+				 */
+				if (!use_rbuf)  {
+					dino = NULL;
+					if (dirty && !no_modify)
+						libxfs_writebuf(bp, 0);
+					else
+						libxfs_putbuf(bp);
+				}
+
+				if (inode_isadir(irec, ino_offset))
+					clear_inode_isadir(irec, ino_offset);
+
+				set_inode_free(irec, ino_offset);
+			}
+
+			do_warn("        - deleting existing \"%s\" entry\n",
+				ORPHANAGE);
+
+			/*
+			 * note -- exactly the same deletion code as in
+			 * process_shortform_dir()
+			 */
+			tmp_elen = XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sf_entry);
+			INT_MOD(root_dino->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+
+			tmp_sfe = (xfs_dir2_sf_entry_t *)
+				((__psint_t) sf_entry + tmp_elen);
+			tmp_len = max_size - ((__psint_t) tmp_sfe
+					- (__psint_t) sf);
+
+			memmove(sf_entry, tmp_sfe, tmp_len);
+
+			INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+			if (lino > XFS_DIR2_MAX_SHORT_INUM)
+				sf->hdr.i8count--;
+
+			bzero((void *) ((__psint_t) sf_entry + tmp_len),
+				tmp_elen);
+
+			/*
+			 * set the tmp value to the current
+			 * pointer so we'll process the entry
+			 * we just moved up
+			 */
+			tmp_sfe = sf_entry;
+
+			/*
+			 * WARNING:  drop the index i by one
+			 * so it matches the decremented count for
+			 * accurate comparisons in the loop test.
+			 * mark root inode as dirty to make deletion
+			 * permanent.
+			 */
+			i--;
+
+			*ino_dirty = 1;
+
+			res++;
+		}
+		next_sfe = (tmp_sfe == NULL)
+			? (xfs_dir2_sf_entry_t *) ((__psint_t) sf_entry +
+				XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sf_entry))
+			: tmp_sfe;
+	}
+
+	return(res);
+}
+
+/*
+ * delete "lost+found" from the root directory (never in no_modify mode)
+ * and decrement the root inode's link count if anything was deleted.
+ */
+void
+delete_orphanage(xfs_mount_t *mp)
+{
+	xfs_ino_t ino;
+	xfs_dinode_t *dino;
+	xfs_buf_t *dbp;
+	int dirty, res, len;
+
+	ASSERT(!no_modify);
+
+	dirty = res = 0;
+	ino = mp->m_sb.sb_rootino;
+
+	/* we know the root is in use or we wouldn't be here */
+	len = (int)XFS_FSB_TO_BB(mp,
+			MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block));
+	dbp = libxfs_readbuf(mp->m_dev,
+			XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, ino)), len, 0);
+	if (!dbp) {
+		do_error("could not read buffer for root inode %llu "
+			"(daddr %lld, size %d)\n", ino,
+			XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, ino)),
+			len);	/* report the size actually requested */
+	}
+
+	/*
+	 * the root inode is always the first inode allocated in the
+	 * system, so it sits at the beginning of the root inode chunk
+	 */
+	dino = XFS_MAKE_IPTR(mp, dbp, 0);
+
+	switch (dino->di_core.di_format)  {
+	case XFS_DINODE_FMT_EXTENTS:
+	case XFS_DINODE_FMT_BTREE:
+		if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+			res = longform2_delete_orphanage(mp, ino, dino, dbp,
+				&dirty);
+		else
+			res = longform_delete_orphanage(mp, ino, dino, dbp,
+				&dirty);
+		break;
+	case XFS_DINODE_FMT_LOCAL:
+		if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+			res = shortform2_delete_orphanage(mp, ino, dino, dbp,
+				&dirty);
+		else
+			res = shortform_delete_orphanage(mp, ino, dino, dbp,
+				&dirty);
+		ASSERT(res == 0 && dirty == 0 || res == 1 && dirty == 1);
+		break;
+	default:
+		break;
+	}
+
+	if (res)  {
+		switch (dino->di_core.di_version)  {
+		case XFS_DINODE_VERSION_1:
+			INT_MOD(dino->di_core.di_onlink, ARCH_CONVERT, -1);
+			INT_SET(dino->di_core.di_nlink, ARCH_CONVERT,
+				INT_GET(dino->di_core.di_onlink, ARCH_CONVERT));
+			break;
+		case XFS_DINODE_VERSION_2:
+			INT_MOD(dino->di_core.di_nlink, ARCH_CONVERT, -1);
+			break;
+		default:
+			do_error("unknown version #%d in root inode\n",
+					dino->di_core.di_version);
+		}
+
+		dirty = 1;
+	}
+
+	if (dirty)
+		libxfs_writebuf(dbp, 0);
+	else
+		libxfs_putbuf(dbp);
+}
+
+/*
+ * null out quota inode fields in sb if they point to non-existent inodes.
+ * this isn't as redundant as it looks: the sb field may be set while the
+ * imap and inode(s) agree that the inode is free, in which case
+ * process_dinode() would never clear it.  the free check is indexed by
+ * the AG-relative inode number, matching the find_inode_rec() lookup.
+ */
+void
+quotino_check(xfs_mount_t *mp)
+{
+	ino_tree_node_t *irec;
+
+	if (mp->m_sb.sb_uquotino != NULLFSINO && mp->m_sb.sb_uquotino != 0)  {
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_uquotino),
+			XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
+
+		if (irec == NULL || is_inode_free(irec, XFS_INO_TO_AGINO(mp,
+				mp->m_sb.sb_uquotino) - irec->ino_startnum))  {
+			mp->m_sb.sb_uquotino = NULLFSINO;
+			lost_uquotino = 1;
+		} else
+			lost_uquotino = 0;
+	}
+
+	if (mp->m_sb.sb_pquotino != NULLFSINO && mp->m_sb.sb_pquotino != 0)  {
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_pquotino),
+			XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
+
+		if (irec == NULL || is_inode_free(irec, XFS_INO_TO_AGINO(mp,
+				mp->m_sb.sb_pquotino) - irec->ino_startnum))  {
+			mp->m_sb.sb_pquotino = NULLFSINO;
+			lost_pquotino = 1;
+		} else
+			lost_pquotino = 0;
+	}
+}
+
+void
+quota_sb_check(xfs_mount_t *mp)
+{
+	/*
+	 * if the sb says we have quotas and we lost both,
+	 * signal a superblock downgrade.  that will cause
+	 * the quota flags to get zeroed.  (if we only lost
+	 * one quota inode, do nothing and complain later.)
+	 *
+	 * if the sb says we have quotas but we didn't start out
+	 * with any quota inodes, signal a superblock downgrade.
+	 *
+	 * The sb downgrades are so that older systems can mount
+	 * the filesystem.
+	 *
+	 * otherwise, if both the user and the project quota inode
+	 * numbers verify, then signal a superblock upgrade.
+	 *
+	 * if the sb says we don't have quotas and we have no
+	 * quota inodes, then leave well enough alone.
+	 */
+
+	if (fs_quotas &&
+	    (mp->m_sb.sb_uquotino == NULLFSINO || mp->m_sb.sb_uquotino == 0) &&
+	    (mp->m_sb.sb_pquotino == NULLFSINO || mp->m_sb.sb_pquotino == 0))  {
+		lost_quotas = 1;
+		fs_quotas = 0;
+	} else if (!verify_inum(mp, mp->m_sb.sb_uquotino) &&
+			!verify_inum(mp, mp->m_sb.sb_pquotino))  {
+		fs_quotas = 1;
+	}
+}
+
+
+/*
+ * phase 4: clear lost+found, build the duplicate extent lists, reset the
+ * block bitmaps, re-check every inode against them, then vet quota inodes.
+ */
+void
+phase4(xfs_mount_t *mp)
+{
+	ino_tree_node_t		*irec;
+	xfs_drtbno_t		bno;
+	xfs_drtbno_t		rt_start;
+	xfs_extlen_t		rt_len;
+	xfs_agnumber_t		i;
+	xfs_agblock_t		j;
+	xfs_agblock_t		ag_end;
+	xfs_agblock_t		extent_start;
+	xfs_extlen_t		extent_len;
+	int			ag_hdr_len = 4 * mp->m_sb.sb_sectsize;
+	int			ag_hdr_block;
+	int			bstate;
+	int			count_bcnt_extents(xfs_agnumber_t agno);
+	int			count_bno_extents(xfs_agnumber_t agno);
+
+	ag_hdr_block = howmany(ag_hdr_len, mp->m_sb.sb_blocksize);
+
+	printf("Phase 4 - check for duplicate blocks...\n");
+	printf("        - setting up duplicate extent list...\n");
+
+	irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+				XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+
+	/*
+	 * we always have a root inode, even if it's free...
+	 * if the root is free, forget it, lost+found is already gone
+	 */
+	if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
+		need_root_inode = 1;
+		if (no_modify)
+			do_warn("root inode would be lost\n");
+		else
+			do_warn("root inode lost\n");
+	}
+
+	/*
+	 * have to delete lost+found first so that blocks used
+	 * by lost+found don't show up as used
+	 */
+	if (!no_modify)  {
+		printf("        - clear lost+found (if it exists) ...\n");
+		if (!need_root_inode)
+			delete_orphanage(mp);
+	}
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+		ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks :
+			mp->m_sb.sb_dblocks -
+				(xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+		extent_start = extent_len = 0;
+		/*
+		 * set up duplicate extent list for this ag
+		 */
+		for (j = ag_hdr_block; j < ag_end; j++)  {
+
+			bstate = get_agbno_state(mp, i, j);
+
+			switch (bstate)  {
+			case XR_E_BAD_STATE:
+			default:
+				do_warn("unknown block state, ag %d, \
+block %d\n",
+					i, j);
+				/* fall through .. */
+			case XR_E_UNKNOWN:
+			case XR_E_FREE1:
+			case XR_E_FREE:
+			case XR_E_INUSE:
+			case XR_E_INUSE_FS:
+			case XR_E_INO:
+			case XR_E_FS_MAP:
+				if (extent_start == 0)
+					continue;
+				else  {
+					/*
+					 * add extent and reset extent state
+					 */
+					add_dup_extent(i, extent_start,
+							extent_len);
+					extent_start = 0;
+					extent_len = 0;
+				}
+				break;
+			case XR_E_MULT:
+				if (extent_start == 0)  {
+					extent_start = j;
+					extent_len = 1;
+				} else if (extent_len == MAXEXTLEN)  {
+					add_dup_extent(i, extent_start,
+							extent_len);
+					extent_start = j;
+					extent_len = 1;
+				} else
+					extent_len++;
+				break;
+			}
+		}
+		/*
+		 * catch tail-case, extent hitting the end of the ag
+		 */
+		if (extent_start != 0)
+			add_dup_extent(i, extent_start, extent_len);
+	}
+
+	/*
+	 * set up the realtime duplicate extent list.  rt extent 0 is a
+	 * legitimate extent, so rt_len (not rt_start) being non-zero is
+	 * what marks an extent in progress.
+	 */
+	rt_start = 0;
+	rt_len = 0;
+
+	for (bno = 0; bno < mp->m_sb.sb_rextents; bno++)  {
+
+		bstate = get_rtbno_state(mp, bno);
+
+		switch (bstate)  {
+		case XR_E_BAD_STATE:
+		default:
+			do_warn("unknown rt extent state, extent %llu\n", bno);
+			/* fall through .. */
+		case XR_E_UNKNOWN:
+		case XR_E_FREE1:
+		case XR_E_FREE:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_INO:
+		case XR_E_FS_MAP:
+			if (rt_len == 0)
+				continue;
+			else  {
+				/*
+				 * add extent and reset extent state
+				 */
+				add_rt_dup_extent(rt_start, rt_len);
+				rt_start = 0;
+				rt_len = 0;
+			}
+			break;
+		case XR_E_MULT:
+			if (rt_len == 0)  {
+				rt_start = bno;
+				rt_len = 1;
+			} else if (rt_len == MAXEXTLEN)  {
+				/*
+				 * large extent case
+				 */
+				add_rt_dup_extent(rt_start, rt_len);
+				rt_start = bno;
+				rt_len = 1;
+			} else
+				rt_len++;
+			break;
+		}
+	}
+
+	/*
+	 * catch tail-case, extent hitting the end of the rt extents
+	 */
+	if (rt_len != 0)
+		add_rt_dup_extent(rt_start, rt_len);
+
+	/*
+	 * reset the bitmaps for all AGs, then mark the ag headers in use
+	 */
+	for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+		bzero(ba_bmap[i], roundup(mp->m_sb.sb_agblocks*(NBBY/XR_BB),
+						sizeof(__uint64_t)));
+		for (j = 0; j < ag_hdr_block; j++)
+			set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+	}
+	set_bmap_rt(mp->m_sb.sb_rextents);
+	set_bmap_log(mp);
+	set_bmap_fs(mp);
+
+	printf("        - check for inodes claiming duplicate blocks...\n");
+	for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+		/*
+		 * ok, now process the inodes -- signal 2-pass check per inode.
+		 * first pass checks if the inode conflicts with a known
+		 * duplicate extent.  if so, the inode is cleared and second
+		 * pass is skipped.  second pass sets the block bitmap
+		 * for all blocks claimed by the inode.  directory
+		 * and attribute processing is turned OFF since we did that
+		 * already in phase 3.
+		 */
+		do_log("        - agno = %d\n", i);
+		process_aginodes(mp, i, 0, 1, 0);
+
+		/*
+		 * now recycle the per-AG duplicate extent records
+		 */
+		release_dup_extent_tree(i);
+	}
+
+	/*
+	 * free rt dup extent tree.  NOTE(review): only if scan ended in a dup?
+	 */
+	if (rt_start != 0)
+		free_rt_dup_extent_tree(mp);
+
+	/*
+	 * ensure consistency of quota inode pointers in superblock,
+	 * make sure they point to real inodes
+	 */
+	quotino_check(mp);
+	quota_sb_check(mp);
+}
diff --git a/repair/phase5.c b/repair/phase5.c
new file mode 100644
index 000000000..2e306bd25
--- /dev/null
+++ b/repair/phase5.c
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "rt.h"
+#include "versions.h"
+
+/*
+ * we maintain the current slice (path from root to leaf)
+ * of the btree incore.  when we need a new block, we ask
+ * the block allocator for the address of a block on that
+ * level, map the block in, and set up the appropriate
+ * pointers (child, sibling, etc.) and keys that should
+ * point to the new block.
+ */
+typedef struct bt_stat_level  {
+	/*
+	 * set in setup_cursor routine and maintained in the tree-building
+	 * routines
+	 */
+	xfs_buf_t		*buf_p;		/* 2 buffer pointers to ... */
+	xfs_buf_t		*prev_buf_p;	/* ... agbno/prev_agbno? (TODO confirm) */
+	xfs_agblock_t		agbno;		/* current block being filled */
+	xfs_agblock_t		prev_agbno;	/* previous block */
+	/*
+	 * set in calculate/init cursor routines for each btree level
+	 */
+	int			num_recs_tot;	/* # tree recs in level */
+	int			num_blocks;	/* # tree blocks in level */
+	int			num_recs_pb;	/* num_recs_tot / num_blocks */
+	int			modulo;		/* num_recs_tot % num_blocks */
+} bt_stat_level_t;
+
+typedef struct bt_status  {		/* cursor used to build one btree */
+	int			init;		/* cursor set up once? */
+	int			num_levels;	/* # of levels in btree */
+	xfs_extlen_t		num_tot_blocks;	/* # blocks alloc'ed for tree */
+	xfs_extlen_t		num_free_blocks;/* # blocks currently unused */
+
+	xfs_agblock_t		root;		/* root block */
+	/*
+	 * list of blocks to be used to set up this tree
+	 * and pointer to the first unused block on the list
+	 */
+	xfs_agblock_t		*btree_blocks;		/* block list */
+	xfs_agblock_t		*free_btree_blocks;	/* first unused block */
+	/*
+	 * per-level status info, indexed by btree level (0 is the leaf level)
+	 */
+	bt_stat_level_t		level[XFS_BTREE_MAXLEVELS];
+} bt_status_t;
+
+
+/*
+ * scan the block bitmap of AG agno and add every run of free blocks
+ * (state < XR_E_INUSE) to the incore bno and bcnt extent trees.
+ * returns the number of free extents found.
+ */
+int
+mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	int			in_extent;
+	int			num_extents;
+	xfs_agblock_t		extent_start;
+	xfs_extlen_t		extent_len;
+	xfs_agblock_t		agbno;
+	xfs_agblock_t		ag_end;
+	uint			free_blocks;
+#ifdef XR_BLD_FREE_TRACE
+	int			old_state;
+	int			state = XR_E_BAD_STATE;
+#endif
+
+	/*
+	 * at this point blocks in the bitmap are either "in use" or
+	 * unknown (0): the bmaps were bzero'ed in phase 4 and only blocks
+	 * used by inodes, inode bmaps, ag headers and files were set.
+	 */
+	ASSERT(agno < mp->m_sb.sb_agcount);
+
+	extent_start = extent_len = 0;
+	in_extent = 0;
+	num_extents = free_blocks = 0;
+
+	if (agno < mp->m_sb.sb_agcount - 1)
+		ag_end = mp->m_sb.sb_agblocks;
+	else
+		ag_end = mp->m_sb.sb_dblocks - (xfs_drfsbno_t)
+			mp->m_sb.sb_agblocks * (mp->m_sb.sb_agcount - 1);
+
+	/*
+	 * ok, now find and count the free extents.  the product above is
+	 * widened first so it can't wrap on very large filesystems.
+	 */
+	for (agbno = 0; agbno < ag_end; agbno++)  {
+#if 0
+		old_state = state;
+		state = get_agbno_state(mp, agno, agbno);
+		if (state != old_state)  {
+			fprintf(stderr, "agbno %u - new state is %d\n",
+					agbno, state);
+		}
+#endif
+		if (get_agbno_state(mp, agno, agbno) < XR_E_INUSE)  {
+			free_blocks++;
+			if (in_extent == 0)  {
+				/*
+				 * found the start of a free extent
+				 */
+				in_extent = 1;
+				num_extents++;
+				extent_start = agbno;
+				extent_len = 1;
+			} else  {
+				extent_len++;
+			}
+		} else   {
+			if (in_extent)  {
+				/*
+				 * free extent ends here, add extent to the
+				 * 2 incore extent (avl-to-be-B+) trees
+				 */
+				in_extent = 0;
+#if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
+				fprintf(stderr, "adding extent %u [%u %u]\n",
+					agno, extent_start, extent_len);
+#endif
+				add_bno_extent(agno, extent_start, extent_len);
+				add_bcnt_extent(agno, extent_start, extent_len);
+			}
+		}
+	}
+	if (in_extent)  {
+		/*
+		 * free extent ends here
+		 */
+		in_extent = 0;
+#if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
+		fprintf(stderr, "adding extent %u [%u %u]\n",
+			agno, extent_start, extent_len);
+#endif
+		add_bno_extent(agno, extent_start, extent_len);
+		add_bcnt_extent(agno, extent_start, extent_len);
+	}
+
+	return(num_extents);
+}
+
+/* ARGSUSED */
+xfs_agblock_t
+get_next_blockaddr(xfs_agnumber_t agno, int level, bt_status_t *curs)
+{
+	ASSERT(curs->free_btree_blocks < curs->btree_blocks +
+						curs->num_tot_blocks);
+	ASSERT(curs->num_free_blocks > 0);
+	/* hand out the next unused block and step past it in the list */
+	curs->num_free_blocks--;
+	return(*curs->free_btree_blocks++);
+}
+
+/*
+ * set up the dynamically allocated block list in the btree cursor from
+ * the info in the static portion of the cursor: allocate an array of
+ * curs->num_tot_blocks block numbers and fill it with blocks taken from
+ * the incore bno/bcnt free extent trees of AG agno, smallest extents
+ * first.  curs->free_btree_blocks is left pointing at the first entry.
+ * called by init_freespace_cursor() and init_ino_cursor()
+ */
+/* ARGSUSED */
+void
+setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs)
+{
+	unsigned int		u;
+	xfs_extlen_t		big_extent_len;
+	xfs_agblock_t		big_extent_start;
+	extent_tree_node_t	*ext_ptr;
+	extent_tree_node_t	*bno_ext_ptr;
+	xfs_extlen_t		blocks_allocated;
+	xfs_agblock_t		*agb_ptr;
+
+	/*
+	 * get the number of blocks we need to allocate, then
+	 * set up block number array, set the free block pointer
+	 * to the first block in the array, and null the array
+	 */
+	big_extent_len = curs->num_tot_blocks;
+	blocks_allocated = 0;
+
+	ASSERT(big_extent_len > 0);
+
+	if ((curs->btree_blocks = malloc(sizeof(xfs_agblock_t)
+					* big_extent_len)) == NULL)  {
+		do_error("could not set up btree block array\n");
+		exit(1);
+	}
+
+	agb_ptr = curs->free_btree_blocks = curs->btree_blocks;
+
+	for (u = 0; u < big_extent_len; u++, agb_ptr++)
+		*agb_ptr = NULLAGBLOCK;
+
+	/*
+	 * grab the smallest extent and use it up, then get the
+	 * next smallest.  This mimics the init_*_cursor code.
+	 */
+	ext_ptr = findfirst_bcnt_extent(agno);
+	agb_ptr = curs->btree_blocks;
+
+	/*
+	 * set up the free block array
+	 */
+	while (blocks_allocated < big_extent_len)  {
+		/* out of free extents with blocks still left to allocate */
+		if (ext_ptr == NULL)  {
+			do_error("error - not enough free space in filesystem\n");
+			exit(1);
+		}
+
+		/*
+		 * use up the extent we've got
+		 */
+		for (u = 0; u < ext_ptr->ex_blockcount &&
+				blocks_allocated < big_extent_len; u++)  {
+			ASSERT(agb_ptr < curs->btree_blocks
+					+ curs->num_tot_blocks);
+			*agb_ptr++ = ext_ptr->ex_startblock + u;
+			blocks_allocated++;
+		}
+
+		/*
+		 * if we only used part of this last extent, then we
+		 * need only to reset the extent in the extent
+		 * trees and we're done
+		 */
+		if (u < ext_ptr->ex_blockcount)  {
+			big_extent_start = ext_ptr->ex_startblock + u;
+			big_extent_len = ext_ptr->ex_blockcount - u;
+
+			ASSERT(big_extent_len > 0);
+
+			bno_ext_ptr = find_bno_extent(agno,
+						ext_ptr->ex_startblock);
+			ASSERT(bno_ext_ptr != NULL);
+			get_bno_extent(agno, bno_ext_ptr);
+			release_extent_tree_node(bno_ext_ptr);
+
+#ifdef XR_BLD_FREE_TRACE
+			fprintf(stderr, "releasing extent: %u [%u %u]\n",
+				agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+			fprintf(stderr, "blocks_allocated = %d\n",
+				blocks_allocated);
+#endif
+			ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+					ext_ptr->ex_blockcount);
+			release_extent_tree_node(ext_ptr);
+
+			add_bno_extent(agno, big_extent_start, big_extent_len);
+			add_bcnt_extent(agno, big_extent_start, big_extent_len);
+
+			return;
+		}
+		/*
+		 * delete the used-up extent from both extent trees and
+		 * find next biggest extent
+		 */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "releasing extent: %u [%u %u]\n",
+			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+		ASSERT(bno_ext_ptr != NULL);
+		get_bno_extent(agno, bno_ext_ptr);
+		release_extent_tree_node(bno_ext_ptr);
+
+		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+		ASSERT(ext_ptr != NULL);
+		release_extent_tree_node(ext_ptr);
+
+		ext_ptr = findfirst_bcnt_extent(agno);
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+void
+write_cursor(bt_status_t *curs)
+{
+	int i;
+	/* write out the current (and any previous) buffer of every level */
+	for (i = 0; i < curs->num_levels; i++)  {
+#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
+		fprintf(stderr, "writing bt block %u\n", curs->level[i].agbno);
+#endif
+		if (curs->level[i].prev_buf_p != NULL)  {
+			ASSERT(curs->level[i].prev_agbno != NULLAGBLOCK);
+			libxfs_writebuf(curs->level[i].prev_buf_p, 0);
+		}
+		libxfs_writebuf(curs->level[i].buf_p, 0);
+	}
+}
+
+void
+finish_cursor(bt_status_t *curs)
+{
+	ASSERT(curs->num_free_blocks == 0);	/* every block was handed out */
+	free(curs->btree_blocks);		/* the cursor's block list array */
+}
+
+/*
+ * no-cursor versions of the XFS equivalents.  The address calculators
+ * should be used only for interior btree nodes; i is a 1-based index.
+ * these are adapted from xfs_alloc_btree.h and xfs_tree.h
+ */
+#define XR_ALLOC_KEY_ADDR(mp, bp, i) \
+	((xfs_alloc_key_t *) ((char *) (bp) + sizeof(xfs_alloc_block_t) \
+				+ ((i)-1) * sizeof(xfs_alloc_key_t)))
+
+#define XR_ALLOC_PTR_ADDR(mp, bp, i) \
+	((xfs_alloc_ptr_t *) ((char *) (bp) + sizeof(xfs_alloc_block_t) \
+			+ (mp)->m_alloc_mxr[1] * sizeof(xfs_alloc_key_t) \
+			+ ((i)-1) * sizeof(xfs_alloc_ptr_t)))
+
+#define XR_ALLOC_BLOCK_MAXRECS(mp, level) \
+			XFS_BTREE_BLOCK_MAXRECS((mp)->m_sb.sb_blocksize, \
+						xfs_alloc, (level) == 0)
+
+/*
+ * size the per-level geometry of a freespace btree for an ag.
+ * *extents (in/out) is the free extent count and btree_curs is
+ * filled in.  returns the number of surplus blocks for the AGFL.
+ */
+
+int
+calculate_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
+			xfs_agblock_t *extents, bt_status_t *btree_curs)
+{
+	xfs_extlen_t		blocks_needed;		/* a running count */
+	xfs_extlen_t		blocks_allocated_pt;	/* per tree */
+	xfs_extlen_t		blocks_allocated_total;	/* for both trees */
+	xfs_agblock_t		num_extents;	/* extents the trees must index */
+	int			i;
+	int			extents_used;	/* extents wholly eaten by tree blocks */
+	int			extra_blocks;
+	bt_stat_level_t		*lptr;
+	bt_stat_level_t		*p_lptr;
+	extent_tree_node_t	*ext_ptr;
+	int			level;
+#ifdef XR_BLD_FREE_TRACE
+	int			old_state;
+	int			state = XR_E_BAD_STATE;
+#endif
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr,
+		"in calculate_freespace_cursor, agno = %u\n", agno);
+#endif
+
+	num_extents = *extents;
+	extents_used = 0;
+
+	ASSERT(num_extents != 0);
+
+	lptr = &btree_curs->level[0];
+	btree_curs->init = 1;
+
+	/*
+	 * figure out how much space we need for the leaf level
+	 * of the tree and set up the cursor for the leaf level
+	 * (note that the same code is duplicated further down)
+	 */
+	lptr->num_blocks = howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0));
+	lptr->num_recs_pb = num_extents / lptr->num_blocks;
+	lptr->modulo = num_extents % lptr->num_blocks;
+	lptr->num_recs_tot = num_extents;
+	level = 1;
+
+	/*
+	 * if we need more levels, set them up.  # of records
+	 * per level is the # of blocks in the level below it
+	 */
+	if (lptr->num_blocks > 1)  {
+		for (; btree_curs->level[level - 1].num_blocks > 1
+				&& level < XFS_BTREE_MAXLEVELS;
+				level++)  {
+			lptr = &btree_curs->level[level];
+			p_lptr = &btree_curs->level[level - 1];
+			lptr->num_blocks = howmany(p_lptr->num_blocks,
+					XR_ALLOC_BLOCK_MAXRECS(mp, level));
+			lptr->modulo = p_lptr->num_blocks
+					% lptr->num_blocks;
+			lptr->num_recs_pb = p_lptr->num_blocks
+					/ lptr->num_blocks;
+			lptr->num_recs_tot = p_lptr->num_blocks;
+		}
+	}
+
+	ASSERT(lptr->num_blocks == 1);	/* lptr is now the topmost level */
+	btree_curs->num_levels = level;
+
+	/*
+	 * ok, now we have a hypothetical cursor that
+	 * will work for both the bno and bcnt trees.
+	 * now figure out if using up blocks to set up the
+	 * trees will perturb the shape of the freespace tree.
+	 * if so, we've over-allocated.  the freespace trees
+	 * as they will be *after* accounting for the free space
+	 * we've used up will need fewer blocks to represent
+	 * than we've allocated.  We can use the AGFL to hold
+	 * XFS_AGFL_SIZE (128) blocks but that's it.
+	 * Thus we limit things to XFS_AGFL_SIZE/2 for each of the 2 btrees.
+	 * if the number of extra blocks is more than that,
+	 * we'll have to be called again.
+	 */
+	for (blocks_needed = 0, i = 0; i < level; i++)  {
+		blocks_needed += btree_curs->level[i].num_blocks;
+	}
+
+	/*
+	 * record the # of blocks we've allocated
+	 */
+	blocks_allocated_pt = blocks_needed;
+	blocks_needed *= 2;
+	blocks_allocated_total = blocks_needed;
+
+	/*
+	 * figure out how many free extents will be used up by
+	 * our space allocation
+	 */
+	if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL)  {
+		do_error("can't rebuild fs trees -- not enough free space "
+			"on ag %u\n", agno);
+		exit(1);
+	}
+
+	i = 0;
+	while (ext_ptr != NULL && blocks_needed > 0)  {
+		if (ext_ptr->ex_blockcount <= blocks_needed)  {
+			blocks_needed -= ext_ptr->ex_blockcount;
+			extents_used++;
+		} else  {
+			blocks_needed = 0;	/* partly used extent survives */
+		}
+
+		ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
+
+#ifdef XR_BLD_FREE_TRACE
+		if (ext_ptr != NULL)  {
+			fprintf(stderr, "got next extent [%u %u]\n",
+				ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+		} else  {
+			fprintf(stderr, "out of extents\n");
+		}
+#endif
+	}
+	if (blocks_needed > 0)  {
+		do_error("ag %u - not enough free space to build freespace "
+			"btrees\n", agno);
+		exit(1);
+	}
+
+	ASSERT(num_extents >= extents_used);
+
+	num_extents -= extents_used;
+
+	/*
+	 * see if the number of leaf blocks will change as a result
+	 * of the number of extents changing
+	 */
+	if (howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0))
+			!= btree_curs->level[0].num_blocks)  {
+		/*
+		 * yes -- recalculate the cursor.  If the number of
+		 * excess (overallocated) blocks is < XFS_AGFL_SIZE/2, we're ok.
+		 * we can put those into the AGFL.  we don't try
+		 * and get things to converge exactly (reach a
+		 * state with zero excess blocks) because there
+		 * exist pathological cases which will never
+		 * converge.  first, check for the zero-case.
+		 */
+		if (num_extents == 0)  {
+			/*
+			 * ok, we've used up all the free blocks
+			 * trying to lay out the leaf level. go
+			 * to a one block (empty) btree and put the
+			 * already allocated blocks into the AGFL
+			 */
+			if (btree_curs->level[0].num_blocks != 1)  {
+				/*
+				 * we really needed more blocks because
+				 * the old tree had more than one level.
+				 * this is bad.
+				 */
+				 do_warn("not enough free blocks left to "
+					"describe all free blocks in AG %u\n",
+					agno);
+			}
+#ifdef XR_BLD_FREE_TRACE
+			fprintf(stderr,
+				"ag %u -- no free extents, alloc'ed %d\n",
+				agno, blocks_allocated_pt);
+#endif
+			lptr = &btree_curs->level[0];	/* lptr was left at the old top level */
+			lptr->num_blocks = 1;
+			lptr->modulo = 0;
+			lptr->num_recs_pb = 0;
+			lptr->num_recs_tot = 0;
+			btree_curs->num_levels = 1;
+
+			/*
+			 * don't reset the allocation stats, assume
+			 * they're all extra blocks
+			 * don't forget to return the total block count
+			 * not the per-tree block count.  these are the
+			 * extras that will go into the AGFL.  subtract
+			 * two for the root blocks.
+			 */
+			btree_curs->num_tot_blocks = blocks_allocated_pt;
+			btree_curs->num_free_blocks = blocks_allocated_pt;
+
+			*extents = 0;
+
+			return(blocks_allocated_total - 2);
+		}
+
+		lptr = &btree_curs->level[0];
+		lptr->num_blocks = howmany(num_extents,
+					XR_ALLOC_BLOCK_MAXRECS(mp, 0));
+		lptr->num_recs_pb = num_extents / lptr->num_blocks;
+		lptr->modulo = num_extents % lptr->num_blocks;
+		lptr->num_recs_tot = num_extents;
+		level = 1;
+
+		/*
+		 * if we need more levels, set them up
+		 */
+		if (lptr->num_blocks > 1)  {
+			for (level = 1; btree_curs->level[level-1].num_blocks
+					> 1 && level < XFS_BTREE_MAXLEVELS;
+					level++)  {
+				lptr = &btree_curs->level[level];
+				p_lptr = &btree_curs->level[level-1];
+				lptr->num_blocks = howmany(p_lptr->num_blocks,
+						XR_ALLOC_BLOCK_MAXRECS(mp,
+								level));
+				lptr->modulo = p_lptr->num_blocks
+						% lptr->num_blocks;
+				lptr->num_recs_pb = p_lptr->num_blocks
+						/ lptr->num_blocks;
+				lptr->num_recs_tot = p_lptr->num_blocks;
+			}
+		}
+		ASSERT(lptr->num_blocks == 1);
+		btree_curs->num_levels = level;
+
+		/*
+		 * now figure out the number of excess blocks
+		 */
+		for (blocks_needed = 0, i = 0; i < level; i++)  {
+			blocks_needed += btree_curs->level[i].num_blocks;
+		}
+		blocks_needed *= 2;
+
+		ASSERT(blocks_allocated_total >= blocks_needed);
+		extra_blocks = blocks_allocated_total - blocks_needed;
+	} else  {
+		if (extents_used > 0) {
+			/*
+			 * reset the leaf level geometry to account
+			 * for consumed extents.  we can leave the
+			 * rest of the cursor alone since the number
+			 * of leaf blocks hasn't changed.
+			 */
+			lptr = &btree_curs->level[0];
+
+			lptr->num_recs_pb = num_extents / lptr->num_blocks;
+			lptr->modulo = num_extents % lptr->num_blocks;
+			lptr->num_recs_tot = num_extents;
+		}
+
+		extra_blocks = 0;
+	}
+
+	btree_curs->num_tot_blocks = blocks_allocated_pt;
+	btree_curs->num_free_blocks = blocks_allocated_pt;
+
+	*extents = num_extents;
+
+	return(extra_blocks);
+}
+
+void
+prop_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
+		bt_status_t *btree_curs, xfs_agblock_t startblock,
+		xfs_extlen_t blockcount, int level, __uint32_t magic)
+{
+	xfs_alloc_block_t	*bt_hdr;
+	xfs_alloc_key_t		*bt_key;
+	xfs_alloc_ptr_t		*bt_ptr;
+	xfs_agblock_t		agbno;
+	bt_stat_level_t		*lptr;
+
+	level++;	/* operate on the level above the one passed in */
+
+	if (level >= btree_curs->num_levels)
+		return;	/* nothing above the topmost level */
+
+	lptr = &btree_curs->level[level];
+	bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+
+	if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) == 0)  {
+		/*
+		 * only happens once when initializing the
+		 * left-hand side of the tree.
+		 */
+		prop_freespace_cursor(mp, agno, btree_curs, startblock,
+				blockcount, level, magic);
+	}
+
+	if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) ==
+			lptr->num_recs_pb + (lptr->modulo > 0))  {
+		/*
+		 * write out current prev block, grab us a new block,
+		 * and set the rightsib pointer of current block
+		 */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, " %u ", lptr->prev_agbno);
+#endif
+		if (lptr->prev_agbno != NULLAGBLOCK) {
+			ASSERT(lptr->prev_buf_p != NULL);
+			libxfs_writebuf(lptr->prev_buf_p, 0);
+		}
+		lptr->prev_agbno = lptr->agbno;
+		lptr->prev_buf_p = lptr->buf_p;
+		agbno = get_next_blockaddr(agno, level, btree_curs);
+
+		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, agbno);
+
+		lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, agbno),
+					XFS_FSB_TO_BB(mp, 1));
+		lptr->agbno = agbno;
+
+		if (lptr->modulo)
+			lptr->modulo--;	/* one fewer block gets an extra record */
+
+		/*
+		 * initialize block header
+		 */
+		bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+		bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
+		INT_SET(bt_hdr->bb_level, ARCH_CONVERT, level);
+		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+
+		/*
+		 * propagate extent record for first extent in new block up
+		 */
+		prop_freespace_cursor(mp, agno, btree_curs, startblock,
+				blockcount, level, magic);
+	}
+	/*
+	 * add extent info to current block
+	 */
+	INT_MOD(bt_hdr->bb_numrecs, ARCH_CONVERT, +1);
+
+	bt_key = XR_ALLOC_KEY_ADDR(mp, bt_hdr,
+			INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+	bt_ptr = XR_ALLOC_PTR_ADDR(mp, bt_hdr,
+			INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+
+	INT_SET(bt_key->ar_startblock, ARCH_CONVERT, startblock);
+	INT_SET(bt_key->ar_blockcount, ARCH_CONVERT, blockcount);
+	INT_SET(*bt_ptr, ARCH_CONVERT, btree_curs->level[level-1].agbno);	/* child's current block */
+}
+
+/*
+ * rebuilds a freespace tree given a cursor and magic number of type
+ * of tree to build (bno or bcnt).  returns the sum of ex_blockcount
+ * over every extent record written into the leaf blocks.
+ */
+xfs_extlen_t
+build_freespace_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
+		bt_status_t *btree_curs, __uint32_t magic)
+{
+	xfs_agnumber_t		i;
+	xfs_agblock_t		j;
+	xfs_alloc_block_t	*bt_hdr;
+	xfs_alloc_rec_t		*bt_rec;
+	int			level;
+	xfs_agblock_t		agbno;
+	extent_tree_node_t	*ext_ptr;
+	bt_stat_level_t		*lptr;
+	xfs_extlen_t		freeblks;	/* running total, returned */
+
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "in build_freespace_tree, agno = %d\n", agno);
+#endif
+	level = btree_curs->num_levels;
+	freeblks = 0;
+
+	ASSERT(level > 0);
+
+	/*
+	 * initialize the first block on each btree level
+	 */
+	for (i = 0; i < level; i++)  {
+		lptr = &btree_curs->level[i];
+
+		agbno = get_next_blockaddr(agno, i, btree_curs);
+		lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, agbno),
+					XFS_FSB_TO_BB(mp, 1));
+
+		if (i == btree_curs->num_levels - 1)
+			btree_curs->root = agbno;	/* topmost level holds the root */
+
+		lptr->agbno = agbno;
+		lptr->prev_agbno = NULLAGBLOCK;
+		lptr->prev_buf_p = NULL;
+		/*
+		 * initialize block header
+		 */
+		bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+		bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
+		INT_SET(bt_hdr->bb_level, ARCH_CONVERT, i);
+		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT,
+				bt_hdr->bb_rightsib = NULLAGBLOCK);
+		INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * run along leaf, setting up records.  as we have to switch
+	 * blocks, call the prop_freespace_cursor routine to set up the new
+	 * pointers for the parent.  that can recurse up to the root
+	 * if required.  set the sibling pointers for leaf level here.
+	 */
+	if (magic == XFS_ABTB_MAGIC)
+		ext_ptr = findfirst_bno_extent(agno);
+	else 
+		ext_ptr = findfirst_bcnt_extent(agno);	/* NOTE(review): the trace below derefs ext_ptr unchecked */
+
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "bft, agno = %d, start = %u, count = %u\n",
+		agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+
+	lptr = &btree_curs->level[0];
+
+	for (i = 0; i < btree_curs->level[0].num_blocks; i++)  {
+		/*
+		 * block initialization, lay in block header
+		 */
+		bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+		bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
+		INT_ZERO(bt_hdr->bb_level, ARCH_CONVERT);
+		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_SET(bt_hdr->bb_numrecs, ARCH_CONVERT,
+				lptr->num_recs_pb + (lptr->modulo > 0));
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "bft, bb_numrecs = %d\n",
+				INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+#endif
+
+		if (lptr->modulo > 0)
+			lptr->modulo--;	/* this block took one of the leftover records */
+
+		/*
+		 * initialize values in the path up to the root if
+		 * this is a multi-level btree
+		 */
+		if (btree_curs->num_levels > 1)
+			prop_freespace_cursor(mp, agno, btree_curs,
+					ext_ptr->ex_startblock,
+					ext_ptr->ex_blockcount,
+					0, magic);
+
+		bt_rec = (xfs_alloc_rec_t *) ((char *) bt_hdr +
+						sizeof(xfs_alloc_block_t));	/* records start right after the header */
+		for (j = 0; j < INT_GET(bt_hdr->bb_numrecs,ARCH_CONVERT); j++) {
+			ASSERT(ext_ptr != NULL);
+			INT_SET(bt_rec[j].ar_startblock, ARCH_CONVERT,
+				ext_ptr->ex_startblock);
+			INT_SET(bt_rec[j].ar_blockcount, ARCH_CONVERT,
+				ext_ptr->ex_blockcount);
+			freeblks += ext_ptr->ex_blockcount;
+			if (magic == XFS_ABTB_MAGIC)
+				ext_ptr = findnext_bno_extent(ext_ptr);
+			else
+				ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
+#if 0
+#ifdef XR_BLD_FREE_TRACE
+			if (ext_ptr == NULL)
+				fprintf(stderr, "null extent pointer, j = %d\n",
+					j);
+			else
+				fprintf(stderr,
+				"bft, agno = %d, start = %u, count = %u\n",
+					agno, ext_ptr->ex_startblock,
+					ext_ptr->ex_blockcount);
+#endif
+#endif
+		}
+
+		if (ext_ptr != NULL)  {
+			/*
+			 * get next leaf level block
+			 */
+			if (lptr->prev_buf_p != NULL)  {
+#ifdef XR_BLD_FREE_TRACE
+				fprintf(stderr, " writing fst agbno %u\n",
+					lptr->prev_agbno);
+#endif
+				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+				libxfs_writebuf(lptr->prev_buf_p, 0);
+			}
+			lptr->prev_buf_p = lptr->buf_p;
+			lptr->prev_agbno = lptr->agbno;
+
+			INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, lptr->agbno =
+				get_next_blockaddr(agno, 0, btree_curs));	/* also advances lptr->agbno */
+
+			lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+					XFS_FSB_TO_BB(mp, 1));
+		}
+	}
+
+	return(freeblks);
+}
+
+/*
+ * no-cursor versions of the XFS equivalents.  The address calculators
+ * should be used only for interior btree nodes.
+ * these are adapted from xfs_ialloc_btree.h and xfs_tree.h
+ */
+#define XR_INOBT_KEY_ADDR(mp, bp, i)	/* address of key i, 1-based */ \
+	((xfs_inobt_key_t *) ((char *) (bp) + sizeof(xfs_inobt_block_t) \
+				+ ((i)-1) * sizeof(xfs_inobt_key_t)))
+
+#define XR_INOBT_PTR_ADDR(mp, bp, i)	/* ptrs follow m_inobt_mxr[1] keys */ \
+	((xfs_inobt_ptr_t *) ((char *) (bp) + sizeof(xfs_inobt_block_t) \
+			+ (mp)->m_inobt_mxr[1] * sizeof(xfs_inobt_key_t) \
+			+ ((i)-1) * sizeof(xfs_inobt_ptr_t)))
+
+#define XR_INOBT_BLOCK_MAXRECS(mp, level)	/* level 0 is the leaf case */ \
+			(XFS_BTREE_BLOCK_MAXRECS((mp)->m_sb.sb_blocksize, \
+						xfs_inobt, (level) == 0))
+
+/*
+ * size an inode btree cursor from the ag's incore inode records and
+ * return the inode totals in *num_inos / *num_free_inos.  chewing up
+ * free extents can't perturb this; it runs before freespace sizing.
+ */
+void
+init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
+		__uint64_t *num_inos, __uint64_t *num_free_inos)
+{
+	__uint64_t		ninos;		/* XFS_INODES_PER_CHUNK per record */
+	__uint64_t		nfinos;		/* inodes reported free */
+	ino_tree_node_t		*ino_rec;
+	int			num_recs;	/* inode records walked */
+	int			level;
+	bt_stat_level_t		*lptr;
+	bt_stat_level_t		*p_lptr;
+	xfs_extlen_t		blocks_allocated;	/* blocks over all levels */
+	int			i;
+
+	*num_inos = *num_free_inos = 0;
+	ninos = nfinos = 0;
+
+	lptr = &btree_curs->level[0];
+	btree_curs->init = 1;
+
+	if ((ino_rec = findfirst_inode_rec(agno)) == NULL)  {
+		/*
+		 * easy corner-case -- no inode records
+		 */
+		lptr->num_blocks = 1;
+		lptr->modulo = 0;
+		lptr->num_recs_pb = 0;
+		lptr->num_recs_tot = 0;
+
+		btree_curs->num_levels = 1;
+		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+
+		setup_cursor(mp, agno, btree_curs);
+
+		return;
+	}
+
+	/*
+	 * build up statistics
+	 */
+	for (num_recs = 0; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec))  {
+		ninos += XFS_INODES_PER_CHUNK;
+		num_recs++;
+		for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+			ASSERT(is_inode_confirmed(ino_rec, i));
+			if (is_inode_free(ino_rec, i))
+				nfinos++;
+		}
+	}
+
+	blocks_allocated = lptr->num_blocks = howmany(num_recs,
+					XR_INOBT_BLOCK_MAXRECS(mp, 0));	/* leaf blocks needed */
+
+	lptr->modulo = num_recs % lptr->num_blocks;
+	lptr->num_recs_pb = num_recs / lptr->num_blocks;
+	lptr->num_recs_tot = num_recs;
+	level = 1;
+
+	if (lptr->num_blocks > 1)  {
+		for (; btree_curs->level[level-1].num_blocks > 1
+				&& level < XFS_BTREE_MAXLEVELS;
+				level++)  {
+			lptr = &btree_curs->level[level];
+			p_lptr = &btree_curs->level[level - 1];
+			lptr->num_blocks = howmany(p_lptr->num_blocks,
+				XR_INOBT_BLOCK_MAXRECS(mp, level));	/* one record per block below */
+			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
+			lptr->num_recs_pb = p_lptr->num_blocks
+					/ lptr->num_blocks;
+			lptr->num_recs_tot = p_lptr->num_blocks;
+
+			blocks_allocated += lptr->num_blocks;
+		}
+	}
+	ASSERT(lptr->num_blocks == 1);
+	btree_curs->num_levels = level;
+
+	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
+			= blocks_allocated;
+
+	setup_cursor(mp, agno, btree_curs);
+
+	*num_inos = ninos;
+	*num_free_inos = nfinos;
+
+	return;
+}
+
+void
+prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
+	xfs_agino_t startino, int level)
+{
+	xfs_inobt_block_t	*bt_hdr;
+	xfs_inobt_key_t		*bt_key;
+	xfs_inobt_ptr_t		*bt_ptr;
+	xfs_agblock_t		agbno;
+	bt_stat_level_t		*lptr;
+
+	level++;	/* operate on the level above the one passed in */
+
+	if (level >= btree_curs->num_levels)
+		return;	/* nothing above the topmost level */
+
+	lptr = &btree_curs->level[level];
+	bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+
+	if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) == 0)  {
+		/*
+		 * this only happens once to initialize the
+		 * first path up the left side of the tree
+		 * where the agbno's are already set up
+		 */
+		prop_ino_cursor(mp, agno, btree_curs, startino, level);
+	}
+
+	if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) ==
+				lptr->num_recs_pb + (lptr->modulo > 0))  {
+		/*
+		 * write out current prev block, grab us a new block,
+		 * and set the rightsib pointer of current block
+		 */
+#ifdef XR_BLD_INO_TRACE
+		fprintf(stderr, " ino prop agbno %u ", lptr->prev_agbno);
+#endif
+		if (lptr->prev_agbno != NULLAGBLOCK)  {
+			ASSERT(lptr->prev_buf_p != NULL);
+			libxfs_writebuf(lptr->prev_buf_p, 0);
+		}
+		lptr->prev_agbno = lptr->agbno;
+		lptr->prev_buf_p = lptr->buf_p;
+		agbno = get_next_blockaddr(agno, level, btree_curs);
+
+		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, agbno);
+
+		lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, agbno),
+					XFS_FSB_TO_BB(mp, 1));
+		lptr->agbno = agbno;
+
+		if (lptr->modulo)
+			lptr->modulo--;	/* one fewer block gets an extra record */
+
+		/*
+		 * initialize block header
+		 */
+		bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+		bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+		INT_SET(bt_hdr->bb_level, ARCH_CONVERT, level);
+		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+		/*
+		 * propagate the first inode number in the new block up
+		 */
+		prop_ino_cursor(mp, agno, btree_curs, startino, level);
+	}
+	/*
+	 * add inode info to current block
+	 */
+	INT_MOD(bt_hdr->bb_numrecs, ARCH_CONVERT, +1);
+
+	bt_key = XR_INOBT_KEY_ADDR(mp, bt_hdr,
+			INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+	bt_ptr = XR_INOBT_PTR_ADDR(mp, bt_hdr,
+			INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+
+	INT_SET(bt_key->ir_startino, ARCH_CONVERT, startino);
+	INT_SET(*bt_ptr, ARCH_CONVERT, btree_curs->level[level-1].agbno);	/* child's current block */
+}
+
+void
+build_agi(xfs_mount_t *mp, xfs_agnumber_t agno,
+		bt_status_t *btree_curs, xfs_agino_t first_agino,
+		xfs_agino_t count, xfs_agino_t freecount)
+{
+	xfs_buf_t	*bp;		/* buffer obtained for the AGI */
+	xfs_agi_t	*hdr;		/* AGI header inside bp */
+	int		bucket;
+
+	bp = libxfs_getbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR),
+			mp->m_sb.sb_sectsize/BBSIZE);
+	hdr = XFS_BUF_TO_AGI(bp);
+	bzero(hdr, mp->m_sb.sb_sectsize);
+
+	INT_SET(hdr->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
+	INT_SET(hdr->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
+	INT_SET(hdr->agi_seqno, ARCH_CONVERT, agno);
+
+	/* the last ag gets whatever is left of sb_dblocks */
+	if (agno >= mp->m_sb.sb_agcount - 1)
+		INT_SET(hdr->agi_length, ARCH_CONVERT, mp->m_sb.sb_dblocks -
+			(xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno);
+	else
+		INT_SET(hdr->agi_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+
+	INT_SET(hdr->agi_root, ARCH_CONVERT, btree_curs->root);
+	INT_SET(hdr->agi_level, ARCH_CONVERT, btree_curs->num_levels);
+	INT_SET(hdr->agi_count, ARCH_CONVERT, count);
+	INT_SET(hdr->agi_freecount, ARCH_CONVERT, freecount);
+	INT_SET(hdr->agi_newino, ARCH_CONVERT, first_agino);
+	INT_SET(hdr->agi_dirino, ARCH_CONVERT, NULLAGINO);
+	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
+		INT_SET(hdr->agi_unlinked[bucket], ARCH_CONVERT, NULLAGINO);
+
+	libxfs_writebuf(bp, 0);
+}
+
+/*
+ * rebuilds an inode tree given a cursor.  We're lazy here and call
+ * the routine that builds the agi (build_agi) once the leaves are done
+ */
+void
+build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
+		bt_status_t *btree_curs)
+{
+	xfs_agnumber_t		i;
+	xfs_agblock_t		j;
+	xfs_agblock_t		agbno;
+	xfs_agino_t		first_agino;	/* passed to build_agi */
+	xfs_inobt_block_t	*bt_hdr;
+	xfs_inobt_rec_t		*bt_rec;
+	ino_tree_node_t		*ino_rec;
+	bt_stat_level_t		*lptr;
+	xfs_agino_t		count = 0;	/* inodes covered by the records written */
+	xfs_agino_t		freecount = 0;	/* free inodes among them */
+	int			inocnt;
+	int			k;
+	int			level = btree_curs->num_levels;
+
+	for (i = 0; i < level; i++)  {
+		lptr = &btree_curs->level[i];
+
+		agbno = get_next_blockaddr(agno, i, btree_curs);
+		lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, agbno),
+					XFS_FSB_TO_BB(mp, 1));
+
+		if (i == btree_curs->num_levels - 1)
+			btree_curs->root = agbno;	/* topmost level holds the root */
+
+		lptr->agbno = agbno;
+		lptr->prev_agbno = NULLAGBLOCK;
+		lptr->prev_buf_p = NULL;
+		/*
+		 * initialize block header
+		 */
+		bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+		bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+		INT_SET(bt_hdr->bb_level, ARCH_CONVERT, i);
+		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT,
+				bt_hdr->bb_rightsib = NULLAGBLOCK);
+		INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * run along leaf, setting up records.  as we have to switch
+	 * blocks, call the prop_ino_cursor routine to set up the new
+	 * pointers for the parent.  that can recurse up to the root
+	 * if required.  set the sibling pointers for leaf level here.
+	 */
+	ino_rec = findfirst_inode_rec(agno);
+
+	if (ino_rec != NULL)
+		first_agino = ino_rec->ino_startnum;
+	else
+		first_agino = NULLAGINO;
+
+	lptr = &btree_curs->level[0];
+
+	for (i = 0; i < lptr->num_blocks; i++)  {
+		/*
+		 * block initialization, lay in block header
+		 */
+		bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+		bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+		INT_ZERO(bt_hdr->bb_level, ARCH_CONVERT);
+		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+		INT_SET(bt_hdr->bb_numrecs, ARCH_CONVERT,
+				lptr->num_recs_pb + (lptr->modulo > 0));
+
+		if (lptr->modulo > 0)
+			lptr->modulo--;	/* this block took one of the leftover records */
+
+		if (lptr->num_recs_pb > 0)
+			prop_ino_cursor(mp, agno, btree_curs,
+					ino_rec->ino_startnum, 0);
+
+		bt_rec = (xfs_inobt_rec_t *) ((char *) bt_hdr +
+						sizeof(xfs_inobt_block_t));	/* records start right after the header */
+		for (j = 0; j < INT_GET(bt_hdr->bb_numrecs,ARCH_CONVERT); j++) {
+			ASSERT(ino_rec != NULL);
+			INT_SET(bt_rec[j].ir_startino, ARCH_CONVERT,
+					ino_rec->ino_startnum);
+			INT_SET(bt_rec[j].ir_free, ARCH_CONVERT,
+					ino_rec->ir_free);
+
+			inocnt = 0;	/* free inodes in this record */
+			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
+				ASSERT(is_inode_confirmed(ino_rec, k));
+				inocnt += is_inode_free(ino_rec, k);
+			}
+
+			INT_SET(bt_rec[j].ir_freecount, ARCH_CONVERT, inocnt);
+			freecount += inocnt;
+			count += XFS_INODES_PER_CHUNK;
+			ino_rec = next_ino_rec(ino_rec);
+		}
+
+		if (ino_rec != NULL)  {
+			/*
+			 * get next leaf level block
+			 */
+			if (lptr->prev_buf_p != NULL)  {
+#ifdef XR_BLD_INO_TRACE
+				fprintf(stderr, "writing inobt agbno %u\n",
+					lptr->prev_agbno);
+#endif
+				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+				libxfs_writebuf(lptr->prev_buf_p, 0);
+			}
+			lptr->prev_buf_p = lptr->buf_p;
+			lptr->prev_agbno = lptr->agbno;
+
+			INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, lptr->agbno=
+				get_next_blockaddr(agno, 0, btree_curs));	/* also advances lptr->agbno */
+
+			lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+					XFS_FSB_TO_BB(mp, 1));
+		}
+	}
+
+	build_agi(mp, agno, btree_curs, first_agino, count, freecount);
+}
+
+/*
+ * build both the agf and the agfl for an agno given both
+ * btree cursors; leftover cursor blocks are used to fill the agfl
+ */
+void
+build_agf_agfl(xfs_mount_t	*mp,
+		xfs_agnumber_t	agno,
+		bt_status_t	*bno_bt,
+		bt_status_t	*bcnt_bt,
+		xfs_extlen_t	freeblks,	/* # free blocks in tree */
+		int		lostblocks)	/* # blocks that will be lost */
+{
+	extent_tree_node_t	*ext_ptr;
+	xfs_buf_t		*agf_buf, *agfl_buf;
+	int			i;	/* # of agfl slots filled */
+	int			j;	/* # of blocks thrown away */
+	xfs_agfl_t		*agfl;
+	xfs_agf_t		*agf;
+
+	agf_buf = libxfs_getbuf(mp->m_dev,
+				XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR),
+				mp->m_sb.sb_sectsize/BBSIZE);
+	agf = XFS_BUF_TO_AGF(agf_buf);
+	bzero(agf, mp->m_sb.sb_sectsize);
+
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "agf = 0x%x, agf_buf->b_un.b_addr = 0x%x\n",
+		(__psint_t) agf, (__psint_t) agf_buf->b_un.b_addr);
+#endif
+
+	/*
+	 * set up fixed part of agf
+	 */
+	INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
+	INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
+	INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
+
+	if (agno < mp->m_sb.sb_agcount - 1)
+		INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+	else
+		INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_dblocks -
+			(xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno);
+
+	INT_SET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT, bno_bt->root);
+	INT_SET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT,
+			bno_bt->num_levels);
+	INT_SET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT, bcnt_bt->root);
+	INT_SET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT,
+			bcnt_bt->num_levels);
+	INT_SET(agf->agf_freeblks, ARCH_CONVERT, freeblks);
+
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "bno root = %u, bcnt root = %u, indices = %u %u\n",
+			INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT),
+			INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT),
+			XFS_BTNUM_BNO,
+			XFS_BTNUM_CNT);
+#endif
+
+	/*
+	 * do we have left-over blocks in the btree cursors that should
+	 * be used to fill the AGFL?
+	 */
+	if (bno_bt->num_free_blocks > 0 || bcnt_bt->num_free_blocks > 0)  {
+		/*
+		 * yes - grab the AGFL buffer
+		 */
+		agfl_buf = libxfs_getbuf(mp->m_dev,
+				XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR),
+				mp->m_sb.sb_sectsize/BBSIZE);
+		agfl = XFS_BUF_TO_AGFL(agfl_buf);
+		bzero(agfl, mp->m_sb.sb_sectsize);
+		/*
+		 * ok, now grab as many blocks as we can
+		 */
+		i = j = 0;
+		while (bno_bt->num_free_blocks > 0 && i < XFS_AGFL_SIZE)  {
+			INT_SET(agfl->agfl_bno[i], ARCH_CONVERT,
+				get_next_blockaddr(agno, 0, bno_bt));
+			i++;
+		}
+
+		while (bcnt_bt->num_free_blocks > 0 && i < XFS_AGFL_SIZE)  {
+			INT_SET(agfl->agfl_bno[i], ARCH_CONVERT,
+				get_next_blockaddr(agno, 0, bcnt_bt));
+			i++;
+		}
+		/*
+		 * now throw the rest of the blocks away and complain
+		 */
+		while (bno_bt->num_free_blocks > 0)  {
+			(void) get_next_blockaddr(agno, 0, bno_bt);
+			j++;
+		}
+		while (bcnt_bt->num_free_blocks > 0)  {
+			(void) get_next_blockaddr(agno, 0, bcnt_bt);
+			j++;
+		}
+
+		if (j > 0)  {
+			if (j == lostblocks)
+				do_warn("lost %d blocks in ag %u\n", j, agno);
+			else
+				do_warn("thought we were going to lose %d "
+					"blocks in ag %u, actually lost %d\n",
+					lostblocks, agno, j);	/* order matches the format */
+		}
+
+		INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+		INT_SET(agf->agf_fllast, ARCH_CONVERT, i - 1);
+		INT_SET(agf->agf_flcount, ARCH_CONVERT, i);
+
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "writing agfl for ag %u\n", agno);
+#endif
+
+		libxfs_writebuf(agfl_buf, 0);
+	} else  {
+		INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+		INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE - 1);
+		INT_ZERO(agf->agf_flcount, ARCH_CONVERT);
+	}
+
+	ext_ptr = findbiggest_bcnt_extent(agno);
+	INT_SET(agf->agf_longest, ARCH_CONVERT,
+			(ext_ptr != NULL) ? ext_ptr->ex_blockcount : 0);
+
+	ASSERT(INT_GET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT) !=
+		INT_GET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT));
+
+	libxfs_writebuf(agf_buf, 0);
+
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "wrote agf for ag %u\n", agno);	/* no error value exists here */
+#endif
+}
+
+/*
+ * copy the global inode/block counters into the incore superblock,
+ * refresh its version and feature bits, then push the incore
+ * superblock out through the on-disk superblock buffer.
+ */
+void
+sync_sb(xfs_mount_t *mp)
+{
+	xfs_buf_t	*sb_buf = libxfs_getsb(mp, 0);
+	xfs_sb_t	*disk_sb;
+
+	if (sb_buf == NULL)
+		do_error("couldn't get superblock\n");
+	disk_sb = XFS_BUF_TO_SBP(sb_buf);
+
+	/* fold the global counters into the incore superblock */
+	mp->m_sb.sb_frextents = sb_frextents;
+	mp->m_sb.sb_fdblocks = sb_fdblocks;
+	mp->m_sb.sb_ifree = sb_ifree;
+	mp->m_sb.sb_icount = sb_icount;
+	update_sb_version(mp);
+
+	/* copy the incore sb into the buffer, then translate and write */
+	*disk_sb = mp->m_sb;
+	libxfs_xlate_sb(XFS_BUF_PTR(sb_buf), disk_sb, -1, ARCH_CONVERT,
+			XFS_SB_ALL_BITS);
+
+	libxfs_writebuf(sb_buf, 0);
+}
+
+/*
+ * mark the first three inodes of the root inode's chunk as in use so
+ * the root and realtime inodes show up allocated even if they've
+ * been freed.  they get reinitialized in phase6.
+ */
+void
+keep_fsinos(xfs_mount_t *mp)
+{
+	ino_tree_node_t		*root_chunk;
+	int			offset;
+
+	root_chunk = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+			XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+
+	for (offset = 0; offset < 3; offset++)
+		set_inode_used(root_chunk, offset);
+}
+
+void
+phase5(xfs_mount_t *mp)
+{
+	__uint64_t	num_inos;
+	__uint64_t	num_free_inos;
+	bt_status_t	bno_btree_curs;
+	bt_status_t	bcnt_btree_curs;
+	bt_status_t	ino_btree_curs;
+	xfs_agnumber_t	agno;
+	int		extra_blocks = 0;	/* surplus blocks, then blocks lost */
+	uint		num_freeblocks;
+	xfs_extlen_t	freeblks1;	/* total from the bno tree build */
+	xfs_extlen_t	freeblks2;	/* total from the bcnt tree build */
+	xfs_agblock_t	num_extents;
+	extern int	count_bno_extents(xfs_agnumber_t);
+	extern int	count_bno_extents_blocks(xfs_agnumber_t, uint *);
+#ifdef XR_BLD_FREE_TRACE
+	extern int	count_bcnt_extents(xfs_agnumber_t);
+#endif
+
+	do_log("Phase 5 - rebuild AG headers and trees...\n");
+
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n",
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0),
+		XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0)
+		);
+	fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n",
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1),
+		XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1)
+		);
+	fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n",
+		XR_INOBT_BLOCK_MAXRECS(mp, 0));
+	fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n",
+		XR_INOBT_BLOCK_MAXRECS(mp, 1));
+	fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n",
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0),
+		XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+	fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n",
+		XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1),
+		XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
+#endif
+
+	/*
+	 * make sure the root and realtime inodes show up allocated
+	 */
+	keep_fsinos(mp);
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+		/*
+		 * build up incore bno and bcnt extent btrees
+		 */
+		num_extents = mk_incore_fstree(mp, agno);
+
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "# of bno extents is %d\n",
+				count_bno_extents(agno));
+#endif
+
+		if (num_extents == 0)  {
+			/*
+			 * XXX - what we probably should do here is pick an
+			 * inode for a regular file in the allocation group
+			 * that has space allocated and shoot it by traversing
+			 * the bmap list and putting all its extents on the
+			 * incore freespace trees, clearing the inode,
+			 * and clearing the in-use bit in the incore inode
+			 * tree.  Then try mk_incore_fstree() again.
+			 */
+			do_error("unable to rebuild AG %u.  "
+				"Not enough free space in on-disk AG.\n", agno);
+		}
+
+		/*
+		 * done with the AG bitmap, toss it...
+		 */
+		teardown_ag_bmap(mp, agno);
+
+		/*
+		 * ok, now set up the btree cursors for the
+		 * on-disk btrees (includes pre-allocating all
+		 * required blocks for the trees themselves)
+		 */
+		init_ino_cursor(mp, agno, &ino_btree_curs,
+				&num_inos, &num_free_inos);
+
+		sb_icount += num_inos;
+		sb_ifree += num_free_inos;
+
+		num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+		/*
+		 * lose two blocks per AG -- the space tree roots
+		 * are counted as allocated since the space trees
+		 * always have roots
+		 */
+		sb_fdblocks += num_freeblocks - 2;
+
+		if (num_extents == 0)  {
+			/*
+			 * XXX - what we probably should do here is pick an
+			 * inode for a regular file in the allocation group
+			 * that has space allocated and shoot it by traversing
+			 * the bmap list and putting all its extents on the
+			 * incore freespace trees, clearing the inode,
+			 * and clearing the in-use bit in the incore inode
+			 * tree.  Then try mk_incore_fstree() again.
+			 */
+			do_error("unable to rebuild AG %u.  No free space.\n",
+				agno);
+			exit(1);
+		}
+
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "# of bno extents is %d\n", num_extents);
+#endif
+
+		/*
+		 * track blocks that we might really lose
+		 */
+		extra_blocks = calculate_freespace_cursor(mp, agno,
+					&num_extents, &bno_btree_curs);
+
+		/*
+		 * freespace btrees live in the "free space" but
+		 * the filesystem treats AGFL blocks as allocated
+		 * since they aren't described by the freespace trees
+		 */
+
+		/*
+		 * see if we can fit all the extra blocks into the AGFL
+		 * (compare as ints: XFS_AGFL_SIZE may be an unsigned value)
+		 */
+		extra_blocks = (extra_blocks > (int) (XFS_AGFL_SIZE))
+				? extra_blocks - (int) (XFS_AGFL_SIZE)
+				: 0;
+		if (extra_blocks > 0)  {
+			do_warn("lost %d blocks in agno %d, sorry.\n",
+				extra_blocks, agno);
+			sb_fdblocks -= extra_blocks;
+		}
+
+		bcnt_btree_curs = bno_btree_curs;	/* both trees share one geometry */
+
+		setup_cursor(mp, agno, &bno_btree_curs);
+		setup_cursor(mp, agno, &bcnt_btree_curs);
+
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "# of bno extents is %d\n",
+				count_bno_extents(agno));
+		fprintf(stderr, "# of bcnt extents is %d\n",
+				count_bcnt_extents(agno));
+#endif
+		/*
+		 * now rebuild the freespace trees
+		 */
+		freeblks1 = build_freespace_tree(mp, agno, &bno_btree_curs,
+					XFS_ABTB_MAGIC);
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "# of free blocks == %d\n", freeblks1);
+#endif
+		write_cursor(&bno_btree_curs);
+
+		freeblks2 = build_freespace_tree(mp, agno, &bcnt_btree_curs,
+					XFS_ABTC_MAGIC);
+		write_cursor(&bcnt_btree_curs);
+
+		ASSERT(freeblks1 == freeblks2);
+
+		/*
+		 * set up agf and agfl
+		 */
+		build_agf_agfl(mp, agno, &bno_btree_curs,
+				&bcnt_btree_curs, freeblks1, extra_blocks);
+		/*
+		 * build inode allocation tree.  this also builds the agi
+		 */
+		build_ino_tree(mp, agno, &ino_btree_curs);
+		write_cursor(&ino_btree_curs);
+		/*
+		 * tear down cursors
+		 */
+		finish_cursor(&bno_btree_curs);
+		finish_cursor(&ino_btree_curs);
+		finish_cursor(&bcnt_btree_curs);
+		/*
+		 * release the incore per-AG bno/bcnt trees so
+		 * the extent nodes can be recycled
+		 */
+		release_agbno_extent_tree(agno);
+		release_agbcnt_extent_tree(agno);
+	}
+
+	if (mp->m_sb.sb_rblocks)  {
+		do_log(
+		"        - generate realtime summary info and bitmap...\n");
+		rtinit(mp);
+		generate_rtinfo(mp, btmcompute, sumcompute);
+		teardown_rt_bmap(mp);
+	}
+
+	do_log("        - reset superblock...\n");
+
+	/*
+	 * sync superblock counter and set version bits correctly
+	 */
+	sync_sb(mp);
+
+	bad_ino_btree = 0;
+}
diff --git a/repair/phase6.c b/repair/phase6.c
new file mode 100644
index 000000000..1babc07c9
--- /dev/null
+++ b/repair/phase6.c
@@ -0,0 +1,3971 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <errno.h>
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "dir.h"
+#include "dir2.h"
+#include "dir_stack.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "versions.h"
+
+static cred_t zerocr;		/* zero-filled creds passed to libxfs_inode_alloc */
+static int orphanage_entered;	/* 1 if mk_orphanage linked ORPHANAGE into root, else 0 */
+
+/*
+ * Data structures and routines to keep track of directory entries
+ * and whether their leaf entry has been seen
+ */
+typedef struct dir_hash_ent {
+	struct dir_hash_ent	*next;	/* next entry in the same bucket chain */
+	xfs_dir2_leaf_entry_t	ent;	/* address, plus hash value unless junkit */
+	short			junkit;	/* name starts with /; hash not stored or compared */
+	short			seen;	/* set once a leaf entry matched this address */
+} dir_hash_ent_t;
+
+typedef struct dir_hash_tab {
+	int			size;	/* number of buckets in tab[] */
+	dir_hash_ent_t		*tab[1];/* bucket chain heads; really "size" of them */
+} dir_hash_tab_t;
+#define	DIR_HASH_TAB_SIZE(n)	\
+	(offsetof(dir_hash_tab_t, tab) + (sizeof(dir_hash_ent_t *) * (n)))
+#define	DIR_HASH_FUNC(t,a)	((a) % (t)->size)	/* bucket index for address a */
+
+/*
+ * Track the contents of the freespace table in a directory.
+ */
+typedef struct freetab {
+	int			naents;	/* NOTE(review): presumably # of ents[] slots allocated -- confirm */
+	int			nents;	/* NOTE(review): presumably # of ents[] slots in use -- confirm */
+	struct freetab_ent {
+		xfs_dir2_data_off_t	v;	/* NOTE(review): looks like a free-space value -- confirm */
+		short			s;	/* NOTE(review): meaning not visible in this part of the file */
+	} ents[1];			/* variable length; allocation size comes from FREETAB_SIZE(n) */
+} freetab_t;
+#define	FREETAB_SIZE(n)	\
+	(offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
+
+#define	DIR_HASH_CK_OK		0	/* leaf entry matched / table consistent */
+#define	DIR_HASH_CK_DUPLEAF	1	/* leaf entry address was already seen */
+#define	DIR_HASH_CK_BADHASH	2	/* leaf hash differs from recorded hash */
+#define	DIR_HASH_CK_NODATA	3	/* leaf address has no recorded entry */
+#define	DIR_HASH_CK_NOLEAF	4	/* a recorded entry never matched a leaf */
+#define	DIR_HASH_CK_BADSTALE	5	/* null-address count != expected stale */
+
+static void
+dir_hash_add(
+	dir_hash_tab_t		*hashtab,	/* table to insert into */
+	xfs_dahash_t		hash,		/* stored only when !junk */
+	xfs_dir2_dataptr_t	addr,		/* entry address; selects bucket */
+	int			junk)		/* copied into the junkit flag */
+{
+	int			i;
+	dir_hash_ent_t		*p;
+
+	i = DIR_HASH_FUNC(hashtab, addr);
+	if ((p = malloc(sizeof(*p))) == NULL) {
+		do_error("malloc failed in dir_hash_add (%u bytes)\n",
+			(unsigned int)sizeof(*p)); /* size_t must match %u */
+		exit(1);
+	}
+	p->next = hashtab->tab[i];		/* push on head of the chain */
+	hashtab->tab[i] = p;
+	if (!(p->junkit = junk))
+		p->ent.hashval = hash;
+	p->ent.address = addr;
+	p->seen = 0;				/* no leaf entry matched yet */
+}
+
+static int
+dir_hash_unseen(
+	dir_hash_tab_t	*hashtab)
+{
+	dir_hash_ent_t	*ent;
+	int		bucket = 0;
+
+	/* returns 1 as soon as any chained entry is still unseen, else 0 */
+	while (bucket < hashtab->size) {
+		for (ent = hashtab->tab[bucket++]; ent; ent = ent->next)
+			if (!ent->seen)
+				return 1;
+	}
+	return 0;
+}
+
+static int
+dir_hash_check(
+	dir_hash_tab_t	*hashtab,
+	xfs_inode_t	*ip,
+	int		seeval)
+{
+	/* human-readable text for each DIR_HASH_CK_* code, indexed by code */
+	static char	*seevalstr[] = {
+		"ok",
+		"duplicate leaf",
+		"hash value mismatch",
+		"no data entry",
+		"no leaf entry",
+		"bad stale count",
+	};
+
+	if (seeval == DIR_HASH_CK_OK) {
+		/* leaf scan was clean; an unmatched entry is still an error */
+		if (!dir_hash_unseen(hashtab))
+			return 0;
+		seeval = DIR_HASH_CK_NOLEAF;
+	}
+	do_warn("bad hash table for directory inode %llu (%s): ", ip->i_ino,
+		seevalstr[seeval]);
+	do_warn(no_modify ? "would rebuild\n" : "rebuilding\n");
+	return 1;
+}
+
+static void
+dir_hash_done(
+	dir_hash_tab_t	*hashtab)
+{
+	dir_hash_ent_t	*cur, *victim;
+	int		bucket;
+
+	for (bucket = 0; bucket < hashtab->size; bucket++) {
+		cur = hashtab->tab[bucket];
+		while ((victim = cur) != NULL) {
+			cur = victim->next;	/* save the link before freeing */
+			free(victim);
+		}
+	}
+	free(hashtab);				/* finally the table itself */
+}
+
+static dir_hash_tab_t *
+dir_hash_init(
+	xfs_fsize_t	size)
+{
+	int		nbuckets;
+	dir_hash_tab_t	*table;
+
+	/* one bucket per 64 bytes of "size", clamped to the range 16..1024 */
+	nbuckets = size / (16 * 4);
+	nbuckets = nbuckets > 1024 ? 1024 : (nbuckets < 16 ? 16 : nbuckets);
+
+	table = calloc(DIR_HASH_TAB_SIZE(nbuckets), 1);
+	if (table == NULL) {
+		do_error("calloc failed in dir_hash_init\n");
+		exit(1);
+	}
+	table->size = nbuckets;
+	return table;
+}
+
+static int
+dir_hash_see(
+	dir_hash_tab_t		*hashtab,
+	xfs_dahash_t		hash,
+	xfs_dir2_dataptr_t	addr)
+{
+	dir_hash_ent_t		*ent;
+
+	/* locate the first chained entry recorded for this address */
+	ent = hashtab->tab[DIR_HASH_FUNC(hashtab, addr)];
+	while (ent != NULL && ent->ent.address != addr)
+		ent = ent->next;
+	if (ent == NULL)
+		return DIR_HASH_CK_NODATA;
+	if (ent->seen)
+		return DIR_HASH_CK_DUPLEAF;
+	/* junk entries never had a hash stored, so skip the comparison */
+	if (!ent->junkit && ent->ent.hashval != hash)
+		return DIR_HASH_CK_BADHASH;
+	ent->seen = 1;
+	return DIR_HASH_CK_OK;
+}
+
+static int
+dir_hash_see_all(
+	dir_hash_tab_t		*hashtab,
+	xfs_dir2_leaf_entry_t	*ents,
+	int			count,
+	int			stale)
+{
+	xfs_dir2_dataptr_t	addr;
+	int			idx;
+	int			nstale = 0;
+	int			rval;
+
+	for (idx = 0; idx < count; idx++) {
+		addr = INT_GET(ents[idx].address, ARCH_CONVERT);
+		if (addr == XFS_DIR2_NULL_DATAPTR)
+			nstale++;	/* null address: tallied against "stale" */
+		else if ((rval = dir_hash_see(hashtab,
+		    INT_GET(ents[idx].hashval, ARCH_CONVERT), addr)) != DIR_HASH_CK_OK)
+			return rval;
+	}
+	return nstale == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
+}
+
+
+/*
+ * Version 1 or 2 directory routine wrappers
+*/
+static void
+dir_init(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, xfs_inode_t *pdp)
+{
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		libxfs_dir_init(tp, dp, pdp);	/* version 1 directory */
+	else
+		libxfs_dir2_init(tp, dp, pdp);	/* version 2 directory */
+}
+
+static int
+dir_createname(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *pip,
+		char *name, int namelen, xfs_ino_t inum, xfs_fsblock_t *first,
+		xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	/* dispatch on the superblock's directory version bit */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		return libxfs_dir_createname(tp, pip, name, namelen,
+				inum, first, flist, total);
+	return libxfs_dir2_createname(tp, pip, name, namelen,
+			inum, first, flist, total);
+}
+
+static int
+dir_lookup(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name,
+		int namelen, xfs_ino_t *inum)
+{
+	/* dispatch on the superblock's directory version bit */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		return libxfs_dir_lookup(tp, dp, name, namelen, inum);
+	return libxfs_dir2_lookup(tp, dp, name, namelen, inum);
+}
+
+static int
+dir_replace(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name,
+		int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+		xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	/* dispatch on the superblock's directory version bit */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		return libxfs_dir_replace(tp, dp, name, namelen, inum,
+				firstblock, flist, total);
+	return libxfs_dir2_replace(tp, dp, name, namelen, inum,
+			firstblock, flist, total);
+}
+
+static int
+dir_removename(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name,
+		int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+		xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	/* dispatch on the superblock's directory version bit */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		return libxfs_dir_removename(tp, dp, name, namelen, inum,
+				firstblock, flist, total);
+	return libxfs_dir2_removename(tp, dp, name, namelen, inum,
+			firstblock, flist, total);
+}
+
+static int
+dir_bogus_removename(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp,
+		char *name, xfs_fsblock_t *firstblock, xfs_bmap_free_t *flist,
+		xfs_extlen_t total, xfs_dahash_t hashval, int namelen)
+{
+	/* dispatch on the superblock's directory version bit */
+	if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		return libxfs_dir_bogus_removename(tp, dp, name, firstblock,
+				flist, total, hashval, namelen);
+	return libxfs_dir2_bogus_removename(tp, dp, name, firstblock,
+			flist, total, hashval, namelen);
+}
+
+
+static void
+res_failed(
+	int	err)
+{
+	if (err != ENOSPC)	/* anything else: show the raw error code */
+		do_error("xfs_trans_reserve returned %d\n", err);
+	else
+		do_error("ran out of disk space!\n");
+}
+
+void
+mk_rbmino(xfs_mount_t *mp)
+{
+	xfs_bmbt_irec_t	map[XFS_BMAP_MAX_NMAP];
+	xfs_bmap_free_t	flist;
+	xfs_fsblock_t	first;
+	xfs_dfiloff_t	bno;
+	xfs_inode_t	*ip;
+	xfs_trans_t	*tp;
+	int		committed;
+	int		error;
+	int		idx;
+	int		nmap;
+
+	/*
+	 * transaction 1: reset the realtime bitmap inode's core
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+
+	error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0);
+	if (error)
+		res_failed(error);
+
+	error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
+	if (error)
+		do_error("couldn't iget realtime bitmap inode -- error - %d\n",
+			error);
+
+	bzero(&ip->i_d, sizeof(xfs_dinode_core_t));
+
+	/* regular file, extents format, one link (account for sb ptr) */
+	ip->i_d.di_magic = XFS_DINODE_MAGIC;
+	ip->i_d.di_mode = IFREG;
+	ip->i_d.di_version = XFS_DINODE_VERSION_1;
+	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+	ip->i_d.di_nlink = 1;
+	ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+
+	/*
+	 * data fork: extents format with no extents yet
+	 */
+	ip->i_df.if_u1.if_extents = NULL;
+	ip->i_df.if_real_bytes = 0;
+	ip->i_df.if_bytes = 0;
+	ip->i_df.if_flags = XFS_IFEXTENTS;
+
+	/*
+	 * log the core, hold the inode across the commit, and commit
+	 */
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	libxfs_trans_ihold(tp, ip);
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, NULL);
+
+	/*
+	 * transaction 2: allocate blocks for the file and fill them
+	 * with zeroes (stolen from mkfs)
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	error = libxfs_trans_reserve(tp, mp->m_sb.sb_rbmblocks +
+			(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), 0, 0, 0, 0);
+	if (error)
+		res_failed(error);
+
+	libxfs_trans_ijoin(tp, ip, 0);
+	XFS_BMAP_INIT(&flist, &first);
+	for (bno = 0; bno < mp->m_sb.sb_rbmblocks; ) {
+		nmap = XFS_BMAP_MAX_NMAP;
+		error = libxfs_bmapi(tp, ip, bno,
+			  (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
+			  XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+			  map, &nmap, &flist);
+		if (error)
+			do_error("couldn't allocate realtime bitmap - err %d\n",
+				error);
+
+		/* zero each extent just mapped and step past it */
+		for (idx = 0; idx < nmap; idx++) {
+			libxfs_device_zero(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, map[idx].br_startblock),
+				XFS_FSB_TO_BB(mp, map[idx].br_blockcount));
+			bno += map[idx].br_blockcount;
+		}
+	}
+
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error)
+		do_error(
+		"allocation of the realtime bitmap failed, error = %d\n",
+			error);
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+int
+fill_rbmino(xfs_mount_t *mp)
+{
+	xfs_buf_t	*bp;
+	xfs_trans_t	*tp;
+	xfs_inode_t	*ip;
+	xfs_rtword_t	*bmp;
+	xfs_fsblock_t	first;
+	int		nmap;
+	int		error;
+	xfs_dfiloff_t	bno;
+	xfs_bmbt_irec_t	map;
+
+	bmp = btmcompute;
+	bno = 0;
+
+	tp = libxfs_trans_alloc(mp, 0);
+
+	if (error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0))
+		res_failed(error);
+
+	error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
+	if (error) {
+		do_error("couldn't iget realtime bitmap inode -- error - %d\n",
+			error);
+	}
+
+	while (bno < mp->m_sb.sb_rbmblocks)  {
+		/*
+		 * fill the file one block at a time
+		 */
+		nmap = 1;
+		error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE,
+					&first, 1, &map, &nmap, NULL);
+		if (error || nmap != 1) {
+			do_error(
+			"couldn't map realtime bitmap block %llu - err %d\n",
+				bno, error);
+		}
+
+		ASSERT(map.br_startblock != HOLESTARTBLOCK);
+
+		error = libxfs_trans_read_buf(
+				mp, tp, mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock), 
+				XFS_FSB_TO_BB(mp, 1), 1, &bp);
+
+		if (error) {
+			do_warn(
+	"can't access block %llu (fsbno %llu) of realtime bitmap inode %llu\n",
+				bno, map.br_startblock, mp->m_sb.sb_rbmino);
+			return(1);
+		}
+
+		bcopy(bmp, XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize);
+
+		libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+
+		bmp = (xfs_rtword_t *)((__psint_t) bmp + mp->m_sb.sb_blocksize);
+		bno++;
+	}
+
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+	return(0);
+}
+
+int
+fill_rsumino(xfs_mount_t *mp)
+{
+	xfs_buf_t	*bp;
+	xfs_trans_t	*tp;
+	xfs_inode_t	*ip;
+	xfs_suminfo_t	*smp;
+	xfs_fsblock_t	first;
+	int		nmap;
+	int		error;
+	xfs_dfiloff_t	bno;
+	xfs_dfiloff_t	end_bno;
+	xfs_bmbt_irec_t	map;
+
+	smp = sumcompute;
+	bno = 0;
+	end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+
+	tp = libxfs_trans_alloc(mp, 0);
+
+	if (error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0))
+		res_failed(error);
+
+	error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
+	if (error) {
+		do_error("couldn't iget realtime summary inode -- error - %d\n",
+			error);
+	}
+
+	while (bno < end_bno)  {
+		/*
+		 * fill the file one block at a time
+		 */
+		nmap = 1;
+		error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE,
+					&first, 1, &map, &nmap, NULL);
+		if (error || nmap != 1) {
+			do_error(
+		"couldn't map realtime summary inode block %llu - err %d\n",
+				bno, error);
+		}
+
+		ASSERT(map.br_startblock != HOLESTARTBLOCK);
+
+		error = libxfs_trans_read_buf(
+				mp, tp, mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock), 
+				XFS_FSB_TO_BB(mp, 1), 1, &bp);
+
+		if (error) {
+			do_warn(
+	"can't access block %llu (fsbno %llu) of realtime summary inode %llu\n",
+				bno, map.br_startblock, mp->m_sb.sb_rsumino);
+			return(1);
+		}
+
+		bcopy(smp, XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize);
+
+		libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+
+		smp = (xfs_suminfo_t *)((__psint_t)smp + mp->m_sb.sb_blocksize);
+		bno++;
+	}
+
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+	return(0);
+}
+
+/* rebuild the realtime summary inode: reset its core, then allocate and zero its blocks */
+void
+mk_rsumino(xfs_mount_t *mp)
+{
+	xfs_trans_t	*tp;
+	xfs_inode_t	*ip;
+	xfs_bmbt_irec_t	*ep;
+	xfs_fsblock_t	first;
+	int		i;
+	int		nmap;
+	int		committed;
+	int		error;
+	int		nsumblocks;
+	xfs_bmap_free_t	flist;
+	xfs_dfiloff_t	bno;
+	xfs_bmbt_irec_t	map[XFS_BMAP_MAX_NMAP];
+
+	/*
+	 * first set up inode
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+
+	if ((i = libxfs_trans_reserve(tp, 10, XFS_ICHANGE_LOG_RES(mp), 0,
+				XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT)))
+		res_failed(i);
+
+	error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
+	if (error)
+		do_error("couldn't iget realtime summary inode -- error - %d\n",
+			error);
+
+	bzero(&ip->i_d, sizeof(xfs_dinode_core_t));
+
+	ip->i_d.di_magic = XFS_DINODE_MAGIC;
+	ip->i_d.di_mode = IFREG;
+	ip->i_d.di_version = XFS_DINODE_VERSION_1;
+	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+	ip->i_d.di_nlink = 1;		/* account for sb ptr */
+
+	/*
+	 * now the ifork
+	 */
+	ip->i_df.if_flags = XFS_IFEXTENTS;
+	ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
+	ip->i_df.if_u1.if_extents = NULL;
+
+	ip->i_d.di_size = mp->m_rsumsize;
+
+	/*
+	 * commit changes
+	 */
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	libxfs_trans_ihold(tp, ip);
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+	/*
+	 * then allocate blocks for file and fill with zeroes (stolen
+	 * from mkfs)
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+
+	/* reserve for the summary file's own block count, not the bitmap's */
+	nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+	if ((error = libxfs_trans_reserve(tp,
+				  nsumblocks +
+				      (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1),
+				  BBTOB(128), 0, XFS_TRANS_PERM_LOG_RES,
+				  XFS_DEFAULT_PERM_LOG_COUNT)))
+		res_failed(error);
+
+	libxfs_trans_ijoin(tp, ip, 0);
+	bno = 0;
+	XFS_BMAP_INIT(&flist, &first);
+	while (bno < nsumblocks) {
+		nmap = XFS_BMAP_MAX_NMAP;
+		error = libxfs_bmapi(tp, ip, bno,
+			  (xfs_extlen_t)(nsumblocks - bno),
+			  XFS_BMAPI_WRITE, &first, nsumblocks,
+			  map, &nmap, &flist);
+		if (error) {
+			do_error(
+			"couldn't allocate realtime summary inode - err %d\n",
+				error);
+		}
+		/* zero each new extent; getting this far is not an error */
+		for (i = 0, ep = map; i < nmap; i++, ep++) {
+			libxfs_device_zero(mp->m_dev,
+				      XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+				      XFS_FSB_TO_BB(mp, ep->br_blockcount));
+			bno += ep->br_blockcount;
+		}
+	}
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		do_error(
+		"allocation of the realtime summary ino failed, err = %d\n",
+			error);
+	}
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+/*
+ * makes a new root directory.
+ */
+void
+mk_root_dir(xfs_mount_t *mp)
+{
+	const mode_t	mode = 0755;
+	xfs_inode_t	*ip = NULL;
+	xfs_trans_t	*tp;
+	int		error;
+
+	tp = libxfs_trans_alloc(mp, 0);
+
+	error = libxfs_trans_reserve(tp, 10, XFS_ICHANGE_LOG_RES(mp), 0,
+				XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
+	if (error)
+		res_failed(error);
+
+	error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
+	if (error)
+		do_error("could not iget root inode -- error - %d\n", error);
+
+	/*
+	 * wipe the core and refill it -- initialization from xfs_ialloc()
+	 */
+	bzero(&ip->i_d, sizeof(xfs_dinode_core_t));
+
+	ip->i_d.di_magic = XFS_DINODE_MAGIC;
+	ip->i_d.di_version = XFS_DINODE_VERSION_1;
+	ip->i_d.di_mode = (__uint16_t) mode|IFDIR;
+	ip->i_d.di_nlink = 1;		/* account for . */
+	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	/*
+	 * data fork: extents format with no extents yet
+	 */
+	ip->i_df.if_u1.if_extents = NULL;
+	ip->i_df.if_real_bytes = 0;
+	ip->i_df.if_bytes = 0;
+	ip->i_df.if_flags = XFS_IFEXTENTS;
+
+	/*
+	 * record the inode as the mount's root, then let the
+	 * version-specific code initialize the directory with itself
+	 * passed as the parent
+	 */
+	mp->m_rootip = ip;
+
+	dir_init(mp, tp, ip, ip);
+
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+/*
+ * make the orphanage (name == lost+found) under the root; returns its inode #
+ */
+xfs_ino_t
+mk_orphanage(xfs_mount_t *mp)
+{
+	xfs_ino_t	ino;
+	xfs_trans_t	*tp;
+	xfs_inode_t	*ip;		/* the newly allocated orphanage inode */
+	xfs_inode_t	*pip;		/* root directory inode (the parent) */
+	xfs_fsblock_t	first;
+	int		i;
+	int		committed;
+	int		error;
+	xfs_bmap_free_t	flist;
+	const int	mode = 0755;
+	const int	uid = 0;
+	const int	gid = 0;
+	int		nres;
+
+	tp = libxfs_trans_alloc(mp, 0);
+	XFS_BMAP_INIT(&flist, &first);
+
+	nres = XFS_MKDIR_SPACE_RES(mp, strlen(ORPHANAGE));
+	if (i = libxfs_trans_reserve(tp, nres, XFS_MKDIR_LOG_RES(mp), 0,
+				XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT))
+		res_failed(i);
+
+	/*
+	 * use iget/ijoin instead of trans_iget because the ialloc
+	 * wrapper can commit the transaction and start a new one
+	 */
+	if (i = libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip, 0))
+		do_error("%d - couldn't iget root inode to make %s\n",
+			i, ORPHANAGE);
+
+	error = libxfs_inode_alloc(&tp, pip, mode|IFDIR,
+					1, mp->m_dev, &zerocr, &ip);
+
+	if (error) {
+		do_error("%s inode allocation failed %d\n",
+			ORPHANAGE, error);
+	}
+
+	ip->i_d.di_uid = uid;
+	ip->i_d.di_gid = gid;
+	ip->i_d.di_nlink++;		/* account for . */
+
+	/*
+	 * now that we know the transaction will stay around,
+	 * add the root inode to it
+	 */
+	libxfs_trans_ijoin(tp, pip, 0);
+
+	/*
+	 * create the actual entry; a failure only warns and is recorded
+	 */
+	if (error = dir_createname(mp, tp, pip, ORPHANAGE,
+			strlen(ORPHANAGE), ip->i_ino, &first, &flist, nres)) {
+		do_warn("can't make %s, createname error %d, will try later\n",
+			ORPHANAGE, error);
+		orphanage_entered = 0;
+	} else
+		orphanage_entered = 1;
+
+	/*
+	 * bump up the link count in the root directory to account
+	 * for .. in the new directory (done even if createname failed)
+	 */
+	pip->i_d.di_nlink++;
+
+	libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
+	dir_init(mp, tp, ip, pip);
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		do_error("%s directory creation failed -- bmapf error %d\n",
+			ORPHANAGE, error);
+	}
+
+	ino = ip->i_ino;	/* save the result before committing */
+
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+	/* need libxfs_iput here? - nathans TODO - possible memory leak? */
+
+	return(ino);
+}
+
+/*
+ * move a file to the orphanage.  the orphanage is guaranteed
+ * at this point to only have files in it whose name == file inode #
+ */
+void
+mv_orphanage(xfs_mount_t	*mp,
+		xfs_ino_t	dir_ino,	/* orphanage inode # */
+		xfs_ino_t	ino,		/* inode # to be moved */
+		int		isa_dir)	/* 1 if inode is a directory */
+{
+	xfs_ino_t	entry_ino_num;	/* inode # the existing ".." lookup returned */
+	xfs_inode_t	*dir_ino_p;	/* incore orphanage inode */
+	xfs_inode_t	*ino_p;		/* incore inode being moved */
+	xfs_trans_t	*tp;
+	xfs_fsblock_t	first;
+	xfs_bmap_free_t	flist;
+	int		err;
+	int		committed;
+	char		fname[MAXPATHLEN + 1];
+	int		nres;
+
+	sprintf(fname, "%llu", ino);	/* new entry name == inode # in decimal */
+
+	if (err = libxfs_iget(mp, NULL, dir_ino, 0, &dir_ino_p, 0))
+		do_error("%d - couldn't iget orphanage inode\n", err);
+
+	tp = libxfs_trans_alloc(mp, 0);
+
+	if (err = libxfs_iget(mp, NULL, ino, 0, &ino_p, 0))
+		do_error("%d - couldn't iget disconnected inode\n", err);
+
+	if (isa_dir)  {
+		nres = XFS_DIRENTER_SPACE_RES(mp, strlen(fname)) +
+		       XFS_DIRENTER_SPACE_RES(mp, 2);
+		if (err = dir_lookup(mp, tp, ino_p, "..", 2,
+				&entry_ino_num))  {
+			ASSERT(err == ENOENT);	/* case 1: directory has no ".." entry */
+
+			if (err = libxfs_trans_reserve(tp, nres,
+					XFS_RENAME_LOG_RES(mp), 0,
+					XFS_TRANS_PERM_LOG_RES,
+					XFS_RENAME_LOG_COUNT))
+				do_error(
+		"space reservation failed (%d), filesystem may be out of space\n",
+					err);
+
+			libxfs_trans_ijoin(tp, dir_ino_p, 0);
+			libxfs_trans_ijoin(tp, ino_p, 0);
+
+			XFS_BMAP_INIT(&flist, &first);
+			if (err = dir_createname(mp, tp, dir_ino_p, fname,
+						strlen(fname), ino, &first,
+						&flist, nres))
+				do_error(
+	"name create failed in %s (%d), filesystem may be out of space\n",
+					ORPHANAGE, err);
+
+			dir_ino_p->i_d.di_nlink++;	/* presumably for the child's ".." link */
+			libxfs_trans_log_inode(tp, dir_ino_p, XFS_ILOG_CORE);
+
+			if (err = dir_createname(mp, tp, ino_p, "..", 2,
+						dir_ino, &first, &flist, nres))
+				do_error(
+	"creation of .. entry failed (%d), filesystem may be out of space\n",
+					err);
+
+			ino_p->i_d.di_nlink++;
+			libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
+
+			if (err = libxfs_bmap_finish(&tp, &flist, first, &committed))
+				do_error(
+	"bmap finish failed (err - %d), filesystem may be out of space\n",
+					err);
+
+			libxfs_trans_commit(tp,
+				XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+		} else  {	/* case 2: directory already has a ".." entry */
+			if (err = libxfs_trans_reserve(tp, nres,
+					XFS_RENAME_LOG_RES(mp), 0,
+					XFS_TRANS_PERM_LOG_RES,
+					XFS_RENAME_LOG_COUNT))
+				do_error(
+	"space reservation failed (%d), filesystem may be out of space\n",
+					err);
+
+			libxfs_trans_ijoin(tp, dir_ino_p, 0);
+			libxfs_trans_ijoin(tp, ino_p, 0);
+
+			XFS_BMAP_INIT(&flist, &first);
+
+			if (err = dir_createname(mp, tp, dir_ino_p, fname,
+						strlen(fname), ino, &first,
+						&flist, nres))
+				do_error(
+	"name create failed in %s (%d), filesystem may be out of space\n",
+					ORPHANAGE, err);
+
+			dir_ino_p->i_d.di_nlink++;	/* presumably for the child's ".." link */
+			libxfs_trans_log_inode(tp, dir_ino_p, XFS_ILOG_CORE);
+
+			/*
+			 * don't replace .. value if it already points
+			 * to us.  that'll pop a libxfs/kernel ASSERT.
+			 */
+			if (entry_ino_num != dir_ino)  {
+				if (err = dir_replace(mp, tp, ino_p, "..",
+							2, dir_ino, &first,
+							&flist, nres))
+					do_error(
+		"name replace op failed (%d), filesystem may be out of space\n",
+						err);
+			}
+
+			if (err = libxfs_bmap_finish(&tp, &flist, first,
+							&committed))
+				do_error(
+		"bmap finish failed (%d), filesystem may be out of space\n",
+					err);
+
+			libxfs_trans_commit(tp,
+				XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+		}
+	} else  {	/* case 3: the inode being moved is not a directory */
+		/*
+		 * use the remove log reservation as that's
+		 * more accurate.  we're only creating the
+		 * links, we're not doing the inode allocation
+		 * also accounted for in the create
+		 */
+		nres = XFS_DIRENTER_SPACE_RES(mp, strlen(fname));
+		if (err = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), 0,
+				XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT))
+			do_error(
+	"space reservation failed (%d), filesystem may be out of space\n",
+				err);
+
+		libxfs_trans_ijoin(tp, dir_ino_p, 0);
+		libxfs_trans_ijoin(tp, ino_p, 0);
+
+		XFS_BMAP_INIT(&flist, &first);
+		if (err = dir_createname(mp, tp, dir_ino_p, fname,
+				strlen(fname), ino, &first, &flist, nres))
+			do_error(
+	"name create failed in %s (%d), filesystem may be out of space\n",
+				ORPHANAGE, err);
+		ASSERT(err == 0);
+
+		ino_p->i_d.di_nlink = 1;	/* link count is reset, not incremented */
+		libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
+
+		if (err = libxfs_bmap_finish(&tp, &flist, first, &committed))
+			do_error(
+		"bmap finish failed (%d), filesystem may be out of space\n",
+				err);
+
+		libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+	}
+}
+
+/*
+ * like get_first_dblock_fsbno only it uses the simulation code instead
+ * of raw I/O.
+ *
+ * Returns the fsbno of the first (leftmost) block in the directory leaf.
+ * sets *bno to the directory block # corresponding to the returned fsbno.
+ */
+xfs_dfsbno_t
+map_first_dblock_fsbno(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_inode_t	*ip,
+			xfs_dablk_t	*bno)
+{
+	xfs_fsblock_t		fblock;
+	xfs_da_intnode_t	*node;
+	xfs_buf_t		*bp;
+	xfs_dablk_t		da_bno;
+	xfs_dfsbno_t		fsbno;
+	xfs_bmbt_irec_t		map;
+	int			nmap;
+	int			i;
+	int			error;
+	char			*ftype;
+
+	/*
+	 * map file block 0 first, then (for a multi-block version 1
+	 * directory) traverse down the left side of the tree until
+	 * we reach the left-most leaf block.
+	 */
+	da_bno = 0;
+	*bno = 0;
+	i = -1;		/* tree level is unknown until the first node is read */
+	node = NULL;
+	fblock = NULLFSBLOCK;
+	ftype = "dir";
+
+	nmap = 1;
+	error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1,
+			XFS_BMAPI_METADATA, &fblock, 0,
+			&map, &nmap, NULL);
+	if (error || nmap != 1)  {
+		if (!no_modify)
+			do_error(
+"can't map block %d in %s inode %llu, xfs_bmapi returns %d, nmap = %d\n",
+				da_bno, ftype, ino, error, nmap);
+		else  {
+			do_warn(
+"can't map block %d in %s inode %llu, xfs_bmapi returns %d, nmap = %d\n",
+				da_bno, ftype, ino, error, nmap);
+			return(NULLDFSBNO);
+		}
+	}
+
+	if ((fsbno = map.br_startblock) == HOLESTARTBLOCK)  {
+		if (!no_modify)
+			do_error("block %d in %s ino %llu doesn't exist\n",
+				da_bno, ftype, ino);
+		else  {
+			do_warn("block %d in %s ino %llu doesn't exist\n",
+				da_bno, ftype, ino);
+			return(NULLDFSBNO);
+		}
+	}
+
+	if (ip->i_d.di_size <= XFS_LBSIZE(mp))	/* fits in one block: block 0 is it */
+		return(fsbno);
+
+	if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))	/* no tree walk for version 2 dirs */
+		return(fsbno);
+
+	do {
+		/*
+		 * walk down left side of btree, release buffers as you
+		 * go.  every block read here must carry the da node
+		 * magic number; anything else yields NULLDFSBNO.
+		 *
+		 */
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+
+		if (!bp) {
+			do_warn(
+		"can't read block %u (fsbno %llu) for directory inode %llu\n",
+					da_bno, fsbno, ino);
+			return(NULLDFSBNO);
+		}
+
+		node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+
+		if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+			libxfs_putbuf(bp);
+			do_warn(
+"bad dir/attr magic number in inode %llu, file bno = %u, fsbno = %llu\n",
+				ino, da_bno, fsbno);
+			return(NULLDFSBNO);
+		}
+
+		if (i == -1)	/* first node read: take the level from its header */
+			i = INT_GET(node->hdr.level, ARCH_CONVERT);
+
+		da_bno = INT_GET(node->btree[0].before, ARCH_CONVERT);	/* left-most child */
+
+		libxfs_putbuf(bp);
+		bp = NULL;
+
+		nmap = 1;
+		error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1,
+				XFS_BMAPI_METADATA, &fblock, 0,
+				&map, &nmap, NULL);
+		if (error || nmap != 1)  {
+			if (!no_modify)
+				do_error(
+	"can't map block %d in %s ino %llu, xfs_bmapi returns %d, nmap = %d\n",
+					da_bno, ftype, ino, error, nmap);
+			else  {
+				do_warn(
+	"can't map block %d in %s ino %llu, xfs_bmapi returns %d, nmap = %d\n",
+					da_bno, ftype, ino, error, nmap);
+				return(NULLDFSBNO);
+			}
+		}
+		if ((fsbno = map.br_startblock) == HOLESTARTBLOCK)  {
+			if (!no_modify)
+				do_error(
+				"block %d in %s inode %llu doesn't exist\n",
+					da_bno, ftype, ino);
+			else  {
+				do_warn(
+				"block %d in %s inode %llu doesn't exist\n",
+					da_bno, ftype, ino);
+				return(NULLDFSBNO);
+			}
+		}
+
+		i--;	/* one level further down */
+	} while(i > 0);
+
+	*bno = da_bno;	/* file block # that maps to the returned fsbno */
+	return(fsbno);
+}
+
+/*
+ * scan longform directory and prune first bad entry.  returns 1 if
+ * it had to remove something, 0 if it made it all the way through
+ * the directory.  prune_lf_dir_entry does all the necessary bmap calls.
+ *
+ * hashval is an in/out -- starting hashvalue in, hashvalue of the
+ *			deleted entry (if there was one) out
+ *
+ * this routine can NOT be called if running in no modify mode
+ */
+int
+prune_lf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip,
+			xfs_dahash_t *hashval)
+{
+	xfs_dfsbno_t		fsbno;
+	int			i;
+	int			index;
+	int			error;
+	int			namelen;
+	xfs_bmap_free_t		free_list;
+	xfs_fsblock_t		first_block;
+	xfs_buf_t		*bp;
+	xfs_dir_leaf_name_t	*namest;
+	xfs_dir_leafblock_t	*leaf;
+	xfs_dir_leaf_entry_t	*entry;
+	xfs_trans_t		*tp;
+	xfs_dablk_t		da_bno;
+	xfs_fsblock_t		fblock;
+	int			committed;
+	int			nmap;
+	xfs_bmbt_irec_t		map;
+	char			fname[MAXNAMELEN + 1];
+	char			*ftype;
+	int			nres;
+
+	/*
+	 * ok, this is kind of a schizoid routine.  we use our
+	 * internal bmapi routines to walk the directory.  when
+	 * we find a bogus entry, we release the buffer so
+	 * the simulation code doesn't deadlock and use the
+	 * sim code to remove the entry.  That will cause an
+	 * extra bmap traversal to map the block but I think
+	 * that's preferable to hacking the bogus removename
+	 * function to be really different and then trying to
+	 * maintain both versions as time goes on.
+	 *
+	 * first, grab the dinode and find the right leaf block.
+	 */
+
+	ftype = "dir";
+	da_bno = 0;
+	bp = NULL;
+	namest = NULL;
+	fblock = NULLFSBLOCK;
+
+	fsbno = map_first_dblock_fsbno(mp, ino, ip, &da_bno);
+	if (fsbno == NULLDFSBNO)	/* never feed the null block to readbuf */
+		do_error(
+	"can't find first leaf block of directory inode %llu\n", ino);
+
+	/*
+	 * now go forward along the leaves of the btree looking
+	 * for an entry beginning with '/'
+	 */
+	do {
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+
+		if (!bp)  {
+			do_error(
+	"can't read directory inode %llu (leaf) block %u (fsbno %llu)\n",
+				ino, da_bno, fsbno);
+			/* NOTREACHED */
+		}
+
+		leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+		ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+		entry = &leaf->entries[0];
+
+		for (index = -1, i = 0;
+				i < INT_GET(leaf->hdr.count, ARCH_CONVERT) && index == -1;
+				i++)  {
+			namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+			if (namest->name[0] != '/')
+				entry++;
+			else
+				index = i;
+		}
+
+		/*
+		 * if we got a bogus entry, exit loop with a pointer to
+		 * the leaf block buffer.  otherwise, keep trying blocks
+		 */
+		da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+
+		if (index == -1)  {
+			libxfs_putbuf(bp);	/* bp was checked non-NULL above */
+			bp = NULL;
+
+			/*
+			 * map next leaf block unless we've run out
+			 */
+			if (da_bno != 0)  {
+				nmap = 1;
+				error = libxfs_bmapi(NULL, ip,
+						(xfs_fileoff_t) da_bno, 1,
+						XFS_BMAPI_METADATA, &fblock, 0,
+						&map, &nmap, NULL);
+				if (error || nmap != 1)
+					do_error(
+"can't map block %d in directory %llu, xfs_bmapi returns %d, nmap = %d\n",
+						da_bno, ino, error, nmap);
+				if ((fsbno = map.br_startblock)
+						== HOLESTARTBLOCK)
+					do_error(
+				"%s ino %llu block %d doesn't exist\n",
+						ftype, ino, da_bno);
+			}
+		}
+	} while (da_bno != 0 && index == -1);
+
+	/*
+	 * if we hit the edge of the tree with no bad entries, we're done
+	 * and the buffer was released.
+	 */
+	if (da_bno == 0 && index == -1)
+		return(0);
+
+	ASSERT(index >= 0);
+	ASSERT(entry == &leaf->entries[index]);
+	ASSERT(namest == XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)));
+
+	/*
+	 * snag the info we need out of the directory then release all buffers
+	 */
+	bcopy(namest->name, fname, entry->namelen);
+	fname[entry->namelen] = '\0';
+	*hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+	namelen = entry->namelen;
+
+	libxfs_putbuf(bp);
+
+	/*
+	 * ok, now the hard part, blow away the index'th entry in this block
+	 *
+	 * allocate a remove transaction for it.  that's not quite true since
+	 * we're only messing with one inode, not two but...
+	 */
+
+	tp = libxfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+
+	nres = XFS_REMOVE_SPACE_RES(mp);
+	error = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp),
+				    0, XFS_TRANS_PERM_LOG_RES,
+				    XFS_REMOVE_LOG_COUNT);
+	if (error)
+		res_failed(error);
+
+	libxfs_trans_ijoin(tp, ip, 0);
+	libxfs_trans_ihold(tp, ip);
+
+	XFS_BMAP_INIT(&free_list, &first_block);
+
+	error = dir_bogus_removename(mp, tp, ip, fname,
+		&first_block, &free_list, nres, *hashval, namelen);
+
+	if (error)  {
+		do_error(
+"couldn't remove bogus entry \"%s\" in\n\tdirectory inode %llu, errno = %d\n",
+			fname, ino, error);
+		/* NOTREACHED */
+	}
+
+	error = libxfs_bmap_finish(&tp, &free_list, first_block, &committed);
+
+	ASSERT(error == 0);
+
+	libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+	return(1);
+}
+
+/*
+ * walk every entry of one directory leaf block.  '/'-led names and ".." are
+ * skipped; bad entries are counted and, unless no_modify, re-marked with '/'
+ */
+void
+lf_block_dir_entry_check(xfs_mount_t		*mp,
+			xfs_ino_t		ino,
+			xfs_dir_leafblock_t	*leaf,
+			int			*dirty,
+			int			*num_illegal,
+			int			*need_dot,
+			dir_stack_t		*stack,
+			ino_tree_node_t		*current_irec,
+			int			current_ino_offset)
+{
+	xfs_dir_leaf_entry_t	*entry;
+	ino_tree_node_t		*irec;
+	xfs_ino_t		lino;
+	xfs_ino_t		parent;
+	xfs_dir_leaf_name_t	*namest;
+	int			i;
+	int			junkit;
+	int			ino_offset;
+	int			nbad;
+	char			fname[MAXNAMELEN + 1];
+
+	entry = &leaf->entries[0];
+	*dirty = 0;	/* set to 1 below whenever an entry name is rewritten */
+	nbad = 0;
+
+	/*
+	 * look at each entry.  reference inode pointed to by each
+	 * entry in the incore inode tree.
+	 * if not a directory, set reached flag, increment link count
+	 * if a directory and reached, mark entry as to be deleted.
+	 * if a directory, check to see if recorded parent
+	 *	matches current inode #,
+	 *	if so, then set reached flag, increment link count
+	 *		of current and child dir inodes, push the child
+	 *		directory inode onto the directory stack.
+	 *	if current inode != parent, then mark entry to be deleted.
+	 * on return *dirty is 1 if an entry in the block was rewritten and
+	 * *num_illegal has grown by the number of bad entries counted.
+	 */
+	for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++)  {
+		/*
+		 * snag inode #, update link counts, and make sure
+		 * this isn't a loop if the child is a directory
+		 */
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+
+		/*
+		 * skip bogus entries (leading '/').  they'll be deleted
+		 * later
+		 */
+		if (namest->name[0] == '/')  {
+			nbad++;
+			continue;
+		}
+
+		junkit = 0;
+
+		XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+		bcopy(namest->name, fname, entry->namelen);
+		fname[entry->namelen] = '\0';
+
+		ASSERT(lino != NULLFSINO);
+
+		/*
+		 * skip the '..' entry since it's checked when the
+		 * directory is reached by something else.  if it never
+		 * gets reached, it'll be moved to the orphanage and we'll
+		 * take care of it then.
+		 */
+		if (entry->namelen == 2 && namest->name[0] == '.' &&
+				namest->name[1] == '.')  {
+			continue;
+		}
+		ASSERT(no_modify || !verify_inum(mp, lino));
+
+		/*
+		 * special case the . entry.  we know there's only one
+		 * '.' and only '.' points to itself because bogus entries
+		 * got trashed in phase 3 if there were > 1.
+		 * bump up link count for '.' but don't set reached
+		 * until we're actually reached by another directory
+		 * '..' is already accounted for or will be taken care
+		 * of when directory is moved to orphanage.
+		 */
+		if (ino == lino)  {
+			ASSERT(namest->name[0] == '.' && entry->namelen == 1);
+			add_inode_ref(current_irec, current_ino_offset);
+			*need_dot = 0;	/* self-referencing entry ('.') seen */
+			continue;
+		}
+
+		/*
+		 * special case the "lost+found" entry if pointing
+		 * to where we think lost+found should be.  if that's
+		 * the case, that's the one we created in phase 6.
+		 * just skip it.  no need to process it and its ..
+		 * link is already accounted for.
+		 */
+
+		if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0)
+			continue;
+
+		/*
+		 * skip entries with bogus inumbers if we're in no modify mode
+		 */
+		if (no_modify && verify_inum(mp, lino))
+			continue;
+
+		/*
+		 * ok, now handle the rest of the cases besides '.' and '..'
+		 */
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+					XFS_INO_TO_AGINO(mp, lino));
+		
+		if (irec == NULL)  {
+			nbad++;
+			do_warn(
+	"entry \"%s\" in dir inode %llu points to non-existent inode, ",
+				fname, ino);
+
+			if (!no_modify)  {
+				namest->name[0] = '/';
+				*dirty = 1;
+				do_warn("marking entry to be junked\n");
+			} else  {
+				do_warn("would junk entry\n");
+			}
+
+			continue;
+		}
+
+		ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
+
+		/*
+		 * if it's a free inode, blow out the entry.
+		 * by now, any inode that we think is free
+		 * really is free.
+		 */
+		if (is_inode_free(irec, ino_offset))  {
+			/*
+			 * don't complain if this entry points to the old
+			 * and now-free lost+found inode
+			 */
+			if (verbose || no_modify || lino != old_orphanage_ino)
+				do_warn(
+		"entry \"%s\" in dir inode %llu points to free inode %llu",
+					fname, ino, lino);
+			nbad++;
+
+			if (!no_modify)  {
+				if (verbose || lino != old_orphanage_ino)
+					do_warn(", marking entry to be junked\n");
+
+				else
+					do_warn("\n");
+				namest->name[0] = '/';
+				*dirty = 1;
+			} else  {
+				do_warn(", would junk entry\n");
+			}
+
+			continue;
+		}
+
+		/*
+		 * check easy case first, regular inode, just bump
+		 * the link count and continue
+		 */
+		if (!inode_isadir(irec, ino_offset))  {
+			add_inode_reached(irec, ino_offset);
+			continue;
+		}
+
+		parent = get_inode_parent(irec, ino_offset);
+		ASSERT(parent != 0);
+
+		/*
+		 * bump up the link counts in parent and child
+		 * directory but if the link doesn't agree with
+		 * the .. in the child, blow out the entry.
+		 * if the directory has already been reached,
+		 * blow away the entry also.
+		 */
+		if (is_inode_reached(irec, ino_offset))  {
+			junkit = 1;
+			do_warn(
+"entry \"%s\" in dir %llu points to an already connected dir inode %llu,\n",
+				fname, ino, lino);
+		} else if (parent == ino)  {
+			add_inode_reached(irec, ino_offset);
+			add_inode_ref(current_irec, current_ino_offset);
+
+			if (!is_inode_refchecked(lino, irec, ino_offset))
+				push_dir(stack, lino);
+		} else  {
+			junkit = 1;
+			do_warn(
+"entry \"%s\" in dir ino %llu not consistent with .. value (%llu) in ino %llu,\n",
+				fname, ino, parent, lino);
+		}
+
+		if (junkit)  {
+			junkit = 0;
+			nbad++;
+
+			if (!no_modify)  {
+				namest->name[0] = '/';
+				*dirty = 1;
+				if (verbose || lino != old_orphanage_ino)
+					do_warn("\twill clear entry \"%s\"\n",
+						fname);
+			} else  {
+				do_warn("\twould clear entry \"%s\"\n", fname);
+			}
+		}
+	}
+
+	*num_illegal += nbad;	/* add this block's bad entries to the caller's total */
+}
+
+/*
+ * walk the forw-linked leaf blocks of a directory, checking each with
+ * lf_block_dir_entry_check() and writing back blocks it dirtied.  map and
+ * magic errors are fatal unless no_modify (then warn and return/skip block).
+ */
+void
+longform_dir_entry_check(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_inode_t	*ip,
+			int		*num_illegal,
+			int		*need_dot,
+			dir_stack_t	*stack,
+			ino_tree_node_t	*irec,
+			int		ino_offset)
+{
+	xfs_dir_leafblock_t	*leaf;
+	xfs_buf_t		*bp;
+	xfs_dfsbno_t		fsbno;
+	xfs_fsblock_t		fblock;
+	xfs_dablk_t		da_bno;
+	int			dirty;
+	int			nmap;
+	int			error;
+	int			skipit;
+	xfs_bmbt_irec_t		map;
+	char			*ftype;
+
+	da_bno = 0;
+	fblock = NULLFSBLOCK;
+	*need_dot = 1;	/* cleared by the block checker once '.' is seen */
+	ftype = "dir";
+
+	fsbno = map_first_dblock_fsbno(mp, ino, ip, &da_bno);
+
+	if (fsbno == NULLDFSBNO && no_modify)  {
+		do_warn("cannot map block 0 of directory inode %llu\n", ino);
+		return;
+	}
+
+	do {
+		ASSERT(fsbno != NULLDFSBNO);
+		skipit = 0;
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+
+		if (!bp) {
+			do_error(
+		"can't read block %u (fsbno %llu) for directory inode %llu\n",
+					da_bno, fsbno, ino);
+			/* NOTREACHED */
+		}
+
+		leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+
+		if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)  {
+			if (!no_modify)  {
+				do_error(
+	"bad magic # (0x%x) for dir ino %llu leaf block (bno %u fsbno %llu)\n",
+					INT_GET(leaf->hdr.info.magic, ARCH_CONVERT),
+					ino, da_bno, fsbno);
+				/* NOTREACHED */
+			} else  {
+				/*
+				 * this block's bad but maybe the
+				 * forward pointer is good...
+				 */
+				skipit = 1;
+				dirty = 0;
+			}
+		}
+
+		/* advance only now, so the error above names the current block */
+		da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+		if (!skipit)
+			lf_block_dir_entry_check(mp, ino, leaf, &dirty,
+						num_illegal, need_dot, stack,
+						irec, ino_offset);
+
+		ASSERT(dirty == 0 || (dirty && !no_modify));
+
+		/* writebuf/putbuf both give up our reference to the buffer */
+		if (dirty && !no_modify)
+			libxfs_writebuf(bp, 0);
+		else
+			libxfs_putbuf(bp);
+		bp = NULL;
+		if (da_bno != 0)  {
+			nmap = 1;
+			error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t)da_bno, 1,
+					XFS_BMAPI_METADATA, &fblock, 0,
+					&map, &nmap, NULL);
+			if (error || nmap != 1)  {
+				if (!no_modify)
+					do_error(
+"can't map leaf block %d in dir %llu, xfs_bmapi returns %d, nmap = %d\n",
+						da_bno, ino, error, nmap);
+				else  {
+					do_warn(
+"can't map leaf block %d in dir %llu, xfs_bmapi returns %d, nmap = %d\n",
+						da_bno, ino, error, nmap);
+					return;
+				}
+			}
+			if ((fsbno = map.br_startblock) == HOLESTARTBLOCK)  {
+				if (!no_modify)
+					do_error(
+				"block %d in %s ino %llu doesn't exist\n",
+						da_bno, ftype, ino);
+				else  {
+					do_warn(
+				"block %d in %s ino %llu doesn't exist\n",
+						da_bno, ftype, ino);
+					return;
+				}
+			}
+		}
+	} while (da_bno != 0);
+}
+
+/*
+ * Remove block da_bno (held in bp) from a version 2 directory inode.
+ * Runs in its own transaction, committed before returning.
+ */
+static void
+dir2_kill_block(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	xfs_dablk_t	da_bno,
+	xfs_dabuf_t	*bp)
+{
+	xfs_trans_t	*trans;
+	xfs_da_args_t	da_args;
+	xfs_bmap_free_t	free_list;
+	xfs_fsblock_t	first_fsb;
+	int		space_res;
+	int		is_leaf_blk;
+	int		rc, done;
+
+	space_res = XFS_REMOVE_SPACE_RES(mp);
+	trans = libxfs_trans_alloc(mp, 0);
+	rc = libxfs_trans_reserve(trans, space_res, XFS_REMOVE_LOG_RES(mp), 0,
+			XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+	if (rc)
+		res_failed(rc);
+	libxfs_trans_ijoin(trans, ip, 0);
+	libxfs_trans_ihold(trans, ip);
+	libxfs_da_bjoin(trans, bp);
+	XFS_BMAP_INIT(&free_list, &first_fsb);
+	bzero(&da_args, sizeof(da_args));
+	da_args.whichfork = XFS_DATA_FORK;
+	da_args.flist = &free_list;
+	da_args.firstblock = &first_fsb;
+	da_args.trans = trans;
+	da_args.dp = ip;
+	/* leaf-range blocks shrink via the da path, all others via dir2 */
+	is_leaf_blk = da_bno >= mp->m_dirleafblk && da_bno < mp->m_dirfreeblk;
+	rc = is_leaf_blk ? libxfs_da_shrink_inode(&da_args, da_bno, bp) :
+		libxfs_dir2_shrink_inode(&da_args,
+				XFS_DIR2_DA_TO_DB(mp, da_bno), bp);
+	if (rc)
+		do_error("shrink_inode failed inode %llu block %u\n",
+			ip->i_ino, da_bno);
+	libxfs_bmap_finish(&trans, &free_list, first_fsb, &done);
+	libxfs_trans_commit(trans, 0, 0);
+}
+
+/*
+ * validate the layout of one version 2 data (or block-form) block, then
+ * check each entry; ".." is skipped.  a corrupt block is junked and *bpp
+ * is set to NULL.  freetab is grown as needed and ents[db] is recorded.
+ */
+static void
+longform_dir2_entry_check_data(
+	xfs_mount_t		*mp,
+	xfs_inode_t		*ip,
+	int			*num_illegal,
+	int			*need_dot,
+	dir_stack_t		*stack,
+	ino_tree_node_t		*current_irec,
+	int			current_ino_offset,
+	xfs_dabuf_t		**bpp,
+	dir_hash_tab_t		*hashtab,
+	freetab_t		**freetabp,
+	xfs_dablk_t		da_bno,
+	int			isblock)
+{
+	xfs_dir2_dataptr_t	addr;
+	xfs_dir2_leaf_entry_t	*blp;
+	xfs_dabuf_t		*bp;
+	xfs_dir2_block_tail_t	*btp;
+	int			committed;
+	xfs_dir2_data_t		*d;
+	xfs_dir2_db_t		db;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_unused_t	*dup;
+	char			*endptr;
+	int			error;
+	xfs_fsblock_t		firstblock;
+	xfs_bmap_free_t		flist;
+	char			fname[MAXNAMELEN + 1];
+	freetab_t		*freetab;
+	int			i;
+	int			ino_offset;
+	ino_tree_node_t		*irec;
+	int			junkit;
+	int			lastfree;
+	int			len;
+	int			nbad;
+	int			needlog;
+	int			needscan;
+	xfs_ino_t		parent;
+	char			*ptr;
+	xfs_trans_t		*tp;
+	int			wantmagic;
+
+	bp = *bpp;
+	d = bp->data;
+	ptr = (char *)d->u;
+	nbad = 0;
+	needscan = needlog = 0;
+	freetab = *freetabp;
+	if (isblock) {
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, d);
+		blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+		endptr = (char *)blp;
+		if (endptr > (char *)btp)
+			endptr = (char *)btp;
+		wantmagic = XFS_DIR2_BLOCK_MAGIC;
+	} else {
+		endptr = (char *)d + mp->m_dirblksize;
+		wantmagic = XFS_DIR2_DATA_MAGIC;
+	}
+	db = XFS_DIR2_DA_TO_DB(mp, da_bno);
+	if (freetab->naents <= db) {
+		struct freetab_ent e;
+
+		*freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
+		if (!freetab) {
+			do_error(
+		"realloc failed in longform_dir2_entry_check_data (%u bytes)\n",
+				FREETAB_SIZE(db + 1));
+			exit(1);
+		}
+		e.v = NULLDATAOFF;
+		e.s = 0;
+		/* include ents[db]: the corrupt-block path below sets only .v */
+		for (i = freetab->naents; i <= db; i++)
+			freetab->ents[i] = e;
+		freetab->naents = db + 1;
+	}
+	if (freetab->nents < db + 1)
+		freetab->nents = db + 1;
+	/* pass 1: structural walk; stops early on any inconsistency */
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+			    (INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)))
+				break;
+			if (INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) !=
+			    (char *)dup - (char *)d)
+				break;
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			if (ptr >= endptr)
+				break;
+		}
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		if (ptr + XFS_DIR2_DATA_ENTSIZE(dep->namelen) > endptr)
+			break;
+		if (INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) != (char *)dep - (char *)d)
+			break;
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+	}
+	if (ptr != endptr) {	/* walk did not land exactly on the end */
+		do_warn("corrupt block %u in directory inode %llu: ",
+			da_bno, ip->i_ino);
+		if (!no_modify) {
+			do_warn("junking block\n");
+			dir2_kill_block(mp, ip, da_bno, bp);
+		} else {
+			do_warn("would junk block\n");
+			libxfs_da_brelse(NULL, bp);
+		}
+		freetab->ents[db].v = NULLDATAOFF;
+		*bpp = NULL;
+		return;
+	}
+	tp = libxfs_trans_alloc(mp, 0);
+	error = libxfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
+		XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+	if (error)
+		res_failed(error);
+	libxfs_trans_ijoin(tp, ip, 0);
+	libxfs_trans_ihold(tp, ip);
+	libxfs_da_bjoin(tp, bp);
+	if (isblock)
+		libxfs_da_bhold(tp, bp);
+	XFS_BMAP_INIT(&flist, &firstblock);
+	if (INT_GET(d->hdr.magic, ARCH_CONVERT) != wantmagic) {
+		do_warn("bad directory block magic # %#x for directory inode "
+			"%llu block %d: ",
+			INT_GET(d->hdr.magic, ARCH_CONVERT), ip->i_ino, da_bno);
+		if (!no_modify) {
+			do_warn("fixing magic # to %#x\n", wantmagic);
+			INT_SET(d->hdr.magic, ARCH_CONVERT, wantmagic);
+			needlog = 1;
+		} else
+			do_warn("would fix magic # to %#x\n", wantmagic);
+	}
+	lastfree = 0;
+	ptr = (char *)d->u;
+	/*
+	 * look at each entry.  reference inode pointed to by each
+	 * entry in the incore inode tree.
+	 * if not a directory, set reached flag, increment link count
+	 * if a directory and reached, mark entry as to be deleted.
+	 * if a directory, check to see if recorded parent
+	 *	matches current inode #,
+	 *	if so, then set reached flag, increment link count
+	 *		of current and child dir inodes, push the child
+	 *		directory inode onto the directory stack.
+	 *	if current inode != parent, then mark entry to be deleted.
+	 */
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			if (lastfree) {
+				do_warn("directory inode %llu block %u has "
+					"consecutive free entries: ",
+					ip->i_ino, da_bno);
+				if (!no_modify) {
+					do_warn("joining together\n");
+					len = INT_GET(dup->length, ARCH_CONVERT);
+					libxfs_dir2_data_use_free(tp, bp, dup,
+						ptr - (char *)d, len, &needlog,
+						&needscan);
+					libxfs_dir2_data_make_free(tp, bp,
+						ptr - (char *)d, len, &needlog,
+						&needscan);
+				} else
+					do_warn("would join together\n");
+			}
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			lastfree = 1;
+			continue;
+		}
+		addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db, ptr - (char *)d);
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		lastfree = 0;
+		dir_hash_add(hashtab,
+			libxfs_da_hashname((char *)dep->name, dep->namelen),
+			addr, dep->name[0] == '/');
+		/*
+		 * skip bogus entries (leading '/').  they'll be deleted
+		 * later
+		 */
+		if (dep->name[0] == '/')  {
+			nbad++;
+			continue;
+		}
+		junkit = 0;
+		bcopy(dep->name, fname, dep->namelen);
+		fname[dep->namelen] = '\0';
+		ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != NULLFSINO);
+		/*
+		 * skip the '..' entry since it's checked when the
+		 * directory is reached by something else.  if it never
+		 * gets reached, it'll be moved to the orphanage and we'll
+		 * take care of it then.
+		 */
+		if (dep->namelen == 2 && dep->name[0] == '.' &&
+		    dep->name[1] == '.')
+			continue;
+		ASSERT(no_modify || !verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT)));
+		/*
+		 * special case the . entry.  we know there's only one
+		 * '.' and only '.' points to itself because bogus entries
+		 * got trashed in phase 3 if there were > 1.
+		 * bump up link count for '.' but don't set reached
+		 * until we're actually reached by another directory
+		 * '..' is already accounted for or will be taken care
+		 * of when directory is moved to orphanage.
+		 */
+		if (ip->i_ino == INT_GET(dep->inumber, ARCH_CONVERT))  {
+			ASSERT(dep->name[0] == '.' && dep->namelen == 1);
+			add_inode_ref(current_irec, current_ino_offset);
+			*need_dot = 0;
+			continue;
+		}
+		/*
+		 * special case the "lost+found" entry if pointing
+		 * to where we think lost+found should be.  if that's
+		 * the case, that's the one we created in phase 6.
+		 * just skip it.  no need to process it and its ..
+		 * link is already accounted for.
+		 */
+		if (INT_GET(dep->inumber, ARCH_CONVERT) == orphanage_ino &&
+		    strcmp(fname, ORPHANAGE) == 0)
+			continue;
+		/*
+		 * skip entries with bogus inumbers if we're in no modify mode
+		 */
+		if (no_modify && verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT)))
+			continue;
+		/*
+		 * ok, now handle the rest of the cases besides '.' and '..'
+		 */
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, INT_GET(dep->inumber, ARCH_CONVERT)),
+			XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)));
+		if (irec == NULL)  {
+			nbad++;
+			do_warn("entry \"%s\" in directory inode %llu points "
+				"to non-existent inode, ",
+				fname, ip->i_ino);
+			if (!no_modify)  {
+				dep->name[0] = '/';
+				libxfs_dir2_data_log_entry(tp, bp, dep);
+				do_warn("marking entry to be junked\n");
+			} else  {
+				do_warn("would junk entry\n");
+			}
+			continue;
+		}
+		ino_offset =
+			XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)) - irec->ino_startnum;
+		/*
+		 * if it's a free inode, blow out the entry.
+		 * by now, any inode that we think is free
+		 * really is free.
+		 */
+		if (is_inode_free(irec, ino_offset))  {
+			/*
+			 * don't complain if this entry points to the old
+			 * and now-free lost+found inode
+			 */
+			if (verbose || no_modify ||
+			    INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino)
+				do_warn("entry \"%s\" in directory inode %llu "
+					"points to free inode %llu",
+					fname, ip->i_ino, INT_GET(dep->inumber, ARCH_CONVERT));
+			nbad++;
+			if (!no_modify)  {
+				if (verbose ||
+				    INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino)
+					do_warn(", marking entry to be "
+						"junked\n");
+				else
+					do_warn("\n");
+				dep->name[0] = '/';
+				libxfs_dir2_data_log_entry(tp, bp, dep);
+			} else  {
+				do_warn(", would junk entry\n");
+			}
+			continue;
+		}
+		/*
+		 * check easy case first, regular inode, just bump
+		 * the link count and continue
+		 */
+		if (!inode_isadir(irec, ino_offset))  {
+			add_inode_reached(irec, ino_offset);
+			continue;
+		}
+		parent = get_inode_parent(irec, ino_offset);
+		ASSERT(parent != 0);
+		/*
+		 * bump up the link counts in parent and child
+		 * directory but if the link doesn't agree with
+		 * the .. in the child, blow out the entry.
+		 * if the directory has already been reached,
+		 * blow away the entry also.
+		 */
+		if (is_inode_reached(irec, ino_offset))  {
+			junkit = 1;
+			do_warn("entry \"%s\" in dir %llu points to an already "
+				"connected directory inode %llu,\n", fname,
+				ip->i_ino, INT_GET(dep->inumber, ARCH_CONVERT));
+		} else if (parent == ip->i_ino)  {
+			add_inode_reached(irec, ino_offset);
+			add_inode_ref(current_irec, current_ino_offset);
+			if (!is_inode_refchecked(INT_GET(dep->inumber, ARCH_CONVERT), irec,
+					ino_offset))
+				push_dir(stack, INT_GET(dep->inumber, ARCH_CONVERT));
+		} else  {
+			junkit = 1;
+			do_warn("entry \"%s\" in directory inode %llu not "
+				"consistent with .. value (%llu) in ino "
+				"%llu,\n",
+				fname, ip->i_ino, parent, INT_GET(dep->inumber, ARCH_CONVERT));
+		}
+		if (junkit)  {
+			junkit = 0;
+			nbad++;
+			if (!no_modify)  {
+				dep->name[0] = '/';
+				libxfs_dir2_data_log_entry(tp, bp, dep);
+				if (verbose ||
+				    INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino)
+					do_warn("\twill clear entry \"%s\"\n",
+						fname);
+			} else  {
+				do_warn("\twould clear entry \"%s\"\n", fname);
+			}
+		}
+	}
+	*num_illegal += nbad;
+	if (needscan)
+		libxfs_dir2_data_freescan(mp, d, &needlog, NULL);
+	if (needlog)
+		libxfs_dir2_data_log_header(tp, bp);
+	libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+	libxfs_trans_commit(tp, 0, 0);
+	freetab->ents[db].v = INT_GET(d->hdr.bestfree[0].length, ARCH_CONVERT);
+	freetab->ents[db].s = 0;
+}
+
+/*
+ * Validate the leaf block of a leaf-form directory against the hash table
+ * and freetab built from its data blocks.  Returns 0 if it agrees, else 1.
+ */
+int
+longform_dir2_check_leaf(
+	xfs_mount_t		*mp,
+	xfs_inode_t		*ip,
+	dir_hash_tab_t		*hashtab,
+	freetab_t		*freetab)
+{
+	xfs_dabuf_t		*bp;
+	xfs_dir2_leaf_t		*leaf;
+	xfs_dir2_leaf_tail_t	*ltp;
+	xfs_dir2_data_off_t	*bestsp;
+	xfs_dablk_t		da_bno = mp->m_dirleafblk;
+	int			hdr_bad, tail_bad;
+	int			idx;
+	int			seeval;
+	int			rval = 1;
+	if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK)) {
+		do_error("can't read block %u for directory inode %llu\n",
+			da_bno, ip->i_ino);
+		/* NOTREACHED */
+	}
+	leaf = bp->data;
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	hdr_bad =
+	    INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC ||
+	    INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) || INT_GET(leaf->hdr.info.back, ARCH_CONVERT) ||
+	    INT_GET(leaf->hdr.count, ARCH_CONVERT) < INT_GET(leaf->hdr.stale, ARCH_CONVERT) ||
+	    INT_GET(leaf->hdr.count, ARCH_CONVERT) > XFS_DIR2_MAX_LEAF_ENTS(mp) ||
+	    (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] > (char *)bestsp;
+	if (hdr_bad) {
+		do_warn("leaf block %u for directory inode %llu bad header\n",
+			da_bno, ip->i_ino);
+		goto out;
+	}
+	seeval = dir_hash_see_all(hashtab, leaf->ents, INT_GET(leaf->hdr.count, ARCH_CONVERT),
+		INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+	if (dir_hash_check(hashtab, ip, seeval))
+		goto out;
+	tail_bad = freetab->nents != INT_GET(ltp->bestcount, ARCH_CONVERT);
+	for (idx = 0; !tail_bad && idx < INT_GET(ltp->bestcount, ARCH_CONVERT); idx++) {
+		freetab->ents[idx].s = 1;
+		if (freetab->ents[idx].v != INT_GET(bestsp[idx], ARCH_CONVERT))
+			tail_bad = 1;
+	}
+	if (tail_bad)
+		do_warn("leaf block %u for directory inode %llu bad tail\n",
+			da_bno, ip->i_ino);
+	else
+		rval = 0;
+out:
+	libxfs_da_brelse(NULL, bp);
+	return rval;
+}
+
+/*
+ * Check the leaf (LEAFN) and free blocks of a node-form directory against
+ * the hash table and freetab.  Returns 0 if consistent, 1 on any mismatch.
+ */
+int
+longform_dir2_check_node(
+	xfs_mount_t		*mp,
+	xfs_inode_t		*ip,
+	dir_hash_tab_t		*hashtab,
+	freetab_t		*freetab)
+{
+	xfs_dabuf_t		*bp;
+	xfs_dablk_t		da_bno;
+	xfs_dir2_db_t		fdb;
+	xfs_dir2_free_t		*free;
+	int			i;
+	xfs_dir2_leaf_t		*leaf;
+	xfs_fileoff_t		next_da_bno;
+	int			seeval = 0;
+	int			used;
+
+	for (da_bno = mp->m_dirleafblk, next_da_bno = 0;
+	     next_da_bno != NULLFILEOFF && da_bno < mp->m_dirfreeblk;
+	     da_bno = (xfs_dablk_t)next_da_bno) {
+		next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
+		if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
+			break;
+		if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
+				XFS_DATA_FORK)) {
+			do_error("can't read block %u for directory inode "
+				 "%llu\n",
+				da_bno, ip->i_ino);
+			/* NOTREACHED */
+		}
+		leaf = bp->data;
+		if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) {
+			if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+				libxfs_da_brelse(NULL, bp);
+				continue;	/* XFS_DA_NODE_MAGIC blocks are skipped unchecked */
+			}
+			do_warn("unknown magic number %#x for block %u in "
+				"directory inode %llu\n",
+				INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), da_bno, ip->i_ino);
+			libxfs_da_brelse(NULL, bp);
+			return 1;
+		}
+		if (INT_GET(leaf->hdr.count, ARCH_CONVERT) < INT_GET(leaf->hdr.stale, ARCH_CONVERT) ||
+		    INT_GET(leaf->hdr.count, ARCH_CONVERT) > XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+			do_warn("leaf block %u for directory inode %llu bad "
+				"header\n",
+				da_bno, ip->i_ino);
+			libxfs_da_brelse(NULL, bp);
+			return 1;
+		}
+		seeval = dir_hash_see_all(hashtab, leaf->ents, INT_GET(leaf->hdr.count, ARCH_CONVERT),
+			INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+		libxfs_da_brelse(NULL, bp);
+		if (seeval != DIR_HASH_CK_OK)
+			return 1;
+	}
+	if (dir_hash_check(hashtab, ip, seeval))
+		return 1;
+	for (da_bno = mp->m_dirfreeblk, next_da_bno = 0;
+	     next_da_bno != NULLFILEOFF;
+	     da_bno = (xfs_dablk_t)next_da_bno) {
+		next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
+		if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
+			break;
+		if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
+				XFS_DATA_FORK)) {
+			do_error("can't read block %u for directory inode "
+				 "%llu\n",
+				da_bno, ip->i_ino);
+			/* NOTREACHED */
+		}
+		free = bp->data;
+		fdb = XFS_DIR2_DA_TO_DB(mp, da_bno);
+		if (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC ||
+		    INT_GET(free->hdr.firstdb, ARCH_CONVERT) !=
+			(fdb - XFS_DIR2_FREE_FIRSTDB(mp)) *
+			XFS_DIR2_MAX_FREE_BESTS(mp) ||
+		    INT_GET(free->hdr.nvalid, ARCH_CONVERT) < INT_GET(free->hdr.nused, ARCH_CONVERT)) {
+			do_warn("free block %u for directory inode %llu bad "
+				"header\n",
+				da_bno, ip->i_ino);
+			libxfs_da_brelse(NULL, bp);
+			return 1;
+		}
+		for (i = used = 0; i < INT_GET(free->hdr.nvalid, ARCH_CONVERT); i++) {
+			if (i + INT_GET(free->hdr.firstdb, ARCH_CONVERT) >= freetab->nents ||
+			    freetab->ents[i + INT_GET(free->hdr.firstdb, ARCH_CONVERT)].v !=
+			    INT_GET(free->bests[i], ARCH_CONVERT)) {
+				do_warn("free block %u entry %i for directory "
+					"ino %llu bad\n",
+					da_bno, i, ip->i_ino);
+				libxfs_da_brelse(NULL, bp);
+				return 1;
+			}
+			used += INT_GET(free->bests[i], ARCH_CONVERT) != NULLDATAOFF;	/* tallied for the nused check below */
+			freetab->ents[i + INT_GET(free->hdr.firstdb, ARCH_CONVERT)].s = 1;	/* mark entry as matched */
+		}
+		if (used != INT_GET(free->hdr.nused, ARCH_CONVERT)) {
+			do_warn("free block %u for directory inode %llu bad "
+				"nused\n",
+				da_bno, ip->i_ino);
+			libxfs_da_brelse(NULL, bp);
+			return 1;
+		}
+		libxfs_da_brelse(NULL, bp);
+	}
+	for (i = 0; i < freetab->nents; i++) {
+		if (freetab->ents[i].s == 0) {	/* never matched by a free block above */
+			do_warn("missing freetab entry %u for directory inode "
+				"%llu\n",
+				i, ip->i_ino);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Rebuild a directory: set up.  Converts a block-form first block to data
+ * form, adds one empty LEAFN block, and writes free blocks from freetab,
+ * leaving a node-format directory with no contents in the upper area.
+ */
+void
+longform_dir2_rebuild_setup(
+	xfs_mount_t		*mp,
+	xfs_ino_t		ino,
+	xfs_inode_t		*ip,
+	freetab_t		*freetab)
+{
+	xfs_da_args_t		args;
+	int			committed;
+	xfs_dir2_data_t		*data;
+	xfs_dabuf_t		*dbp;
+	int			error;
+	xfs_dir2_db_t		fbno;
+	xfs_dabuf_t		*fbp;
+	xfs_fsblock_t		firstblock;
+	xfs_bmap_free_t		flist;
+	xfs_dir2_free_t		*free;
+	int			i;
+	int			j;
+	xfs_dablk_t		lblkno;
+	xfs_dabuf_t		*lbp;
+	xfs_dir2_leaf_t		*leaf;
+	int			nres;
+	xfs_trans_t		*tp;
+
+	tp = libxfs_trans_alloc(mp, 0);
+	nres = XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK);
+	error = libxfs_trans_reserve(tp,
+		nres, XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+		XFS_CREATE_LOG_COUNT);
+	if (error)
+		res_failed(error);
+	libxfs_trans_ijoin(tp, ip, 0);
+	libxfs_trans_ihold(tp, ip);
+	XFS_BMAP_INIT(&flist, &firstblock);
+	if (libxfs_da_read_buf(tp, ip, mp->m_dirdatablk, -2, &dbp,
+			XFS_DATA_FORK)) {
+		do_error("can't read block %u for directory inode %llu\n",
+			mp->m_dirdatablk, ino);
+		/* NOTREACHED */
+	}
+	data = dbp ? dbp->data : NULL;	/* dbp may be NULL */
+	if (data && INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		xfs_dir2_block_t	*block;
+		xfs_dir2_leaf_entry_t	*blp;
+		xfs_dir2_block_tail_t	*btp;
+		int			needlog = 0;
+		int			needscan = 0;
+
+		INT_SET(data->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+		block = (xfs_dir2_block_t *)data;
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+		blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+		libxfs_dir2_data_make_free(tp, dbp, (char *)blp - (char *)block,
+			(char *)block + mp->m_dirblksize - (char *)blp,
+			&needlog, &needscan);
+		if (needscan)
+			libxfs_dir2_data_freescan(mp, data, &needlog, NULL);
+		libxfs_da_log_buf(tp, dbp, 0, mp->m_dirblksize - 1);
+	}
+	bzero(&args, sizeof(args));
+	args.trans = tp;
+	args.dp = ip;
+	args.whichfork = XFS_DATA_FORK;
+	args.firstblock = &firstblock;
+	args.flist = &flist;
+	args.total = nres;
+	if ((error = libxfs_da_grow_inode(&args, &lblkno)) ||
+	    (error = libxfs_da_get_buf(tp, ip, lblkno, -1, &lbp, XFS_DATA_FORK))) {
+		do_error("can't add btree block to directory inode %llu\n",
+			ino);
+		/* NOTREACHED */
+	}
+	leaf = lbp->data;
+	bzero(leaf, mp->m_dirblksize);
+	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
+	libxfs_da_log_buf(tp, lbp, 0, mp->m_dirblksize - 1);
+	libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+	libxfs_trans_commit(tp, 0, 0);
+
+	/* one free block, in its own transaction, per MAX_FREE_BESTS entries */
+	for (i = 0; i < freetab->nents; i += XFS_DIR2_MAX_FREE_BESTS(mp)) {
+		tp = libxfs_trans_alloc(mp, 0);
+		nres = XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK);
+		error = libxfs_trans_reserve(tp,
+			nres, XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+			XFS_CREATE_LOG_COUNT);
+		if (error)
+			res_failed(error);
+		libxfs_trans_ijoin(tp, ip, 0);
+		libxfs_trans_ihold(tp, ip);
+		XFS_BMAP_INIT(&flist, &firstblock);
+		bzero(&args, sizeof(args));
+		args.trans = tp;
+		args.dp = ip;
+		args.whichfork = XFS_DATA_FORK;
+		args.firstblock = &firstblock;
+		args.flist = &flist;
+		args.total = nres;
+		if ((error = libxfs_dir2_grow_inode(&args, XFS_DIR2_FREE_SPACE,
+						 &fbno)) ||
+		    (error = libxfs_da_get_buf(tp, ip, XFS_DIR2_DB_TO_DA(mp, fbno),
+					    -1, &fbp, XFS_DATA_FORK))) {
+			do_error("can't add free block to directory inode "
+				 "%llu\n",
+				ino);
+			/* NOTREACHED */
+		}
+		free = fbp->data;
+		bzero(free, mp->m_dirblksize);
+		INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+		INT_SET(free->hdr.firstdb, ARCH_CONVERT, i);
+		INT_SET(free->hdr.nvalid, ARCH_CONVERT, XFS_DIR2_MAX_FREE_BESTS(mp));
+		if (i + INT_GET(free->hdr.nvalid, ARCH_CONVERT) > freetab->nents)
+			INT_SET(free->hdr.nvalid, ARCH_CONVERT, freetab->nents - i);
+		for (j = 0; j < INT_GET(free->hdr.nvalid, ARCH_CONVERT); j++) {
+			INT_SET(free->bests[j], ARCH_CONVERT, freetab->ents[i + j].v);
+			if (INT_GET(free->bests[j], ARCH_CONVERT) != NULLDATAOFF)
+				INT_MOD(free->hdr.nused, ARCH_CONVERT, +1);
+		}
+		libxfs_da_log_buf(tp, fbp, 0, mp->m_dirblksize - 1);
+		libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+		libxfs_trans_commit(tp, 0, 0);
+	}
+}
+
+/*
+ * Rebuild the entries from a single data block.
+ *
+ * The data block at da_bno is read (for da_bno 0 a missing block is
+ * tolerated and the function just returns) along with the free block that
+ * XFS_DIR2_DB_TO_FDB() selects for it.  A private copy of the data block
+ * is taken, then one transaction marks everything after the on-disk
+ * block's header as free and stores the resulting bestfree[0] length in
+ * the free block's bests[] slot.  Finally the copy is walked and each
+ * entry whose name does not begin with '/' is re-added through
+ * dir_createname(), one transaction per entry.
+ *
+ * mp:     mount structure
+ * ino:    inode number of the directory (used in error messages only)
+ * ip:     in-core inode of the directory
+ * da_bno: da block number of the data block to rebuild
+ *
+ * Read and malloc failures are reported through do_error().
+ */
+void
+longform_dir2_rebuild_data(
+	xfs_mount_t		*mp,
+	xfs_ino_t		ino,
+	xfs_inode_t		*ip,
+	xfs_dablk_t		da_bno)
+{
+	xfs_dabuf_t		*bp;
+	xfs_dir2_block_tail_t	*btp;
+	int			committed;
+	xfs_dir2_data_t		*data;
+	xfs_dir2_db_t		dbno;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_unused_t	*dup;
+	char			*endptr;
+	int			error;
+	xfs_dir2_free_t		*fblock;
+	xfs_dabuf_t		*fbp;
+	xfs_dir2_db_t		fdb;
+	int			fi;
+	xfs_fsblock_t		firstblock;
+	xfs_bmap_free_t		flist;
+	int			needlog;
+	int			needscan;
+	int			nres;
+	char			*ptr;
+	xfs_trans_t		*tp;
+
+	if (libxfs_da_read_buf(NULL, ip, da_bno, da_bno == 0 ? -2 : -1, &bp,
+			XFS_DATA_FORK)) {
+		do_error("can't read block %u for directory inode %llu\n",
+			da_bno, ino);
+		/* NOTREACHED */
+	}
+	if (da_bno == 0 && bp == NULL)
+		/*
+		 * The block was punched out.
+		 */
+		return;
+	ASSERT(bp);
+	dbno = XFS_DIR2_DA_TO_DB(mp, da_bno);
+	fdb = XFS_DIR2_DB_TO_FDB(mp, dbno);
+	if (libxfs_da_read_buf(NULL, ip, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+			XFS_DATA_FORK)) {
+		do_error("can't read block %u for directory inode %llu\n",
+			XFS_DIR2_DB_TO_DA(mp, fdb), ino);
+		/* NOTREACHED */
+	}
+	data = malloc(mp->m_dirblksize);
+	if (!data) {
+		do_error(
+		"malloc failed in longform_dir2_rebuild_data (%u bytes)\n",
+			mp->m_dirblksize);
+		exit(1);
+	}
+	bcopy(bp->data, data, mp->m_dirblksize);	/* private copy; the buffer itself is emptied below */
+	ptr = (char *)data->u;	/* walk starts at data->u in the copy */
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)data);
+		endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);	/* block magic: stop at the leaf pointer */
+	} else
+		endptr = (char *)data + mp->m_dirblksize;	/* otherwise walk to the end of the block */
+	fblock = fbp->data;
+	fi = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+	tp = libxfs_trans_alloc(mp, 0);
+	error = libxfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
+		XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+	if (error)
+		res_failed(error);
+	libxfs_trans_ijoin(tp, ip, 0);
+	libxfs_trans_ihold(tp, ip);
+	libxfs_da_bjoin(tp, bp);
+	libxfs_da_bhold(tp, bp);
+	libxfs_da_bjoin(tp, fbp);
+	libxfs_da_bhold(tp, fbp);
+	XFS_BMAP_INIT(&flist, &firstblock);
+	needlog = needscan = 0;
+	bzero(((xfs_dir2_data_t *)(bp->data))->hdr.bestfree,
+		sizeof(data->hdr.bestfree));
+	libxfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)sizeof(data->hdr),
+		mp->m_dirblksize - sizeof(data->hdr), &needlog, &needscan);
+	ASSERT(needscan == 0);
+	libxfs_dir2_data_log_header(tp, bp);
+	INT_SET(fblock->bests[fi], ARCH_CONVERT,
+		INT_GET(((xfs_dir2_data_t *)(bp->data))->hdr.bestfree[0].length, ARCH_CONVERT));
+	libxfs_dir2_free_log_bests(tp, fbp, fi, fi);
+	libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+	libxfs_trans_commit(tp, 0, 0);
+
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			ptr += INT_GET(dup->length, ARCH_CONVERT);	/* unused region: step over it */
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		if (dep->name[0] == '/')	/* names starting with '/' are not re-added */
+			continue;
+		tp = libxfs_trans_alloc(mp, 0);
+		nres = XFS_CREATE_SPACE_RES(mp, dep->namelen);
+		error = libxfs_trans_reserve(tp, nres, XFS_CREATE_LOG_RES(mp), 0,
+			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+		if (error)
+			res_failed(error);
+		libxfs_trans_ijoin(tp, ip, 0);
+		libxfs_trans_ihold(tp, ip);
+		libxfs_da_bjoin(tp, bp);
+		libxfs_da_bhold(tp, bp);
+		libxfs_da_bjoin(tp, fbp);
+		libxfs_da_bhold(tp, fbp);
+		XFS_BMAP_INIT(&flist, &firstblock);
+		error = dir_createname(mp, tp, ip, (char *)dep->name,
+			dep->namelen, INT_GET(dep->inumber, ARCH_CONVERT),
+			&firstblock, &flist, nres);
+		ASSERT(error == 0);
+		libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+		libxfs_trans_commit(tp, 0, 0);
+	}
+	libxfs_da_brelse(NULL, bp);
+	libxfs_da_brelse(NULL, fbp);
+	free(data);
+}
+
+/*
+ * Finish the rebuild of a directory.
+ * An entry named "/" is inserted and then taken out again, each step in
+ * its own transaction; this forces the directory to end up in the right
+ * format.
+ */
+void
+longform_dir2_rebuild_finish(
+	xfs_mount_t		*mp,
+	xfs_ino_t		ino,
+	xfs_inode_t		*ip)
+{
+	xfs_trans_t		*trans;
+	xfs_bmap_free_t		free_list;
+	xfs_fsblock_t		first;
+	int			blocks;
+	int			done;
+	int			error;
+
+	/* first transaction: add the "/" entry, using ino as its inode number */
+	trans = libxfs_trans_alloc(mp, 0);
+	blocks = XFS_CREATE_SPACE_RES(mp, 1);
+	if ((error = libxfs_trans_reserve(trans, blocks,
+			XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+			XFS_CREATE_LOG_COUNT)) != 0)
+		res_failed(error);
+	libxfs_trans_ijoin(trans, ip, 0);
+	libxfs_trans_ihold(trans, ip);
+	XFS_BMAP_INIT(&free_list, &first);
+	error = dir_createname(mp, trans, ip, "/", 1, ino, &first, &free_list,
+			blocks);
+	ASSERT(error == 0);
+	libxfs_bmap_finish(&trans, &free_list, first, &done);
+	libxfs_trans_commit(trans, 0, 0);
+
+	/* could kill trailing empty data blocks here */
+
+	/* second transaction: take the "/" entry back out */
+	trans = libxfs_trans_alloc(mp, 0);
+	blocks = XFS_REMOVE_SPACE_RES(mp);
+	if ((error = libxfs_trans_reserve(trans, blocks,
+			XFS_REMOVE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+			XFS_REMOVE_LOG_COUNT)) != 0)
+		res_failed(error);
+	libxfs_trans_ijoin(trans, ip, 0);
+	libxfs_trans_ihold(trans, ip);
+	XFS_BMAP_INIT(&free_list, &first);
+	error = dir_removename(mp, trans, ip, "/", 1, ino, &first, &free_list,
+			blocks);
+	ASSERT(error == 0);
+	libxfs_bmap_finish(&trans, &free_list, first, &done);
+	libxfs_trans_commit(trans, 0, 0);
+}
+
+/*
+ * Rebuild a directory.
+ *
+ * Steps, in order:
+ *  1. kill every block found from m_dirleafblk upwards (the non-data
+ *     blocks); skipped entirely when isblock is set,
+ *  2. re-initialize to (empty) node form via longform_dir2_rebuild_setup(),
+ *  3. walk the data blocks below m_dirleafblk, reinserting each entry,
+ *  4. force the directory into the right format.
+ *
+ * *num_illegal is reset to 0 once the rebuild is complete.
+ */
+void
+longform_dir2_rebuild(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_inode_t	*ip,
+	int		*num_illegal,
+	freetab_t	*freetab,
+	int		isblock)
+{
+	xfs_dabuf_t	*victim;
+	xfs_dablk_t	da_bno;
+	xfs_fileoff_t	next_off;
+
+	do_warn("rebuilding directory inode %llu\n", ino);
+
+	/* step 1: throw away everything at or above the leaf offset */
+	da_bno = mp->m_dirleafblk;
+	next_off = isblock ? NULLFILEOFF : 0;
+	while (next_off != NULLFILEOFF) {
+		next_off = da_bno + mp->m_dirblkfsbs - 1;
+		if (libxfs_bmap_next_offset(NULL, ip, &next_off, XFS_DATA_FORK))
+			break;
+		if (libxfs_da_get_buf(NULL, ip, da_bno, -1, &victim,
+				XFS_DATA_FORK)) {
+			do_error("can't get block %u for directory inode %llu\n",
+				da_bno, ino);
+			/* NOTREACHED */
+		}
+		dir2_kill_block(mp, ip, da_bno, victim);
+		da_bno = (xfs_dablk_t)next_off;
+	}
+
+	/* step 2: fresh, empty node-format skeleton */
+	longform_dir2_rebuild_setup(mp, ino, ip, freetab);
+
+	/* step 3: re-add the entries of every data block */
+	da_bno = mp->m_dirdatablk;
+	next_off = 0;
+	while (da_bno < mp->m_dirleafblk && next_off != NULLFILEOFF) {
+		next_off = da_bno + mp->m_dirblkfsbs - 1;
+		if (libxfs_bmap_next_offset(NULL, ip, &next_off, XFS_DATA_FORK))
+			break;
+		longform_dir2_rebuild_data(mp, ino, ip, da_bno);
+		da_bno = (xfs_dablk_t)next_off;
+	}
+
+	/* step 4: settle on the final format */
+	longform_dir2_rebuild_finish(mp, ino, ip);
+	*num_illegal = 0;
+}
+
+/*
+ * Check the entries of a version 2 long-form directory, one data block
+ * at a time, then cross-check the collected name hashes against the
+ * block/leaf/node index and rebuild the directory if anything is wrong
+ * (and we are allowed to modify).
+ *
+ * mp:          mount structure
+ * ino:         inode number of the directory
+ * ip:          in-core inode of the directory
+ * num_illegal: passed to the per-block checker; a non-zero value
+ *              afterwards triggers a rebuild
+ * need_dot:    set to 1 here, then passed to the per-block checker
+ * stack:       directory stack handed on to the per-block checker
+ * irec, ino_offset: inode record / offset of this directory
+ *
+ * succeeds or dies, inode never gets dirtied since all changes
+ * happen in file blocks.  the inode size and other core info
+ * is already correct, it's just the leaf entries that get altered.
+ * XXX above comment is wrong for v2 - need to see why it matters
+ */
+void
+longform_dir2_entry_check(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_inode_t	*ip,
+			int		*num_illegal,
+			int		*need_dot,
+			dir_stack_t	*stack,
+			ino_tree_node_t	*irec,
+			int		ino_offset)
+{
+	xfs_dir2_block_t	*block;
+	xfs_dir2_leaf_entry_t	*blp;
+	xfs_dabuf_t		*bp;
+	xfs_dir2_block_tail_t	*btp;
+	xfs_dablk_t		da_bno;
+	freetab_t		*freetab;
+	dir_hash_tab_t		*hashtab;
+	int			i;
+	int			isblock;
+	int			isleaf;
+	xfs_fileoff_t		next_da_bno;
+	int			seeval;
+	int			fixit;
+
+	*need_dot = 1;
+	freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dirblksize));
+	if (!freetab) {
+		do_error(
+		"malloc failed in longform_dir2_entry_check (%u bytes)\n",
+			FREETAB_SIZE(ip->i_d.di_size / mp->m_dirblksize));
+		exit(1);
+	}
+	/* one (initially empty) slot per directory block worth of size */
+	freetab->naents = ip->i_d.di_size / mp->m_dirblksize;
+	freetab->nents = 0;
+	for (i = 0; i < freetab->naents; i++) {
+		freetab->ents[i].v = NULLDATAOFF;
+		freetab->ents[i].s = 0;
+	}
+	libxfs_dir2_isblock(NULL, ip, &isblock);
+	libxfs_dir2_isleaf(NULL, ip, &isleaf);
+	hashtab = dir_hash_init(ip->i_d.di_size);
+	/*
+	 * The scan loop below can exit before any block has been read
+	 * (libxfs_bmap_next_offset failing on the first pass).  Start
+	 * from a known value so the isblock ASSERT(bp) further down
+	 * tests something defined instead of stack garbage.
+	 */
+	bp = NULL;
+	for (da_bno = 0, next_da_bno = 0;
+	     next_da_bno != NULLFILEOFF && da_bno < mp->m_dirleafblk;
+	     da_bno = (xfs_dablk_t)next_da_bno) {
+		next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
+		if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
+			break;
+		if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
+				XFS_DATA_FORK)) {
+			do_error("can't read block %u for directory inode "
+				 "%llu\n",
+				da_bno, ino);
+			/* NOTREACHED */
+		}
+		longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
+			stack, irec, ino_offset, &bp, hashtab, &freetab, da_bno,
+			isblock);
+		/* it releases the buffer unless isblock is set */
+	}
+	fixit = (*num_illegal != 0) || dir2_is_badino(ino);
+	if (isblock) {
+		/* block form: the leaf entries live in the data block itself */
+		ASSERT(bp);
+		block = bp->data;
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+		blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+		seeval = dir_hash_see_all(hashtab, blp, INT_GET(btp->count, ARCH_CONVERT), INT_GET(btp->stale, ARCH_CONVERT));
+		if (dir_hash_check(hashtab, ip, seeval))
+			fixit |= 1;
+		libxfs_da_brelse(NULL, bp);
+	} else if (isleaf) {
+		fixit |= longform_dir2_check_leaf(mp, ip, hashtab, freetab);
+	} else {
+		fixit |= longform_dir2_check_node(mp, ip, hashtab, freetab);
+	}
+	dir_hash_done(hashtab);
+	if (!no_modify && fixit)
+		longform_dir2_rebuild(mp, ino, ip, num_illegal, freetab,
+			isblock);
+	free(freetab);
+}
+
+/*
+ * shortform directory processing routines -- entry verification and
+ * bad entry deletion (pruning).
+ *
+ * Walks the version 1 shortform entries held in ip's data fork.  Each
+ * entry's target inode is looked up; link/reached counts are bumped for
+ * good entries, child directories that still need checking are pushed on
+ * the stack, and bad entries are removed in place (or only reported when
+ * no_modify is set).
+ *
+ * mp:        mount structure
+ * ino:       inode number of the directory being checked
+ * ip:        in-core inode of the directory
+ * ino_dirty: set to 1 if the inode was changed and must be logged
+ * stack:     directory stack that unchecked child directories go onto
+ * current_irec, current_ino_offset: inode record / offset of this dir
+ */
+void
+shortform_dir_entry_check(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_inode_t	*ip,
+			int		*ino_dirty,
+			dir_stack_t	*stack,
+			ino_tree_node_t	*current_irec,
+			int		current_ino_offset)
+{
+	xfs_ino_t		lino;
+	xfs_ino_t		parent;
+	xfs_dir_shortform_t	*sf;
+	xfs_dir_sf_entry_t	*sf_entry, *next_sfe, *tmp_sfe;
+	xfs_ifork_t		*ifp;
+	ino_tree_node_t		*irec;
+	int			max_size;
+	int			ino_offset;
+	int			i;
+	int			junkit;
+	int			tmp_len;
+	int			tmp_elen;
+	int			bad_sfnamelen;
+	int			namelen;
+	int			bytes_deleted;
+	char			fname[MAXNAMELEN + 1];
+
+	ifp = &ip->i_df;
+	sf = (xfs_dir_shortform_t *) ifp->if_u1.if_data;
+	*ino_dirty = 0;
+	bytes_deleted = 0;
+
+	max_size = ifp->if_bytes;
+	ASSERT(ip->i_d.di_size <= ifp->if_bytes);
+
+	/*
+	 * no '.' entry in shortform dirs, just bump up ref count by 1
+	 * '..' was already (or will be) accounted for and checked when
+	 * the directory is reached or will be taken care of when the
+	 * directory is moved to orphanage.
+	 */
+	add_inode_ref(current_irec, current_ino_offset);
+
+	/*
+	 * now run through entries, stop at first bad entry, don't need
+	 * to skip over '..' since that's encoded in its own field and
+	 * no need to worry about '.' since it doesn't exist.
+	 */
+	sf_entry = next_sfe = &sf->list[0];
+	if (sf == NULL) {
+		junkit = 1;
+		do_warn("shortform dir inode %llu has null data entries \n", ino);
+
+		}
+	else {
+	   for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && max_size >
+					(__psint_t)next_sfe - (__psint_t)sf;
+			sf_entry = next_sfe, i++)  {
+		junkit = 0;
+		bad_sfnamelen = 0;
+		tmp_sfe = NULL;
+
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+
+		namelen = sf_entry->namelen;
+
+		ASSERT(no_modify || namelen > 0);
+
+		if (no_modify && namelen == 0)  {
+			/*
+			 * if we're really lucky, this is
+			 * the last entry in which case we
+			 * can use the dir size to set the
+			 * namelen value.  otherwise, forget
+			 * it because we're not going to be
+			 * able to find the next entry.
+			 */
+			bad_sfnamelen = 1;
+
+			if (i == INT_GET(sf->hdr.count, ARCH_CONVERT) - 1)  {
+				namelen = ip->i_d.di_size -
+					((__psint_t) &sf_entry->name[0] -
+					 (__psint_t) sf);
+			} else  {
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		} else if (no_modify && (__psint_t) sf_entry - (__psint_t) sf +
+				+ XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+				> ip->i_d.di_size)  {
+			bad_sfnamelen = 1;
+
+			if (i == INT_GET(sf->hdr.count, ARCH_CONVERT) - 1)  {
+				namelen = ip->i_d.di_size -
+					((__psint_t) &sf_entry->name[0] -
+					 (__psint_t) sf);
+			} else  {
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		}
+
+		bcopy(sf_entry->name, fname, sf_entry->namelen);
+		fname[sf_entry->namelen] = '\0';
+
+		ASSERT(no_modify || lino != NULLFSINO);
+		ASSERT(no_modify || !verify_inum(mp, lino));
+
+		/*
+		 * special case the "lost+found" entry if it's pointing
+		 * to where we think lost+found should be.  if that's
+		 * the case, that's the one we created in phase 6.
+		 * just skip it.  no need to process it and its ..
+		 * link is already accounted for.  Also skip entries
+		 * with bogus inode numbers if we're in no modify mode.
+		 */
+
+		if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0
+				|| no_modify && verify_inum(mp, lino))  {
+			next_sfe = (xfs_dir_sf_entry_t *)
+				((__psint_t) sf_entry +
+				XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry));
+			continue;
+		}
+
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+					XFS_INO_TO_AGINO(mp, lino));
+
+		if (irec == NULL && no_modify)  {
+			do_warn(
+"entry \"%s\" in shortform dir %llu references non-existent ino %llu\n",
+				fname, ino, lino);
+			do_warn("would junk entry\n");
+			/*
+			 * step past this entry before continuing.  the
+			 * loop's increment reloads sf_entry from next_sfe,
+			 * so leaving next_sfe alone would make us look at
+			 * this same entry again on every remaining pass
+			 * and never reach the entries behind it.
+			 */
+			next_sfe = (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry
+				+ ((!bad_sfnamelen)
+					? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+					: sizeof(xfs_dir_sf_entry_t) - 1
+						+ namelen));
+			continue;
+		}
+
+		ASSERT(irec != NULL);
+
+		ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
+
+		/*
+		 * if it's a free inode, blow out the entry.
+		 * by now, any inode that we think is free
+		 * really is free.
+		 */
+		if (is_inode_free(irec, ino_offset))  {
+			/*
+			 * don't complain if this entry points to the old
+			 * and now-free lost+found inode
+			 */
+			if (verbose || no_modify || lino != old_orphanage_ino)
+				do_warn(
+	"entry \"%s\" in shortform dir inode %llu points to free inode %llu\n",
+					fname, ino, lino);
+
+			if (!no_modify)  {
+				junkit = 1;
+			} else  {
+				do_warn("would junk entry \"%s\"\n",
+					fname);
+			}
+		} else if (!inode_isadir(irec, ino_offset))  {
+			/*
+			 * check easy case first, regular inode, just bump
+			 * the link count and continue
+			 */
+			add_inode_reached(irec, ino_offset);
+
+			next_sfe = (xfs_dir_sf_entry_t *)
+				((__psint_t) sf_entry +
+				XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry));
+			continue;
+		} else  {
+			parent = get_inode_parent(irec, ino_offset);
+
+			/*
+			 * bump up the link counts in parent and child.
+			 * directory but if the link doesn't agree with
+			 * the .. in the child, blow out the entry
+			 */
+			if (is_inode_reached(irec, ino_offset))  {
+				junkit = 1;
+				do_warn(
+	"entry \"%s\" in dir %llu references already connected dir ino %llu,\n",
+					fname, ino, lino);
+			} else if (parent == ino)  {
+				add_inode_reached(irec, ino_offset);
+				add_inode_ref(current_irec, current_ino_offset);
+
+				if (!is_inode_refchecked(lino, irec,
+						ino_offset))
+					push_dir(stack, lino);
+			} else  {
+				junkit = 1;
+				do_warn(
+"entry \"%s\" in dir %llu not consistent with .. value (%llu) in dir ino %llu,\n",
+					fname, ino, parent, lino);
+			}
+		}
+
+		if (junkit)  {
+			if (!no_modify)  {
+				/*
+				 * slide everything behind this entry down
+				 * over it and zero the vacated tail.
+				 */
+				tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+				tmp_sfe = (xfs_dir_sf_entry_t *)
+					((__psint_t) sf_entry + tmp_elen);
+				tmp_len = max_size - ((__psint_t) tmp_sfe
+							- (__psint_t) sf);
+				max_size -= tmp_elen;
+				bytes_deleted += tmp_elen;
+
+				memmove(sf_entry, tmp_sfe, tmp_len);
+
+				INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+				bzero((void *) ((__psint_t) sf_entry + tmp_len),
+						tmp_elen);
+
+				/*
+				 * set the tmp value to the current
+				 * pointer so we'll process the entry
+				 * we just moved up
+				 */
+				tmp_sfe = sf_entry;
+
+				/*
+				 * WARNING:  drop the index i by one
+				 * so it matches the decremented count for
+				 * accurate comparisons in the loop test
+				 */
+				i--;
+
+				*ino_dirty = 1;
+
+				if (verbose || lino != old_orphanage_ino)
+					do_warn(
+			"junking entry \"%s\" in directory inode %llu\n",
+						fname, lino);
+			} else  {
+				do_warn("would junk entry \"%s\"\n", fname);
+			}
+		}
+
+		/*
+		 * go onto next entry unless we've just junked an
+		 * entry in which the current entry pointer points
+		 * to an unprocessed entry.  have to take into entries
+		 * with bad namelen into account in no modify mode since we
+		 * calculate size based on next_sfe.
+		 */
+		ASSERT(no_modify || bad_sfnamelen == 0);
+
+		next_sfe = (tmp_sfe == NULL)
+			? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry
+				+ ((!bad_sfnamelen)
+					? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+					: sizeof(xfs_dir_sf_entry_t) - 1
+						+ namelen))
+			: tmp_sfe;
+	    }
+	}
+
+	/*
+	 * sync up sizes if required
+	 */
+	if (*ino_dirty)  {
+		ASSERT(bytes_deleted > 0);
+		ASSERT(!no_modify);
+		libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
+		ip->i_d.di_size -= bytes_deleted;
+	}
+
+	if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+		ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
+				((__psint_t) next_sfe - (__psint_t) sf));
+		ip->i_d.di_size = (xfs_fsize_t)
+				((__psint_t) next_sfe - (__psint_t) sf);
+		do_warn(
+		"setting size to %lld bytes to reflect junked entries\n",
+				ip->i_d.di_size);
+		*ino_dirty = 1;
+	}
+}
+
+/* ARGSUSED */
+void
+prune_sf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip)
+{
+				/* REFERENCED */
+	xfs_ino_t		lino;
+	xfs_dir_shortform_t	*sf;
+	xfs_dir_sf_entry_t	*cur, *next_sfe, *behind;
+	xfs_ifork_t		*ifp;
+	int			space_left;
+	int			idx;
+	int			tail_bytes;
+	int			ent_bytes;
+	int			removed;
+	char			fname[MAXNAMELEN + 1];
+
+	/*
+	 * Walk the shortform (v1) entries in the inode's data fork and,
+	 * unless no_modify is set, squeeze out every entry whose name
+	 * starts with '/'.  Afterwards the fork and the inode size are
+	 * shrunk to match.
+	 */
+	ifp = &ip->i_df;
+	sf = (xfs_dir_shortform_t *) ifp->if_u1.if_data;
+	removed = 0;
+
+	space_left = ifp->if_bytes;
+	ASSERT(ip->i_d.di_size <= ifp->if_bytes);
+
+	cur = next_sfe = &sf->list[0];
+	idx = 0;
+
+	while (idx < INT_GET(sf->hdr.count, ARCH_CONVERT) &&
+	       space_left > (__psint_t)next_sfe - (__psint_t)sf)  {
+		XFS_DIR_SF_GET_DIRINO_ARCH(&cur->inumber, &lino, ARCH_CONVERT);
+
+		bcopy(cur->name, fname, cur->namelen);
+		fname[cur->namelen] = '\0';
+
+		if (cur->name[0] == '/' && !no_modify)  {
+			/* bytes taken by the doomed entry, and what follows it */
+			ent_bytes = XFS_DIR_SF_ENTSIZE_BYENTRY(cur);
+			behind = (xfs_dir_sf_entry_t *)
+				((__psint_t) cur + ent_bytes);
+			tail_bytes = space_left -
+				((__psint_t) behind - (__psint_t) sf);
+			space_left -= ent_bytes;
+			removed += ent_bytes;
+
+			/* close the gap, then clear the now-unused tail */
+			memmove(cur, behind, tail_bytes);
+			INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+			bzero((void *) ((__psint_t) cur + tail_bytes),
+					ent_bytes);
+
+			/*
+			 * the entry that moved up now sits at cur and has
+			 * not been looked at yet, so stay put.  idx is not
+			 * advanced either: the count just dropped by one.
+			 */
+			next_sfe = cur;
+		} else  {
+			next_sfe = (xfs_dir_sf_entry_t *) ((__psint_t) cur +
+				XFS_DIR_SF_ENTSIZE_BYENTRY(cur));
+			idx++;
+		}
+		cur = next_sfe;
+	}
+
+	/* give back the bytes that were squeezed out */
+	if (removed > 0)  {
+		libxfs_idata_realloc(ip, -removed, XFS_DATA_FORK);
+		ip->i_d.di_size -= removed;
+	}
+
+	/* inode size and fork size still disagree: trust the walk */
+	if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+		ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
+				((__psint_t) next_sfe - (__psint_t) sf));
+		ip->i_d.di_size = (xfs_fsize_t)
+				((__psint_t) next_sfe - (__psint_t) sf);
+		do_warn(
+		"setting size to %lld bytes to reflect junked entries\n",
+				ip->i_d.di_size);
+	}
+}
+
+/*
+ * shortform directory v2 processing routines -- entry verification and
+ * bad entry deletion (pruning).
+ *
+ * Walks the version 2 shortform entries held in ip's data fork.  Each
+ * entry's target inode is looked up; link/reached counts are bumped for
+ * good entries, child directories that still need checking are pushed on
+ * the stack, and bad entries are removed in place (or only reported when
+ * no_modify is set).  The number of surviving entries whose inode number
+ * exceeds XFS_DIR2_MAX_SHORT_INUM is compared with hdr.i8count and the
+ * header is corrected on mismatch.
+ *
+ * mp:        mount structure
+ * ino:       inode number of the directory being checked
+ * ip:        in-core inode of the directory
+ * ino_dirty: set to 1 if the inode was changed and must be logged
+ * stack:     directory stack that unchecked child directories go onto
+ * current_irec, current_ino_offset: inode record / offset of this dir
+ */
+void
+shortform_dir2_entry_check(xfs_mount_t	*mp,
+			xfs_ino_t	ino,
+			xfs_inode_t	*ip,
+			int		*ino_dirty,
+			dir_stack_t	*stack,
+			ino_tree_node_t	*current_irec,
+			int		current_ino_offset)
+{
+	xfs_ino_t		lino;
+	xfs_ino_t		parent;
+	xfs_dir2_sf_t		*sfp;
+	xfs_dir2_sf_entry_t	*sfep, *next_sfep, *tmp_sfep;
+	xfs_ifork_t		*ifp;
+	ino_tree_node_t		*irec;
+	int			max_size;
+	int			ino_offset;
+	int			i;
+	int			junkit;
+	int			tmp_len;
+	int			tmp_elen;
+	int			bad_sfnamelen;
+	int			namelen;
+	int			bytes_deleted;
+	char			fname[MAXNAMELEN + 1];
+	int			i8;
+
+	ifp = &ip->i_df;
+	sfp = (xfs_dir2_sf_t *) ifp->if_u1.if_data;
+	*ino_dirty = 0;
+	/*
+	 * NOTE(review): i8 starts at zero, so the '..' inode number kept
+	 * in the header is not part of the tally -- confirm whether
+	 * hdr.i8count is meant to include it.
+	 */
+	bytes_deleted = i8 = 0;
+
+	max_size = ifp->if_bytes;
+	ASSERT(ip->i_d.di_size <= ifp->if_bytes);
+
+	/*
+	 * no '.' entry in shortform dirs, just bump up ref count by 1
+	 * '..' was already (or will be) accounted for and checked when
+	 * the directory is reached or will be taken care of when the
+	 * directory is moved to orphanage.
+	 */
+	add_inode_ref(current_irec, current_ino_offset);
+
+	/*
+	 * now run through entries, stop at first bad entry, don't need
+	 * to skip over '..' since that's encoded in its own field and
+	 * no need to worry about '.' since it doesn't exist.
+	 */
+	sfep = next_sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+
+	for (i = 0; i < INT_GET(sfp->hdr.count, ARCH_CONVERT) && max_size >
+					(__psint_t)next_sfep - (__psint_t)sfp;
+			sfep = next_sfep, i++)  {
+		junkit = 0;
+		bad_sfnamelen = 0;
+		tmp_sfep = NULL;
+
+		lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+
+		namelen = sfep->namelen;
+
+		ASSERT(no_modify || namelen > 0);
+
+		if (no_modify && namelen == 0)  {
+			/*
+			 * if we're really lucky, this is
+			 * the last entry in which case we
+			 * can use the dir size to set the
+			 * namelen value.  otherwise, forget
+			 * it because we're not going to be
+			 * able to find the next entry.
+			 */
+			bad_sfnamelen = 1;
+
+			if (i == INT_GET(sfp->hdr.count, ARCH_CONVERT) - 1)  {
+				namelen = ip->i_d.di_size -
+					((__psint_t) &sfep->name[0] -
+					 (__psint_t) sfp);
+			} else  {
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		} else if (no_modify && (__psint_t) sfep - (__psint_t) sfp +
+				+ XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+				> ip->i_d.di_size)  {
+			bad_sfnamelen = 1;
+
+			if (i == INT_GET(sfp->hdr.count, ARCH_CONVERT) - 1)  {
+				namelen = ip->i_d.di_size -
+					((__psint_t) &sfep->name[0] -
+					 (__psint_t) sfp);
+			} else  {
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		}
+
+		bcopy(sfep->name, fname, sfep->namelen);
+		fname[sfep->namelen] = '\0';
+
+		ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
+		ASSERT(no_modify || !verify_inum(mp, lino));
+
+		/*
+		 * special case the "lost+found" entry if it's pointing
+		 * to where we think lost+found should be.  if that's
+		 * the case, that's the one we created in phase 6.
+		 * just skip it.  no need to process it and its ..
+		 * link is already accounted for.  Also skip entries
+		 * with bogus inode numbers if we're in no modify mode.
+		 */
+
+		if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0
+				|| no_modify && verify_inum(mp, lino))  {
+			/*
+			 * the entry stays in the directory, so it still
+			 * has to show up in the 8-byte inode tally.
+			 */
+			if (lino > XFS_DIR2_MAX_SHORT_INUM)
+				i8++;
+			next_sfep = (xfs_dir2_sf_entry_t *)
+				((__psint_t) sfep +
+				XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep));
+			continue;
+		}
+
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+					XFS_INO_TO_AGINO(mp, lino));
+
+		if (irec == NULL && no_modify)  {
+			do_warn("entry \"%s\" in shortform directory %llu "
+				"references non-existent inode %llu\n",
+				fname, ino, lino);
+			do_warn("would junk entry\n");
+			/*
+			 * step past this entry before continuing.  the
+			 * loop's increment reloads sfep from next_sfep,
+			 * so leaving next_sfep alone would make us look
+			 * at this same entry again on every remaining
+			 * pass and never reach the entries behind it.
+			 */
+			next_sfep = (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
+				+ ((!bad_sfnamelen)
+					? XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+					: XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, namelen)));
+			continue;
+		}
+
+		ASSERT(irec != NULL);
+
+		ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
+
+		/*
+		 * if it's a free inode, blow out the entry.
+		 * by now, any inode that we think is free
+		 * really is free.
+		 */
+		if (is_inode_free(irec, ino_offset))  {
+			/*
+			 * don't complain if this entry points to the old
+			 * and now-free lost+found inode
+			 */
+			if (verbose || no_modify || lino != old_orphanage_ino)
+				do_warn("entry \"%s\" in shortform directory "
+					"inode %llu points to free inode "
+					"%llu\n",
+					fname, ino, lino);
+
+			if (!no_modify)  {
+				junkit = 1;
+			} else  {
+				do_warn("would junk entry \"%s\"\n",
+					fname);
+			}
+		} else if (!inode_isadir(irec, ino_offset))  {
+			/*
+			 * check easy case first, regular inode, just bump
+			 * the link count and continue
+			 */
+			add_inode_reached(irec, ino_offset);
+
+			/*
+			 * this continue bypasses the i8 accounting at the
+			 * bottom of the loop, so do it here; otherwise
+			 * large inode numbers of plain files would be
+			 * missed and a correct i8count "fixed".
+			 */
+			if (lino > XFS_DIR2_MAX_SHORT_INUM)
+				i8++;
+
+			next_sfep = (xfs_dir2_sf_entry_t *)
+				((__psint_t) sfep +
+				XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep));
+			continue;
+		} else  {
+			parent = get_inode_parent(irec, ino_offset);
+
+			/*
+			 * bump up the link counts in parent and child.
+			 * directory but if the link doesn't agree with
+			 * the .. in the child, blow out the entry
+			 */
+			if (is_inode_reached(irec, ino_offset))  {
+				junkit = 1;
+				do_warn("entry \"%s\" in directory inode %llu "
+					"references already connected inode "
+					"%llu,\n",
+					fname, ino, lino);
+			} else if (parent == ino)  {
+				add_inode_reached(irec, ino_offset);
+				add_inode_ref(current_irec, current_ino_offset);
+
+				if (!is_inode_refchecked(lino, irec,
+						ino_offset))
+					push_dir(stack, lino);
+			} else  {
+				junkit = 1;
+				do_warn("entry \"%s\" in directory inode %llu "
+					"not consistent with .. value (%llu) "
+					"in inode %llu,\n",
+					fname, ino, parent, lino);
+			}
+		}
+
+		if (junkit)  {
+			if (!no_modify)  {
+				/*
+				 * slide everything behind this entry down
+				 * over it and zero the vacated tail.
+				 */
+				tmp_elen = XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep);
+				tmp_sfep = (xfs_dir2_sf_entry_t *)
+					((__psint_t) sfep + tmp_elen);
+				tmp_len = max_size - ((__psint_t) tmp_sfep
+							- (__psint_t) sfp);
+				max_size -= tmp_elen;
+				bytes_deleted += tmp_elen;
+
+				memmove(sfep, tmp_sfep, tmp_len);
+
+				INT_MOD(sfp->hdr.count, ARCH_CONVERT, -1);
+				bzero((void *) ((__psint_t) sfep + tmp_len),
+						tmp_elen);
+
+				/*
+				 * set the tmp value to the current
+				 * pointer so we'll process the entry
+				 * we just moved up
+				 */
+				tmp_sfep = sfep;
+
+				/*
+				 * WARNING:  drop the index i by one
+				 * so it matches the decremented count for
+				 * accurate comparisons in the loop test
+				 */
+				i--;
+
+				*ino_dirty = 1;
+
+				if (verbose || lino != old_orphanage_ino)
+					do_warn("junking entry \"%s\" in "
+						"directory inode %llu\n",
+						fname, lino);
+			} else  {
+				do_warn("would junk entry \"%s\"\n", fname);
+			}
+		} else if (lino > XFS_DIR2_MAX_SHORT_INUM)
+			i8++;
+
+		/*
+		 * go onto next entry unless we've just junked an
+		 * entry in which the current entry pointer points
+		 * to an unprocessed entry.  have to take into entries
+		 * with bad namelen into account in no modify mode since we
+		 * calculate size based on next_sfep.
+		 */
+		ASSERT(no_modify || bad_sfnamelen == 0);
+
+		next_sfep = (tmp_sfep == NULL)
+			? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
+				+ ((!bad_sfnamelen)
+					? XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+					: XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, namelen)))
+			: tmp_sfep;
+	}
+
+	if (sfp->hdr.i8count != i8) {
+		if (no_modify) {
+			do_warn("would fix i8count in inode %llu\n", ino);
+		} else {
+			if (i8 == 0) {
+				/*
+				 * no 8-byte inode numbers left: let
+				 * process_sf_dir2_fixi8() rewrite the
+				 * entries; whatever it moves the end
+				 * pointer back by counts as deleted.
+				 */
+				tmp_sfep = next_sfep;
+				process_sf_dir2_fixi8(sfp, &tmp_sfep);
+				bytes_deleted +=
+					(__psint_t)next_sfep -
+					(__psint_t)tmp_sfep;
+				next_sfep = tmp_sfep;
+			} else
+				sfp->hdr.i8count = i8;
+			*ino_dirty = 1;
+			do_warn("fixing i8count in inode %llu\n", ino);
+		}
+	}
+
+	/*
+	 * sync up sizes if required.  the inode can be dirty without
+	 * any bytes having been removed (only i8count was rewritten),
+	 * so the shrink is keyed on bytes_deleted as well.
+	 */
+	if (*ino_dirty && bytes_deleted > 0)  {
+		ASSERT(!no_modify);
+		libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
+		ip->i_d.di_size -= bytes_deleted;
+	}
+
+	if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+		ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
+				((__psint_t) next_sfep - (__psint_t) sfp));
+		ip->i_d.di_size = (xfs_fsize_t)
+				((__psint_t) next_sfep - (__psint_t) sfp);
+		do_warn("setting size to %lld bytes to reflect junked "
+			"entries\n",
+			ip->i_d.di_size);
+		*ino_dirty = 1;
+	}
+}
+
+/*
+ * processes all directories reachable via the inodes on the stack.
+ * no status is returned (the function is void); problems found along
+ * the way are reported through do_warn()/do_error().
+ */
+void
+process_dirstack(xfs_mount_t *mp, dir_stack_t *stack)
+{
+	xfs_bmap_free_t		flist;
+	xfs_fsblock_t		first;
+	xfs_ino_t		ino;
+	xfs_inode_t		*ip;
+	xfs_trans_t		*tp;
+	xfs_dahash_t		hashval;
+	ino_tree_node_t		*irec;
+	int			ino_offset, need_dot, committed;
+	int			dirty, num_illegal, error, nres;
+
+	/*
+	 * pull directory inode # off directory stack
+	 *
+	 * open up directory inode, check all entries,
+	 * then call prune_dir_entries to remove all
+	 * remaining illegal directory entries.
+	 */
+
+	while ((ino = pop_dir(stack)) != NULLFSINO)  {
+		irec = find_inode_rec(XFS_INO_TO_AGNO(mp, ino),
+					XFS_INO_TO_AGINO(mp, ino));
+		ASSERT(irec != NULL);
+
+		ino_offset = XFS_INO_TO_AGINO(mp, ino) - irec->ino_startnum;
+
+		ASSERT(!is_inode_refchecked(ino, irec, ino_offset));
+
+		if (error = libxfs_iget(mp, NULL, ino, 0, &ip, 0))  {
+			if (!no_modify)
+				do_error("couldn't map inode %llu, err = %d\n",
+					ino, error);
+			else  {
+				do_warn("couldn't map inode %llu, err = %d\n",
+					ino, error);
+				/*
+				 * see below for what we're doing if this
+				 * is root.  Why do we need to do this here?
+				 * to ensure that the root doesn't show up
+				 * as being disconnected in the no_modify case.
+				 */
+				if (mp->m_sb.sb_rootino == ino)  {
+					add_inode_reached(irec, 0);
+					add_inode_ref(irec, 0);
+				}
+			}
+
+			add_inode_refchecked(ino, irec, 0);
+			continue;
+		}
+
+		need_dot = dirty = num_illegal = 0;
+
+		if (mp->m_sb.sb_rootino == ino)  {
+			/*
+			 * mark root inode reached and bump up
+			 * link count for root inode to account
+			 * for '..' entry since the root inode is
+			 * never reached by a parent.  we know
+			 * that root's '..' is always good --
+			 * guaranteed by phase 3 and/or below.
+			 */
+			add_inode_reached(irec, ino_offset);
+			/*
+			 * account for link for the orphanage
+			 * "lost+found".  if we're running in
+			 * modify mode and it already existed,
+			 * we deleted it so it's '..' reference
+			 * never got counted.  so add it here if
+			 * we're going to create lost+found.
+			 *
+			 * if we're running in no_modify mode,
+			 * we never deleted lost+found and we're
+			 * not going to create it so do nothing.
+			 *
+			 * either way, the counts will match when
+			 * we look at the root inode's nlinks
+			 * field and compare that to our incore
+			 * count in phase 7.
+			 */
+			if (!no_modify)
+				add_inode_ref(irec, ino_offset);
+		}
+
+		add_inode_refchecked(ino, irec, ino_offset);
+
+		/*
+		 * look for bogus entries
+		 */
+		switch (ip->i_d.di_format)  {
+		case XFS_DINODE_FMT_EXTENTS:
+		case XFS_DINODE_FMT_BTREE:
+			/*
+			 * also check for missing '.' in longform dirs.
+			 * missing .. entries are added if required when
+			 * the directory is connected to lost+found. but
+			 * we need to create '.' entries here.
+			 */
+			if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+				longform_dir2_entry_check(mp, ino, ip,
+							&num_illegal, &need_dot,
+							stack, irec,
+							ino_offset);
+			else
+				longform_dir_entry_check(mp, ino, ip,
+							&num_illegal, &need_dot,
+							stack, irec,
+							ino_offset);
+			break;
+		case XFS_DINODE_FMT_LOCAL:
+			tp = libxfs_trans_alloc(mp, 0);
+			/*
+			 * using the remove reservation is overkill
+			 * since at most we'll only need to log the
+			 * inode but it's easier than wedging a
+			 * new define in ourselves.
+			 */
+			nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
+			error = libxfs_trans_reserve(tp, nres,
+					XFS_REMOVE_LOG_RES(mp), 0,
+					XFS_TRANS_PERM_LOG_RES,
+					XFS_REMOVE_LOG_COUNT);
+			if (error)
+				res_failed(error);
+
+			libxfs_trans_ijoin(tp, ip, 0);
+			libxfs_trans_ihold(tp, ip);
+
+			if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+				shortform_dir2_entry_check(mp, ino, ip, &dirty,
+							stack, irec,
+							ino_offset);
+			else
+				shortform_dir_entry_check(mp, ino, ip, &dirty,
+							stack, irec,
+							ino_offset);
+
+			ASSERT(dirty == 0 || dirty && !no_modify);
+			if (dirty)  {
+				libxfs_trans_log_inode(tp, ip,
+					XFS_ILOG_CORE | XFS_ILOG_DDATA);
+				libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+						|XFS_TRANS_SYNC, 0);
+			} else  {
+				libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+			}
+			break;
+		default:
+			break;
+		}
+
+		hashval = 0;
+
+		if (!no_modify && !orphanage_entered &&
+		    ino == mp->m_sb.sb_rootino) {
+			do_warn("re-entering %s into root directory\n",
+				ORPHANAGE);
+			tp = libxfs_trans_alloc(mp, 0);
+			nres = XFS_MKDIR_SPACE_RES(mp, strlen(ORPHANAGE));
+			error = libxfs_trans_reserve(tp, nres,
+					XFS_MKDIR_LOG_RES(mp), 0,
+					XFS_TRANS_PERM_LOG_RES,
+					XFS_MKDIR_LOG_COUNT);
+			if (error)
+				res_failed(error);
+			libxfs_trans_ijoin(tp, ip, 0);
+			libxfs_trans_ihold(tp, ip);
+			XFS_BMAP_INIT(&flist, &first);
+			if (error = dir_createname(mp, tp, ip, ORPHANAGE,
+						strlen(ORPHANAGE),
+						orphanage_ino, &first, &flist,
+						nres))
+				do_error("can't make %s entry in root inode "
+					 "%llu, createname error %d\n",
+					ORPHANAGE, ino, error);
+			libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+			error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+			ASSERT(error == 0);
+			libxfs_trans_commit(tp,
+				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_SYNC, 0);
+			orphanage_entered = 1;
+		}
+
+		/*
+		 * if we have to create a .. for /, do it now *before*
+		 * we delete the bogus entries, otherwise the directory
+		 * could transform into a shortform dir which would
+		 * probably cause the simulation to choke.  Even
+		 * if the illegal entries get shifted around, it's ok
+		 * because the entries are structurally intact and in
+		 * hash-value order so the simulation won't get confused
+		 * if it has to move them around.
+		 */
+		if (!no_modify && need_root_dotdot &&
+				ino == mp->m_sb.sb_rootino)  {
+			ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
+
+			do_warn("recreating root directory .. entry\n");
+
+			tp = libxfs_trans_alloc(mp, 0);
+			ASSERT(tp != NULL);
+
+			nres = XFS_MKDIR_SPACE_RES(mp, 2);
+			error = libxfs_trans_reserve(tp, nres,
+					XFS_MKDIR_LOG_RES(mp),
+					0,
+					XFS_TRANS_PERM_LOG_RES,
+					XFS_MKDIR_LOG_COUNT);
+
+			if (error)
+				res_failed(error);
+
+			libxfs_trans_ijoin(tp, ip, 0);
+			libxfs_trans_ihold(tp, ip);
+
+			XFS_BMAP_INIT(&flist, &first);
+
+			if (error = dir_createname(mp, tp, ip, "..", 2,
+					ip->i_ino, &first, &flist, nres))
+				do_error(
+"can't make \"..\" entry in root inode %llu, createname error %d\n",
+					ino, error);
+
+			libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+			error = libxfs_bmap_finish(&tp, &flist, first,
+					&committed);
+			ASSERT(error == 0);
+			libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+					|XFS_TRANS_SYNC, 0);
+
+			need_root_dotdot = 0;
+		} else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
+			do_warn("would recreate root directory .. entry\n");
+		}
+
+		/*
+		 * delete any illegal entries -- which should only exist
+		 * if the directory is a longform directory.  bogus
+		 * shortform directory entries were deleted in phase 4.
+		 */
+		if (!no_modify && num_illegal > 0)  {
+			ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
+			ASSERT(!XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
+
+			while (num_illegal > 0 && ip->i_d.di_format !=
+					XFS_DINODE_FMT_LOCAL)  {
+				prune_lf_dir_entry(mp, ino, ip, &hashval);
+				num_illegal--;
+			}
+
+			/*
+			 * handle case where we've deleted so many
+			 * entries that the directory has changed from
+			 * a longform to a shortform directory.  have
+			 * to allocate a transaction since we're working
+			 * with the incore data fork.
+			 */
+			if (num_illegal > 0)  {
+				ASSERT(ip->i_d.di_format ==
+					XFS_DINODE_FMT_LOCAL);
+				tp = libxfs_trans_alloc(mp, 0);
+				/*
+				 * using the remove reservation is overkill
+				 * since at most we'll only need to log the
+				 * inode but it's easier than wedging a
+				 * new define in ourselves.  10 block fs
+				 * space reservation is also overkill but
+				 * what the heck...
+				 */
+				nres = XFS_REMOVE_SPACE_RES(mp);
+				error = libxfs_trans_reserve(tp, nres,
+						XFS_REMOVE_LOG_RES(mp), 0,
+						XFS_TRANS_PERM_LOG_RES,
+						XFS_REMOVE_LOG_COUNT);
+				if (error)
+					res_failed(error);
+
+				libxfs_trans_ijoin(tp, ip, 0);
+				libxfs_trans_ihold(tp, ip);
+
+				prune_sf_dir_entry(mp, ino, ip);
+
+				libxfs_trans_log_inode(tp, ip,
+						XFS_ILOG_CORE | XFS_ILOG_DDATA);
+				ASSERT(error == 0);
+				libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+						|XFS_TRANS_SYNC, 0);
+			}
+		}
+
+		/*
+		 * if we need to create the '.' entry, do so only if
+		 * the directory is a longform dir.  if it's been
+		 * turned into a shortform dir, then the inode is ok
+		 * since shortform dirs have no '.' entry and the inode
+		 * has already been committed by prune_lf_dir_entry().
+		 */
+		if (need_dot)  {
+			/*
+			 * bump up our link count but don't
+			 * bump up the inode link count.  chances
+			 * are good that even though we lost '.'
+			 * the inode link counts reflect '.' so
+			 * leave the inode link count alone and if
+			 * it turns out to be wrong, we'll catch
+			 * that in phase 7.
+			 */
+			add_inode_ref(irec, ino_offset);
+
+			if (no_modify)  {
+				do_warn(
+	"would create missing \".\" entry in dir ino %llu\n",
+					ino);
+			} else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)  {
+				/*
+				 * need to create . entry in longform dir.
+				 */
+				do_warn(
+	"creating missing \".\" entry in dir ino %llu\n",
+					ino);
+
+				tp = libxfs_trans_alloc(mp, 0);
+				ASSERT(tp != NULL);
+
+				nres = XFS_MKDIR_SPACE_RES(mp, 1);
+				error = libxfs_trans_reserve(tp, nres,
+						XFS_MKDIR_LOG_RES(mp),
+						0,
+						XFS_TRANS_PERM_LOG_RES,
+						XFS_MKDIR_LOG_COUNT);
+
+				if (error)
+					res_failed(error);
+
+				libxfs_trans_ijoin(tp, ip, 0);
+				libxfs_trans_ihold(tp, ip);
+
+				XFS_BMAP_INIT(&flist, &first);
+
+				if (error = dir_createname(mp, tp, ip, ".",
+						1, ip->i_ino, &first, &flist,
+						nres))
+					do_error(
+	"can't make \".\" entry in dir ino %llu, createname error %d\n",
+						ino, error);
+
+				libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+				error = libxfs_bmap_finish(&tp, &flist, first,
+						&committed);
+				ASSERT(error == 0);
+				libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+						|XFS_TRANS_SYNC, 0);
+			}
+		}
+
+		libxfs_iput(ip, 0);
+	}
+}
+
+/*
+ * look up the incore inode record for one inode and mark that
+ * inode as reached.  the record pointer is checked before it is
+ * dereferenced to compute the inode's offset within its chunk.
+ *
+ * mp  - mount structure used to split ino into ag number/ag inode
+ * ino - number of the inode to mark
+ */
+static void
+mark_standalone_inode(xfs_mount_t *mp, xfs_ino_t ino)
+{
+	ino_tree_node_t		*irec;
+	int			offset;
+
+	irec = find_inode_rec(XFS_INO_TO_AGNO(mp, ino),
+			XFS_INO_TO_AGINO(mp, ino));
+
+	ASSERT(irec != NULL);
+
+	offset = XFS_INO_TO_AGINO(mp, ino) - irec->ino_startnum;
+
+	add_inode_reached(irec, offset);
+}
+
+/*
+ * mark realtime bitmap and summary inodes as reached.
+ * the quota inodes (sb_uquotino, sb_pquotino) are marked here
+ * as well when fs_quotas is set and the superblock field holds
+ * an inode number (neither 0 nor NULLFSINO).
+ *
+ * every lookup goes through mark_standalone_inode() so that each
+ * inode record is asserted non-NULL before it is used.
+ */
+void
+mark_standalone_inodes(xfs_mount_t *mp)
+{
+	mark_standalone_inode(mp, mp->m_sb.sb_rbmino);
+	mark_standalone_inode(mp, mp->m_sb.sb_rsumino);
+
+	/*
+	 * quota inodes are only considered when fs_quotas is set
+	 */
+	if (!fs_quotas)
+		return;
+
+	if (mp->m_sb.sb_uquotino && mp->m_sb.sb_uquotino != NULLFSINO)
+		mark_standalone_inode(mp, mp->m_sb.sb_uquotino);
+
+	if (mp->m_sb.sb_pquotino && mp->m_sb.sb_pquotino != NULLFSINO)
+		mark_standalone_inode(mp, mp->m_sb.sb_pquotino);
+}
+
+/*
+ * phase 6 - check inode connectivity.
+ *
+ * in modify mode, recreates the root directory and realtime
+ * inodes whose need_* flags are set, refills the realtime inodes
+ * and makes the orphanage.  then walks the tree from the root,
+ * walks every confirmed directory that traversal did not check,
+ * and finally moves each unreached inode to the orphanage (in
+ * no_modify mode the move is only reported).
+ */
+void
+phase6(xfs_mount_t *mp)
+{
+	xfs_ino_t		ino;
+	ino_tree_node_t		*irec;
+	dir_stack_t		stack;
+	int			i;
+	int			j;
+
+	bzero(&zerocr, sizeof(cred_t));
+
+	do_log("Phase 6 - check inode connectivity...\n");
+
+	if (!no_modify)
+		teardown_bmap_finish(mp);
+	else
+		teardown_bmap(mp);
+
+	incore_ext_teardown(mp);
+
+	add_ino_backptrs(mp);
+
+	/*
+	 * verify existence of root directory - if we have to
+	 * make one, it's ok for the incore data structs not to
+	 * know about it since everything about it (and the other
+	 * inodes in its chunk if a new chunk was created) are ok
+	 */
+	if (need_root_inode)  {
+		if (!no_modify)  {
+			do_warn("reinitializing root directory\n");
+			mk_root_dir(mp);
+			need_root_inode = 0;
+			need_root_dotdot = 0;
+		} else  {
+			do_warn("would reinitialize root directory\n");
+		}
+	}
+
+	if (need_rbmino)  {
+		if (!no_modify)  {
+			do_warn("reinitializing realtime bitmap inode\n");
+			mk_rbmino(mp);
+			need_rbmino = 0;
+		} else  {
+			do_warn("would reinitialize realtime bitmap inode\n");
+		}
+	}
+
+	if (need_rsumino)  {
+		if (!no_modify)  {
+			do_warn("reinitializing realtime summary inode\n");
+			mk_rsumino(mp);
+			need_rsumino = 0;
+		} else  {
+			do_warn("would reinitialize realtime summary inode\n");
+		}
+	}
+
+	if (!no_modify)  {
+		do_log(
+	"        - resetting contents of realtime bitmap and summary inodes\n");
+		if (fill_rbmino(mp))  {
+			do_warn(
+			"Warning:  realtime bitmap may be inconsistent\n");
+		}
+
+		if (fill_rsumino(mp))  {
+			do_warn(
+			"Warning:  realtime summary may be inconsistent\n");
+		}
+	}
+
+	/*
+	 * make orphanage (it's guaranteed to not exist now)
+	 */
+	if (!no_modify)  {
+		do_log("        - ensuring existence of %s directory\n",
+			ORPHANAGE);
+		orphanage_ino = mk_orphanage(mp);
+	}
+
+	dir_stack_init(&stack);
+
+	mark_standalone_inodes(mp);
+
+	/*
+	 * push root dir on stack, then go
+	 */
+	if (!need_root_inode)  {
+		do_log("        - traversing filesystem starting at / ... \n");
+
+		push_dir(&stack, mp->m_sb.sb_rootino);
+		process_dirstack(mp, &stack);
+
+		do_log("        - traversal finished ... \n");
+	} else  {
+		ASSERT(no_modify != 0);
+
+		do_log(
+"        - root inode lost, cannot make new one in no modify mode ... \n");
+		do_log(
+"        - skipping filesystem traversal from / ... \n");
+	}
+
+	do_log("        - traversing all unattached subtrees ... \n");
+
+	irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+				XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+
+	/*
+	 * we always have a root inode, even if it's free...
+	 * if the root is free, forget it, lost+found is already gone
+	 */
+	if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
+		need_root_inode = 1;
+	}
+
+	/*
+	 * then process all unreached inodes
+	 * by walking incore inode tree
+	 *
+	 *	get next unreached directory inode # from
+	 *		incore list
+	 *	push inode on dir stack
+	 *	call process_dirstack
+	 */
+	for (i = 0; i < glob_agcount; i++)  {
+		for (irec = findfirst_inode_rec(i); irec != NULL;
+				irec = next_ino_rec(irec))  {
+			for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+				if (!is_inode_confirmed(irec, j))
+					continue;
+				/*
+				 * skip directories that have already been
+				 * processed, even if they haven't been
+				 * reached.  If they are reachable, we'll
+				 * pick them up when we process their parent.
+				 */
+				ino = XFS_AGINO_TO_INO(mp, i,
+						j + irec->ino_startnum);
+				if (inode_isadir(irec, j) &&
+						!is_inode_refchecked(ino,
+							irec, j)) {
+					push_dir(&stack, ino);
+					process_dirstack(mp, &stack);
+				}
+			}
+		}
+	}
+
+	do_log("        - traversals finished ... \n");
+	do_log("        - moving disconnected inodes to lost+found ... \n");
+
+	/*
+	 * move all disconnected inodes to the orphanage
+	 */
+	for (i = 0; i < glob_agcount; i++)  {
+		for (irec = findfirst_inode_rec(i); irec != NULL;
+				irec = next_ino_rec(irec))  {
+			for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+				ASSERT(is_inode_confirmed(irec, j));
+				if (is_inode_free(irec, j))
+					continue;
+				if (!is_inode_reached(irec, j)) {
+					ASSERT(inode_isadir(irec, j) ||
+						num_inode_references(irec, j)
+						== 0);
+					ino = XFS_AGINO_TO_INO(mp, i,
+						j + irec->ino_startnum);
+					if (inode_isadir(irec, j))
+						do_warn(
+						"disconnected dir inode %llu, ",
+							ino);
+					else
+						do_warn(
+						"disconnected inode %llu, ",
+							ino);
+					if (!no_modify)  {
+						do_warn("moving to %s\n",
+							ORPHANAGE);
+						mv_orphanage(mp, orphanage_ino,
+							ino,
+							inode_isadir(irec, j));
+					} else  {
+						do_warn("would move to %s\n",
+							ORPHANAGE);
+					}
+					/*
+					 * for read-only case, even though
+					 * the inode isn't really reachable,
+					 * set the flag (and bump our link
+					 * count) anyway to fool phase 7
+					 */
+					add_inode_reached(irec, j);
+				}
+			}
+		}
+	}
+}
diff --git a/repair/phase7.c b/repair/phase7.c
new file mode 100644
index 000000000..670afeebd
--- /dev/null
+++ b/repair/phase7.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "versions.h"
+
+/*
+ * dinoc is a pointer to the IN-CORE dinode core.  when its link count
+ * differs from nrefs: fix it and set *dirty (modify mode) or just warn.
+ */
+void
+set_nlinks(xfs_dinode_core_t	*dinoc,
+		xfs_ino_t	ino,
+		__uint32_t	nrefs,
+		int		*dirty)
+{
+	if (INT_GET(dinoc->di_nlink, ARCH_NOCONVERT) == nrefs)
+		return;
+	if (no_modify)  {
+		do_warn("would have reset inode %llu nlinks from %d to %d\n",
+			ino, INT_GET(dinoc->di_nlink, ARCH_NOCONVERT), nrefs);
+		return;
+	}
+
+	*dirty = 1;
+	do_warn("resetting inode %llu nlinks from %d to %d\n",
+		ino, INT_GET(dinoc->di_nlink, ARCH_NOCONVERT), nrefs);
+	if (nrefs > XFS_MAXLINK_1)  {
+		ASSERT(fs_inode_nlink);
+		do_warn(
+"nlinks %d will overflow v1 ino, ino %llu will be converted to version 2\n",
+			nrefs, ino);
+	}
+	INT_SET(dinoc->di_nlink, ARCH_NOCONVERT, nrefs);
+}
+
+/*
+ * allocate a phase 7 transaction and reserve log space for it.
+ * no blocks are reserved in no_modify mode, 10 otherwise.
+ */
+static xfs_trans_t *
+phase7_trans_alloc(xfs_mount_t *mp)
+{
+	xfs_trans_t		*tp;
+	int			error;
+
+	tp = libxfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+	error = libxfs_trans_reserve(tp, (no_modify ? 0 : 10),
+		XFS_REMOVE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+		XFS_REMOVE_LOG_COUNT);
+	ASSERT(error == 0);
+
+	return(tp);
+}
+
+/*
+ * phase 7 - verify link counts.  for every in-use inode in the
+ * incore inode tree, set_nlinks() compares the inode's link count
+ * with the incore reference count and corrects (or, in no_modify
+ * mode, reports) any difference.  lost+found is skipped.
+ */
+void
+phase7(xfs_mount_t *mp)
+{
+	ino_tree_node_t		*irec;
+	xfs_inode_t		*ip;
+	xfs_trans_t		*tp;
+	xfs_ino_t		ino;
+	__uint32_t		nrefs;
+	int			error;
+	int			dirty;
+	int			i;
+	int			j;
+
+	if (no_modify)
+		printf("Phase 7 - verify link counts...\n");
+	else
+		printf("Phase 7 - verify and correct link counts...\n");
+
+	tp = phase7_trans_alloc(mp);
+
+	/*
+	 * for each ag, look at each inode 1 at a time using the
+	 * sim code.  if the number of links is bad, reset it,
+	 * log the inode core, commit the transaction, and
+	 * allocate a new transaction
+	 */
+	for (i = 0; i < glob_agcount; i++)  {
+		for (irec = findfirst_inode_rec(i); irec != NULL;
+				irec = next_ino_rec(irec))  {
+			for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+				ASSERT(is_inode_confirmed(irec, j));
+				if (is_inode_free(irec, j))
+					continue;
+
+				ASSERT(no_modify || is_inode_reached(irec, j));
+				ASSERT(no_modify ||
+						is_inode_referenced(irec, j));
+
+				ino = XFS_AGINO_TO_INO(mp, i,
+					irec->ino_startnum + j);
+				nrefs = num_inode_references(irec, j);
+				error = libxfs_trans_iget(mp, tp, ino, 0, &ip);
+				if (error && no_modify)  {
+					do_warn(
+	"couldn't map inode %llu, err = %d, can't compare link counts\n",
+						ino, error);
+					continue;
+				}
+				if (error)
+					do_error(
+					"couldn't map inode %llu, err = %d\n",
+						ino, error);
+
+				/*
+				 * compare and set links for all inodes
+				 * but the lost+found inode.  we keep
+				 * that correct as we go.
+				 */
+				dirty = 0;
+				if (ino != orphanage_ino)
+					set_nlinks(&ip->i_d, ino, nrefs,
+							&dirty);
+
+				if (!dirty)  {
+					libxfs_trans_iput(tp, ip, 0);
+					continue;
+				}
+
+				/*
+				 * no need to do a bmap finish since
+				 * we're not allocating anything
+				 */
+				libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+				ASSERT(error == 0);
+				error = libxfs_trans_commit(tp,
+					XFS_TRANS_RELEASE_LOG_RES|
+					XFS_TRANS_SYNC, NULL);
+				ASSERT(error == 0);
+				tp = phase7_trans_alloc(mp);
+			}
+		}
+	}
+
+	/*
+	 * always have one unfinished transaction coming out
+	 * of the loop.  cancel it.
+	 */
+	libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+}
diff --git a/repair/protos.h b/repair/protos.h
new file mode 100644
index 000000000..4f3f8d506
--- /dev/null
+++ b/repair/protos.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+void	xfs_init(libxfs_init_t *args);
+void	io_init(void);
+/* superblock checking and i/o -- presumably in sb.c; TODO confirm */
+int	verify_sb(xfs_sb_t		*sb,
+		int			is_primary_sb);
+int	verify_set_primary_sb(xfs_sb_t	*root_sb,
+			int		sb_index,
+			int		*sb_modified);
+int	get_sb(xfs_sb_t			*sbp,
+		xfs_off_t			off,
+		int			size,
+		xfs_agnumber_t		agno);
+void	write_primary_sb(xfs_sb_t	*sbp,
+			int		size);
+
+int	find_secondary_sb(xfs_sb_t	*sb);
+
+int	check_growfs(xfs_off_t off, int bufnum, xfs_agnumber_t agnum);
+
+void	get_sb_geometry(fs_geometry_t	*geo,
+			xfs_sb_t	*sbp);
+
+char	*alloc_ag_buf(int size);
+/* the argument of print_inode_list() is an ag number (xfs_agnumber_t) */
+void	print_inode_list(xfs_agnumber_t i);
+char *	err_string(int err_code);
+
diff --git a/repair/rt.c b/repair/rt.c
new file mode 100644
index 000000000..ac4e8c377
--- /dev/null
+++ b/repair/rt.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "dinode.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "rt.h"
+
+#define xfs_highbit64 libxfs_highbit64	/* for XFS_RTBLOCKLOG macro */
+
+/*
+ * allocate zeroed incore realtime bitmap/summary buffers, if rt blocks exist
+ */
+void
+rtinit(xfs_mount_t *mp)
+{
+	if (mp->m_sb.sb_rblocks == 0)
+		return;
+
+	/*
+	 * blockmap initialization is handled by incore_init().  calloc
+	 * gets count and size separately so it can catch overflow.
+	 */
+	if ((btmcompute = calloc(mp->m_sb.sb_rbmblocks,
+			mp->m_sb.sb_blocksize)) == NULL)
+		do_error(
+		"couldn't allocate memory for incore realtime bitmap.\n");
+
+	if ((sumcompute = calloc(mp->m_rsumsize, 1)) == NULL)
+		do_error(
+		"couldn't allocate memory for incore realtime summary info.\n");
+}
+
+/*
+ * count one free realtime extent run [start_ext, end_ext) in the summary
+ */
+static void
+rt_sum_add_free_run(xfs_mount_t *mp, xfs_suminfo_t *sums,
+		xfs_drtbno_t start_ext, xfs_drtbno_t end_ext, int start_bmbno)
+{
+	int		len = (int) (end_ext - start_ext);
+	int		log = XFS_RTBLOCKLOG(len);
+	int		offs = XFS_SUMOFFS(mp, log, start_bmbno);
+
+	sums[offs]++;
+}
+
+/*
+ * generate the real-time bitmap and summary info based on the
+ * incore realtime extent map.  words[] gets one bit per extent
+ * (set when the extent state is XR_E_FREE), sb_frextents counts
+ * free extents, sumcompute[] counts free runs.  always returns 0.
+ */
+int
+generate_rtinfo(xfs_mount_t	*mp,
+		xfs_rtword_t	*words,
+		xfs_suminfo_t	*sumcompute)
+{
+	xfs_drtbno_t	extno = 0;
+	xfs_drtbno_t	run_start = 0;
+	xfs_rtword_t	mask;
+	xfs_rtword_t	word;
+	int		bitsperblock;
+	int		bmbno = 0;
+	int		run_bmbno = 0;
+	int		in_run = 0;
+	int		bit;
+
+	ASSERT(mp->m_rbmip == NULL);
+
+	bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+
+	/*
+	 * slower but simple: set one bit at a time and track each free run
+	 */
+	while (extno < mp->m_sb.sb_rextents)  {
+		*words = 0;
+		word = 0;
+		mask = 1;
+		for (bit = 0; bit < sizeof(xfs_rtword_t) * NBBY &&
+				extno < mp->m_sb.sb_rextents;
+				bit++, extno++, mask <<= 1)  {
+			if (get_rtbno_state(mp, extno) != XR_E_FREE)  {
+				if (in_run)  {
+					rt_sum_add_free_run(mp, sumcompute,
+						run_start, extno, run_bmbno);
+					in_run = 0;
+				}
+				continue;
+			}
+			sb_frextents++;
+			word |= mask;
+			if (!in_run)  {
+				run_start = extno;
+				run_bmbno = bmbno;
+				in_run = 1;
+			}
+		}
+		*words++ = word;
+		if (extno % bitsperblock == 0)
+			bmbno++;
+	}
+	if (in_run)
+		rt_sum_add_free_run(mp, sumcompute, run_start, extno,
+				run_bmbno);
+	return(0);
+}
+
+#if 0
+/*
+ * disabled (#if 0): compare sumcompute with sumfile; 1 if bad, 0 if good
+ */
+int
+check_summary(xfs_mount_t *mp)
+{
+	xfs_drfsbno_t	bno;
+	xfs_suminfo_t	*csp;	/* walks the computed summary (sumcompute) */
+	xfs_suminfo_t	*fsp;	/* walks the summary copied from the file (sumfile) */
+	int		log;
+	int		error = 0;
+
+	error = 0;
+	csp = sumcompute;
+	fsp = sumfile;
+	for (log = 0; log < mp->m_rsumlevels; log++) {
+		for (bno = 0;
+		     bno < mp->m_sb.sb_rbmblocks;
+		     bno++, csp++, fsp++) {
+			if (*csp != *fsp) {
+				do_warn(
+	"rt summary mismatch, size %d block %llu, file: %d, computed: %d\n",
+						log, bno, *fsp, *csp);
+				error = 1;	/* keep scanning so every mismatch is reported */
+			}
+		}
+	}
+
+	return(error);
+}
+
+/*
+ * disabled (#if 0): read the real-time bitmap file block by block,
+ * mark each extent whose bit is set as XR_E_FREE and count each run
+ * of set bits in sumcompute.  Should probably compute the summary off
+ * the incore computed bitmap instead of the realtime bitmap file.
+ */
+void
+process_rtbitmap(xfs_mount_t	*mp,
+		xfs_dinode_t	*dino,
+		blkmap_t	*blkmap)
+{
+	int		error;
+	int		bit;
+	int		bitsperblock;
+	int		bmbno;
+	int		end_bmbno;
+	xfs_dfsbno_t	bno;
+	xfs_buf_t	*bp;
+	xfs_drtbno_t	extno;
+	int		i;	/* not referenced in the body */
+	int		len;
+	int		log;
+	int		offs;
+	int		prevbit;
+	int		start_bmbno;
+	int		start_bit;
+	xfs_rtword_t	*words;
+
+	ASSERT(mp->m_rbmip == NULL);
+
+	bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+	prevbit = 0;	/* 1 while inside a run of set bits */
+	extno = 0;
+	error = 0;	/* set to 1 below but never read or returned */
+
+	end_bmbno = howmany(INT_GET(dino->di_core.di_size, ARCH_CONVERT), mp->m_sb.sb_blocksize);
+
+	for (bmbno = 0; bmbno < end_bmbno; bmbno++) {
+		bno = blkmap_get(blkmap, bmbno);
+
+		if (bno == NULLDFSBNO) {
+			do_warn("can't find block %d for rtbitmap inode\n",
+					bmbno);
+			error = 1;
+			continue;
+		}
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+				XFS_FSB_TO_BB(mp, 1));
+		if (!bp) {
+			do_warn("can't read block %d for rtbitmap inode\n",
+					bmbno);
+			error = 1;
+			continue;
+		}
+		words = (xfs_rtword_t *)bp->b_un.b_addr;
+		for (bit = 0;
+		     bit < bitsperblock && extno < mp->m_sb.sb_rextents;
+		     bit++, extno++) {
+			if (isset(words, bit)) {
+				set_rtbno_state(mp, extno, XR_E_FREE);
+				sb_frextents++;
+				if (prevbit == 0) {	/* a new run starts here */
+					start_bmbno = bmbno;
+					start_bit = bit;
+					prevbit = 1;
+				}
+			} else if (prevbit == 1) {	/* the current run just ended */
+				len = (bmbno - start_bmbno) * bitsperblock +
+					(bit - start_bit);
+				log = XFS_RTBLOCKLOG(len);
+				offs = XFS_SUMOFFS(mp, log, start_bmbno);
+				sumcompute[offs]++;
+				prevbit = 0;
+			}
+		}
+		libxfs_putbuf(bp);
+		if (extno == mp->m_sb.sb_rextents)	/* every extent has been seen */
+			break;
+	}
+	if (prevbit == 1) {	/* a run was still open when the scan stopped */
+		len = (bmbno - start_bmbno) * bitsperblock + (bit - start_bit);
+		log = XFS_RTBLOCKLOG(len);
+		offs = XFS_SUMOFFS(mp, log, start_bmbno);
+		sumcompute[offs]++;
+	}
+}
+
+/*
+ * disabled (#if 0): copy the real-time summary file data into sumfile
+ */
+void
+process_rtsummary(xfs_mount_t	*mp,
+		xfs_dinode_t	*dino,	/* not referenced in the body */
+		blkmap_t	*blkmap)
+{
+	xfs_fsblock_t	bno;
+	xfs_buf_t	*bp;
+	char		*bytes;
+	int		sumbno;
+
+	for (sumbno = 0; sumbno < blkmap->count; sumbno++) {
+		bno = blkmap_get(blkmap, sumbno);
+		if (bno == NULLDFSBNO) {
+			do_warn("block %d for rtsummary inode is missing\n",
+					sumbno);
+			error++;	/* NOTE(review): "error" has no local declaration here */
+			continue;
+		}
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+				XFS_FSB_TO_BB(mp, 1));
+		if (!bp) {
+			do_warn("can't read block %d for rtsummary inode\n",
+					sumbno);
+			error++;
+			continue;
+		}
+		bytes = bp->b_un.b_addr;
+		/* block sumbno lands at byte offset sumbno * sb_blocksize of sumfile */
+		bcopy(bytes, (char *)sumfile + sumbno * mp->m_sb.sb_blocksize,
+			mp->m_sb.sb_blocksize);
+		libxfs_putbuf(bp);
+	}
+}
+#endif
diff --git a/repair/rt.h b/repair/rt.h
new file mode 100644
index 000000000..d29241d2a
--- /dev/null
+++ b/repair/rt.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct blkmap;	/* forward declaration; only pointers to it are used below */
+/* allocates the incore realtime bitmap and summary buffers (see rt.c) */
+void
+rtinit(xfs_mount_t		*mp);
+/* fills words[] and sumcompute[] from the incore realtime extent state */
+int
+generate_rtinfo(xfs_mount_t	*mp,
+		xfs_rtword_t	*words,
+		xfs_suminfo_t	*sumcompute);
+
+#if 0
+/* disabled here, as are the matching definitions in rt.c */
+int
+check_summary(xfs_mount_t	*mp);
+
+void
+process_rtbitmap(xfs_mount_t	*mp,
+		xfs_dinode_t	*dino,
+		struct blkmap	*blkmap);
+/* NOTE(review): rt.c's disabled definition also takes xfs_dinode_t *dino */
+void
+process_rtsummary(xfs_mount_t	*mp,
+		struct blkmap	*blkmap);
+#endif
diff --git a/repair/sb.c b/repair/sb.c
new file mode 100644
index 000000000..5133f2063
--- /dev/null
+++ b/repair/sb.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <errno.h>
+#include <libxfs.h>
+#include <malloc.h>
+#include "agheader.h"
+#include "globals.h"
+#include "protos.h"
+#include "err_protos.h"
+
+
+/*
+ * overwrite *dest with *source, but keep dest's own root, realtime and
+ * quota inode numbers and its version word (plus source's DALIGN/EXTFLGBIT).
+ */
+void
+copy_sb(xfs_sb_t *source, xfs_sb_t *dest)
+{
+	xfs_ino_t	rootino;
+	xfs_ino_t	rbmino;
+	xfs_ino_t	rsumino;
+	xfs_ino_t	uquotino;
+	xfs_ino_t	pquotino;
+	__uint16_t	versionnum;
+
+	rootino = dest->sb_rootino;
+	rbmino = dest->sb_rbmino;
+	rsumino = dest->sb_rsumino;
+	uquotino = dest->sb_uquotino;
+	pquotino = dest->sb_pquotino;
+
+	versionnum = dest->sb_versionnum;
+	/* whole-structure copy; the fields saved above are put back below */
+	*dest = *source;
+
+	dest->sb_rootino = rootino;
+	dest->sb_rbmino = rbmino;
+	dest->sb_rsumino = rsumino;
+	dest->sb_uquotino = uquotino;
+	dest->sb_pquotino = pquotino;
+
+	dest->sb_versionnum = versionnum;
+
+	/*
+	 * copy over version bits that are stamped into all
+	 * secondaries and cannot be changed at run time in
+	 * the primary superblock
+	 */
+	if (XFS_SB_VERSION_HASDALIGN(source))
+		XFS_SB_VERSION_ADDDALIGN(dest);
+	if (XFS_SB_VERSION_HASEXTFLGBIT(source))
+		XFS_SB_VERSION_ADDEXTFLGBIT(dest);
+
+	/*
+	 * these are all supposed to be zero or will get reset anyway
+	 */
+	dest->sb_icount = 0;
+	dest->sb_ifree = 0;
+	dest->sb_fdblocks = 0;
+	dest->sb_frextents = 0;
+	/* NOTE(review): this clears sb_fname in *source*, not dest -- confirm */
+	bzero(source->sb_fname, 12);
+}
+
+#define BSIZE	(1024 * 1024)
+
+/*
+ * scan the device for a verifiable secondary sb; copy it to *rsb, return 1
+ */
+int
+find_secondary_sb(xfs_sb_t *rsb)
+{
+	xfs_off_t	off;
+	xfs_sb_t	*sb;
+	xfs_sb_t	bufsb;
+	char		*c_bufsb;
+	int		done;
+	int		i;
+	int		dirty;
+	int		retval;
+	int		bsize;
+
+	do_warn("\nattempting to find secondary superblock...\n");
+
+	sb = (xfs_sb_t *) memalign(MEM_ALIGN, BSIZE);
+	if (!sb) {
+		do_error(
+	"error finding secondary superblock -- failed to memalign buffer\n");
+		exit(1);
+	}
+
+	bzero(&bufsb, sizeof(xfs_sb_t));
+	retval = 0;
+	dirty = 0;
+	bsize = 0;
+
+	/*
+	 * skip first sector since we know that's bad
+	 */
+	for (done = 0, off = XFS_AG_MIN_BYTES; !done ; off += bsize)  {
+		/*
+		 * read disk 1 MByte at a time; a read error ends the scan like EOF
+		 */
+		if (lseek64(fs_fd, off, SEEK_SET) != off)  {
+			done = 1;
+		}
+
+		if (!done && (bsize = read(fs_fd, sb, BSIZE)) <= 0)  {
+			done = 1;
+		}
+
+		do_warn(".");
+
+		/*
+		 * check the buffer 512 bytes at a time since
+		 * we don't know how big the sectors really are.
+		 */
+		for (i = 0; !done && i < bsize; i += BBSIZE)  {
+			c_bufsb = (char *) sb + i;
+			libxfs_xlate_sb(c_bufsb, &bufsb, 1, ARCH_CONVERT,
+				XFS_SB_ALL_BITS);
+
+			if (verify_sb(&bufsb, 0) != XR_OK)
+				continue;
+
+			do_warn("found candidate secondary superblock...\n");
+
+			/*
+			 * found one.  copy only the in-core struct (bufsb is
+			 * no bigger), then verify against other secondaries.
+			 */
+			bcopy(&bufsb, rsb, sizeof(xfs_sb_t));
+			rsb->sb_inprogress = 0;
+			clear_sunit = 1;
+
+			if (verify_set_primary_sb(rsb, 0, &dirty) == XR_OK)  {
+				do_warn("verified secondary superblock...\n");
+				done = 1;
+				retval = 1;
+			} else  {
+				do_warn(
+				"unable to verify superblock, continuing...\n");
+			}
+		}
+	}
+
+	free(sb);
+	return(retval);
+}
+
+/*
+ * work out the value the inode alignment field should hold,
+ * using nothing but the superblock's own sb_blocklog
+ */
+int
+calc_ino_align(xfs_sb_t *sb)
+{
+	xfs_extlen_t	nblocks;
+
+	/* XFS_INODE_BIG_CLUSTER_SIZE scaled down by the block-size shift */
+	nblocks = XFS_INODE_BIG_CLUSTER_SIZE >> sb->sb_blocklog;
+	return(nblocks);
+}
+
+/*
+ * verify a superblock -- does not verify root inode #
+ *	can only check that geometry info is internally
+ *	consistent.  because of growfs, that's no guarantee
+ *	of correctness (e.g. geometry may have changed)
+ *
+ * fields verified or consistency checked:
+ *
+ *			sb_magicnum
+ *
+ *			sb_versionnum
+ *
+ *			sb_inprogress
+ *
+ *			sb_blocksize	(as a group)
+ *			sb_blocklog
+ *
+ * geometry info -	sb_dblocks	(as a group)
+ *			sb_agcount
+ *			sb_agblocks
+ *			sb_agblklog
+ *
+ * inode info -		sb_inodesize	(x-checked with geo info)
+ *			sb_inopblock
+ *
+ * sector size info -
+ *			sb_sectsize
+ *			sb_sectlog
+ *
+ * not checked here -
+ *			sb_rootino
+ *			sb_fname
+ *			sb_fpack
+ *			sb_logstart
+ *			sb_uuid
+ *
+ *			ALL real-time fields
+ *			final 4 summary counters
+ */
+
+int
+verify_sb(xfs_sb_t *sb, int is_primary_sb)
+{
+	__uint32_t	bsize;
+	xfs_extlen_t	align;
+	int		i;
+	
+	/* check magic number and version number */
+
+	if (sb->sb_magicnum != XFS_SB_MAGIC)
+		return(XR_BAD_MAGIC);
+
+	if (!XFS_SB_GOOD_VERSION(sb))
+		return(XR_BAD_VERSION);
+
+	/* does sb think mkfs really finished ? */
+
+	if (is_primary_sb && sb->sb_inprogress == 1)
+		return(XR_BAD_INPROGRESS);
+
+	/* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */
+
+	if (sb->sb_blocksize == 0)
+		return(XR_BAD_BLOCKSIZE);
+
+	bsize = 1;
+
+	for (i = 0; bsize < sb->sb_blocksize && i < 32; i++)  {
+		bsize <<= 1;
+	}
+
+	if (i < XR_LOG2BSIZE_MIN || i > XR_LOG2BSIZE_MAX)
+		return(XR_BAD_BLOCKSIZE);
+
+	/* check sb blocksize field against sb blocklog field */
+
+	if (i != sb->sb_blocklog)
+		return(XR_BAD_BLOCKLOG);
+
+	/* sanity check ag count, size fields against data size (64-bit math) */
+
+	if (sb->sb_dblocks == 0 ||
+		sb->sb_dblocks > (__uint64_t)sb->sb_agcount * sb->sb_agblocks ||
+		sb->sb_dblocks < (__uint64_t)(sb->sb_agcount - 1)
+			* sb->sb_agblocks + XFS_MIN_AG_BLOCKS)
+		return(XR_BAD_FS_SIZE_DATA);
+
+	if (sb->sb_agblklog != (__uint8_t)libxfs_log2_roundup(sb->sb_agblocks))
+		return(XR_BAD_FS_SIZE_DATA);
+
+	if (sb->sb_inodesize < XFS_DINODE_MIN_SIZE ||
+		sb->sb_inodesize > XFS_DINODE_MAX_SIZE ||
+		sb->sb_inopblock != howmany(sb->sb_blocksize,sb->sb_inodesize))
+		return(XR_BAD_INO_SIZE_DATA);
+
+	/* check sector size against log(sector size) field */
+
+	bsize = 1;
+
+	for (i = 0; bsize < sb->sb_sectsize && i < 15; i++)  {
+		bsize <<= 1;
+	}
+
+	if (sb->sb_sectsize == 0 || i == 16 ||
+			sb->sb_sectsize != (1 << i))
+		return(XR_BAD_SECT_SIZE_DATA);
+
+	/*
+	 * real-time extent size is always set; widen so the product can't wrap
+	 */
+	if ((__uint64_t)sb->sb_rextsize * sb->sb_blocksize > XFS_MAX_RTEXTSIZE)
+		return(XR_BAD_RT_GEO_DATA);
+
+	if ((__uint64_t)sb->sb_rextsize * sb->sb_blocksize < XFS_MIN_RTEXTSIZE)
+			return(XR_BAD_RT_GEO_DATA);
+
+	if (sb->sb_rblocks == 0)  {
+		if (sb->sb_rextents != 0)
+			return(XR_BAD_RT_GEO_DATA);
+
+		if (sb->sb_rbmblocks != 0)
+			return(XR_BAD_RT_GEO_DATA);
+
+		if (sb->sb_rextslog != 0)
+			return(XR_BAD_RT_GEO_DATA);
+
+		if (sb->sb_frextents != 0)
+			return(XR_BAD_RT_GEO_DATA);
+	} else  {
+		/*
+		 * if we have a real-time partition, sanity-check geometry
+		 */
+		if (sb->sb_rblocks / sb->sb_rextsize != sb->sb_rextents)
+			return(XR_BAD_RT_GEO_DATA);
+
+		if (sb->sb_rextslog !=
+				libxfs_highbit32((unsigned int)sb->sb_rextents))
+			return(XR_BAD_RT_GEO_DATA);
+
+		if (sb->sb_rbmblocks != (xfs_extlen_t) howmany(sb->sb_rextents,
+						NBBY * sb->sb_blocksize))
+			return(XR_BAD_RT_GEO_DATA);
+	}
+
+	/*
+	 * verify correctness of inode alignment if it's there
+	 */
+	if (XFS_SB_VERSION_HASALIGN(sb))  {
+		align = calc_ino_align(sb);
+
+		if (align != sb->sb_inoalignmt)
+			return(XR_BAD_INO_ALIGN);
+	}
+
+	/*
+	 * verify max. % of inodes (sb_imax_pct)
+	 */
+	if (sb->sb_imax_pct > 100)
+		return(XR_BAD_INO_MAX_PCT);
+
+	/*
+	 * verify stripe alignment fields if present
+	 */
+	if (XFS_SB_VERSION_HASDALIGN(sb)) {
+		if ((!sb->sb_unit && sb->sb_width) || 
+		    (sb->sb_unit && sb->sb_agblocks % sb->sb_unit)) 
+			return(XR_BAD_SB_UNIT);
+		if ((sb->sb_unit && !sb->sb_width) ||
+		    (sb->sb_width && sb->sb_unit && sb->sb_width % sb->sb_unit))
+			return(XR_BAD_SB_WIDTH);
+	}
+
+	/*
+	 * if shared bit is set, verify that the version number is sane
+	 */
+	if (XFS_SB_VERSION_HASSHARED(sb))  {
+		if (sb->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
+			return(XR_BAD_SVN);
+	}
+
+	/*
+	 * mkfs's that stamped a feature bit besides the ones in the
+	 * mask below could leave garbage in the secondary superblock
+	 * sectors.  Anything stamping the shared fs bit or better into
+	 * the secondaries is ok and should generate clean secondary
+	 * superblock sectors.
+	 *
+	 * check primary and clean secondary superblocks more strictly
+	 */
+	if (is_primary_sb || sb->sb_versionnum & XR_PART_SECSB_VNMASK)  {
+		/*
+		 * return errors if shared vn or alignment fields
+		 * are set without their feature bits being set
+		 */
+		if (!pre_65_beta && sb->sb_versionnum & XR_PART_SECSB_VNMASK ||
+		    pre_65_beta && sb->sb_versionnum & XR_ALPHA_SECSB_VNMASK)  {
+			/*
+			 * shared version # and inode alignment fields
+			 * should be valid
+			 */
+			if (sb->sb_shared_vn && !XFS_SB_VERSION_HASSHARED(sb))
+				return(XR_BAD_SVN);
+			if (sb->sb_inoalignmt && !XFS_SB_VERSION_HASALIGN(sb))
+				return(XR_BAD_INO_ALIGN);
+		}
+		if ((!pre_65_beta &&
+		     (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK)) ||
+		    (pre_65_beta &&
+		     (sb->sb_versionnum & XFS_SB_VERSION_DALIGNBIT)))  {
+			/*
+			 * stripe alignment values should be valid
+			 */
+			if (sb->sb_unit && !XFS_SB_VERSION_HASDALIGN(sb))
+				return(XR_BAD_SB_UNIT);
+			if (sb->sb_width && !XFS_SB_VERSION_HASDALIGN(sb))
+				return(XR_BAD_SB_WIDTH);
+		}
+
+#if 0
+		/*
+		 * checks involving later superblock fields get added here...
+		 */
+		if (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK)  {
+		}
+#endif
+	}
+
+	return(XR_OK);
+}
+
+void
+write_primary_sb(xfs_sb_t *sbp, int size)
+{
+	void	*diskbuf;
+
+	/* nothing is written when no_modify is set */
+	if (no_modify)
+		return;
+
+	diskbuf = calloc(size, 1);
+	if (diskbuf == NULL) {
+		do_error("failed to malloc superblock buffer\n");
+		return;
+	}
+
+	/* the superblock image goes to byte offset 0 of fs_fd */
+	if (lseek64(fs_fd, 0LL, SEEK_SET) != 0LL) {
+		free(diskbuf);
+		do_error("couldn't seek to offset 0 in filesystem\n");
+	}
+	libxfs_xlate_sb(diskbuf, sbp, -1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+	if (write(fs_fd, diskbuf, size) != size) {
+		free(diskbuf);
+		do_error("primary superblock write failed!\n");
+	}
+	free(diskbuf);
+}
+
+/*
+ * read a candidate superblock; result is XR_EOF or verify_sb()'s code
+ */
+int
+get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno)
+{
+	int error, rval;
+        void *buf;
+        
+        if ((buf = calloc(size, 1)) == NULL) {
+		do_error(
+	"error reading superblock %u -- failed to malloc buffer\n",
+			agno);
+		exit(1);
+	}
+
+	/* try and read it first; buf must not leak if the seek fails */
+	if (lseek64(fs_fd, off, SEEK_SET) != off)  {
+		do_warn(
+	"error reading superblock %u -- seek to offset %lld failed\n",
+			agno, off);
+		free(buf);
+		return(XR_EOF);
+	}
+
+	if ((rval = read(fs_fd, buf, size)) != size)  {
+		error = errno;
+		do_warn(
+"superblock read failed, offset %lld, size %d, ag %u, rval %d\n",
+			off, size, agno, rval);
+		do_error("%s\n", strerror(error));
+	}
+	libxfs_xlate_sb(buf, sbp, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+	free(buf);
+
+	return (verify_sb(sbp, 0));
+}
+
+#if 0
+int
+check_growfs(xfs_off_t off, int bufnum, xfs_agnumber_t agnum)
+{
+	int rval;
+	/* compiled out; NOTE(review): get_sb() call below uses 3 args, not 4 */
+	ASSERT(bufnum < NUM_SBS);
+
+	/* try and read it first */
+
+	if (lseek64(fs_fd, off, SEEK_SET) != off)
+		return(XR_EOF);
+
+	if ((rval = read(fs_fd, sb_bufs[bufnum], sbbuf_size)) != sbbuf_size)  {
+		/*
+		 * we didn't get a full block so the filesystem
+		 * could not have been grown.  return a non-XR_OK
+		 * result code.
+		 */
+		return(XR_EOF);
+	}
+
+	return(get_sb(off, bufnum, agnum));
+}
+#endif
+/* returns element on list with highest reference count */
+
+fs_geo_list_t *
+get_best_geo(fs_geo_list_t *list)
+{
+	fs_geo_list_t	*node;
+	fs_geo_list_t	*best = NULL;
+	int		most_refs = 0;
+
+	/* strict comparison keeps the earliest entry among equal counts */
+	for (node = list; node != NULL; node = node->next)  {
+		if (node->refs <= most_refs)
+			continue;
+		most_refs = node->refs;
+		best = node;
+	}
+
+	/* NULL when the list is empty or no entry has refs above zero */
+	return(best);
+}
+
+/* adds geometry info to linked list.  returns (sometimes new) head of list */
+
+fs_geo_list_t *
+add_geo(fs_geo_list_t *list, fs_geometry_t *geo_p, int index)
+{
+	fs_geo_list_t	*node;
+
+	/* an identical geometry already on the list just gains a reference */
+	for (node = list; node != NULL; node = node->next)  {
+		if (memcmp(geo_p, &node->geo, sizeof(fs_geometry_t)) != 0)
+			continue;
+		node->refs++;
+		return(list);
+	}
+
+	node = malloc(sizeof(fs_geo_list_t));
+	if (node == NULL) {
+		do_error("couldn't malloc geometry structure\n");
+		exit(1);
+	}
+
+	/* first sighting: the new entry becomes the head of the list */
+	node->index = index;
+	node->refs = 1;
+	node->geo = *geo_p;
+	node->next = list;
+	return(node);
+}
+
+void
+free_geo(fs_geo_list_t *list)
+{
+	fs_geo_list_t	*victim;
+
+	/* step to the successor first: a node's next pointer
+	 * must not be read once the node has been freed */
+	while (list != NULL)  {
+		victim = list;
+		list = list->next;
+		free(victim);
+	}
+}
+
+void
+get_sb_geometry(fs_geometry_t *geo, xfs_sb_t *sbp)
+{
+	__uint16_t	vn = sbp->sb_versionnum;
+	int		ino_ok;
+	int		stripe_ok;
+	int		zeroed;
+
+	bzero(geo, sizeof(fs_geometry_t));
+
+	/*
+	 * sizes and counts that get copied no matter what
+	 */
+	geo->sb_blocksize = sbp->sb_blocksize;
+	geo->sb_sectsize = sbp->sb_sectsize;
+	geo->sb_inodesize = sbp->sb_inodesize;
+	geo->sb_dblocks = sbp->sb_dblocks;
+	geo->sb_agblocks = sbp->sb_agblocks;
+	geo->sb_agcount = sbp->sb_agcount;
+	geo->sb_logstart = sbp->sb_logstart;
+	geo->sb_logblocks = sbp->sb_logblocks;
+	geo->sb_rblocks = sbp->sb_rblocks;
+	geo->sb_rextents = sbp->sb_rextents;
+	geo->sb_rextsize = sbp->sb_rextsize;
+	geo->sb_rbmblocks = sbp->sb_rbmblocks;
+
+	/*
+	 * one flag per feature bit; the shared flag is also raised
+	 * by any bit in XR_PART_SECSB_VNMASK
+	 */
+	if (XFS_SB_VERSION_HASALIGN(sbp))
+		geo->sb_ialignbit = 1;
+	if (XFS_SB_VERSION_HASDALIGN(sbp))
+		geo->sb_salignbit = 1;
+	if (XFS_SB_VERSION_HASEXTFLGBIT(sbp))
+		geo->sb_extflgbit = 1;
+	if (XFS_SB_VERSION_HASSHARED(sbp) || (vn & XR_PART_SECSB_VNMASK))
+		geo->sb_sharedbit = 1;
+
+	/*
+	 * pre-6.5 mkfs could leave garbage in the later fields of
+	 * the secondary superblocks.  a field is believed only when
+	 * its own feature bit is set, or when the version bits show
+	 * that a field located after it was properly initialized.
+	 * which version bits prove that depends on pre_65_beta.
+	 */
+	if (pre_65_beta)  {
+		ino_ok = vn & XR_ALPHA_SECSB_VNMASK;
+		stripe_ok = XFS_SB_VERSION_HASDALIGN(sbp);
+		zeroed = vn & XR_ALPHA_SECSB_VNMASK;
+	} else  {
+		ino_ok = vn & XR_PART_SECSB_VNMASK;
+		stripe_ok = vn & XR_GOOD_SECSB_VNMASK;
+		zeroed = vn & XR_GOOD_SECSB_VNMASK;
+	}
+
+	/*
+	 * inode alignment field lives before the data alignment field
+	 */
+	if (ino_ok)
+		geo->sb_inoalignmt = sbp->sb_inoalignmt;
+	if (stripe_ok)  {
+		geo->sb_unit = sbp->sb_unit;
+		geo->sb_width = sbp->sb_width;
+	}
+
+	/*
+	 * shared vn lives between the quota and inode alignment
+	 * fields, so either alignment feature makes it valid
+	 */
+	if (vn & XR_PART_SECSB_VNMASK)
+		geo->sb_shared_vn = sbp->sb_shared_vn;
+
+	/* flag sectors whose feature bits show they were bzero'd at mkfs time */
+	if (zeroed)
+		geo->sb_fully_zeroed = 1;
+}
+
+/*
+ * the way to verify that a primary sb is consistent with the
+ * filesystem is find the secondaries given the info in the
+ * primary and compare the geometries in the secondaries against
+ * the geometry indicated by the primary.
+ *
+ * returns 0 if ok, 1 if a seek fails, XR_INSUFF_SEC_SB if too few sbs verify
+ */
+int
+verify_set_primary_sb(xfs_sb_t		*rsb,
+			int		sb_index,
+			int		*sb_modified)
+{
+	xfs_off_t		off;
+	fs_geometry_t	geo;
+	xfs_sb_t	*sb;
+	fs_geo_list_t	*list;
+	fs_geo_list_t	*current;
+	char		*checked;
+	xfs_agnumber_t	agno;
+	int		num_sbs;
+	int		skip;
+	int		size;
+	int		num_ok;
+	int		retval;
+	int		round;
+
+	/*
+	 * select the number of secondaries to try for
+	 */
+	num_sbs = MIN(NUM_SBS, rsb->sb_agcount);
+	skip = howmany(num_sbs, rsb->sb_agcount);
+	size = NUM_AGH_SECTS * rsb->sb_sectsize;
+	retval = 0;
+	list = NULL;
+	num_ok = 0;
+	*sb_modified = 0;
+	sb = (xfs_sb_t *) alloc_ag_buf(size);
+	checked = calloc(rsb->sb_agcount, sizeof(char));
+	if (!checked) {
+		do_error("calloc failed in verify_set_primary_sb\n");
+		exit(1);
+	}
+
+	/*
+	 * put the primary sb geometry info onto the geometry list
+	 */
+	checked[sb_index] = 1;
+	get_sb_geometry(&geo, rsb);
+	list = add_geo(list, &geo, sb_index);
+
+	/*
+	 * grab N secondaries.  check them off as we get them
+	 * so we only process each one once
+	 */
+	for (round = 0; round < skip; round++)  {
+		for (agno = round; agno < rsb->sb_agcount; agno += skip)  {
+			if (checked[agno])
+				continue;
+
+			off = (xfs_off_t)agno * rsb->sb_agblocks << rsb->sb_blocklog;
+
+			checked[agno] = 1;
+
+			if (get_sb(sb, off, size, agno) == XR_EOF)  {
+				retval = 1;
+				goto out;
+			}
+
+			if (verify_sb(sb, 0) == XR_OK)  {
+				/*
+				 * save away geometry info.
+				 * don't bother checking the sb
+				 * against the agi/agf as the odds
+				 * of the sb being corrupted in a way
+				 * that it is internally consistent
+				 * but not consistent with the rest
+				 * of the filesystem is really really low.
+				 */
+				get_sb_geometry(&geo, sb);
+				list = add_geo(list, &geo, agno);
+				num_ok++;
+			}
+		}
+	}
+
+	/*
+	 * see if we have enough superblocks to bother with
+	 */
+	if (num_ok < num_sbs / 2)  {
+		retval = XR_INSUFF_SEC_SB;
+		goto out;
+	}
+
+	current = get_best_geo(list);
+
+	/*
+	 * check that enough sbs agree that we're willing to
+	 * go with this geometry.  if not, print out the
+	 * geometry and a message about the force option.
+	 */
+	switch (num_sbs)  {
+	case 2:
+		/*
+		 * all them have to be right.  if not, report geometry
+		 * and get out unless force option is in effect (-F)
+		 */
+		if (current->refs != 2)  {
+			if (!force_geo)  {
+				do_warn("Only two AGs detected and they do not match - cannot proceed.\n");
+				exit(1);
+			}
+		}
+		break;
+	case 1:
+		/*
+		 * just report the geometry info and get out.
+		 * refuse to run further unless the force (-F)
+		 * option is in effect.
+		 */
+		if (!force_geo)  {
+			do_warn("Only one AG detected - cannot proceed.\n");
+			exit(1);
+		}	/* FALLTHROUGH */
+	default:
+		/*
+		 * at least half of the probed superblocks have
+		 * to agree.  if they don't, this fs is probably
+		 * too far gone anyway considering the fact that
+		 * XFS normally doesn't alter the secondary superblocks.
+		 */
+		if (current->refs < num_sbs / 2)  {
+			do_warn("Not enough matching superblocks - cannot proceed.\n");
+			exit(1);
+		}
+	}
+
+	/*
+	 * set the geometry into primary superblock if necessary.
+	 */
+	if (current->index != sb_index)  {
+		*sb_modified = 1;
+		off = (xfs_off_t)current->index * current->geo.sb_agblocks
+			* current->geo.sb_blocksize;
+		if (get_sb(sb, off, current->geo.sb_sectsize,
+				current->index) != XR_OK)
+			do_error("could not read superblock\n");
+
+		copy_sb(sb, rsb);
+
+		/*
+		 * turn off inprogress bit since this is the primary.
+		 * also save away values that we need to ensure are
+		 * consistent in the other secondaries.
+		 */
+		rsb->sb_inprogress = 0;
+		sb_inoalignmt = sb->sb_inoalignmt;
+		sb_unit = sb->sb_unit;
+		sb_width = sb->sb_width;
+	}
+	/* every exit path releases the geometry list and both buffers */
+out:
+	free_geo(list);
+	free(sb);
+	free(checked);
+	return(retval);
+}
diff --git a/repair/scan.c b/repair/scan.c
new file mode 100644
index 000000000..e6228a230
--- /dev/null
+++ b/repair/scan.c
@@ -0,0 +1,1279 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "scan.h"
+#include "versions.h"
+#include "bmap.h"
+
+extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
+		xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i);
+
+static xfs_mount_t	*mp = NULL;
+static xfs_extlen_t	bno_agffreeblks;
+static xfs_extlen_t	cnt_agffreeblks;
+static xfs_extlen_t	bno_agflongest;
+static xfs_extlen_t	cnt_agflongest;
+static xfs_agino_t	agicount;
+static xfs_agino_t	agifreecount;
+
+void
+set_mp(xfs_mount_t *mpp)
+{
+	mp = mpp;	/* file-scope mount pointer read by the scan routines below */
+}
+
+void
+scan_sbtree(
+	xfs_agblock_t	root,
+	int		nlevels,
+	xfs_agnumber_t	agno,
+	int		suspect,
+	void		(*func)(xfs_btree_sblock_t	*block,
+				int			level,
+				xfs_agblock_t		bno,
+				xfs_agnumber_t		agno,
+				int			suspect,
+				int			isroot),
+	int		isroot)
+{
+	xfs_buf_t	*blkbuf = libxfs_readbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, agno, root),
+				XFS_FSB_TO_BB(mp, 1), 0);
+
+	if (blkbuf == NULL) {
+		do_error("can't read btree block %d/%d\n", agno, root);
+		return;
+	}
+	func((xfs_btree_sblock_t *)XFS_BUF_PTR(blkbuf), nlevels - 1,
+		root, agno, suspect, isroot);
+	libxfs_putbuf(blkbuf);
+}
+
+/*
+ * returns 1 on bad news (inode needs to be cleared), 0 on good
+ */
+int
+scan_lbtree(
+	xfs_dfsbno_t	root,
+	int		nlevels,
+	int		(*func)(xfs_btree_lblock_t	*block,
+				int			level,
+				int			type,
+				int			whichfork,
+				xfs_dfsbno_t		bno,
+				xfs_ino_t		ino,
+				xfs_drfsbno_t		*tot,
+				__uint64_t		*nex,
+				blkmap_t		**blkmapp,
+				bmap_cursor_t		*bm_cursor,
+				int			isroot,
+				int			check_dups,
+				int			*dirty),
+	int		type,
+	int		whichfork,
+	xfs_ino_t	ino,
+	xfs_drfsbno_t	*tot,
+	__uint64_t	*nex,
+	blkmap_t	**blkmapp,
+	bmap_cursor_t	*bm_cursor,
+	int		isroot,
+	int		check_dups)
+{
+	xfs_buf_t	*blkbuf;
+	int		dirty = 0;
+	int		result;
+
+	blkbuf = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
+			XFS_FSB_TO_BB(mp, 1), 0);
+	if (blkbuf == NULL)  {
+		do_error("can't read btree block %d/%d\n",
+			XFS_FSB_TO_AGNO(mp, root),
+			XFS_FSB_TO_AGBNO(mp, root));
+		return(1);
+	}
+	result = func((xfs_btree_lblock_t *)XFS_BUF_PTR(blkbuf), nlevels - 1,
+			type, whichfork, root, ino, tot, nex, blkmapp,
+			bm_cursor, isroot, check_dups, &dirty);
+
+	/* the callback may only flag the block dirty when writes are allowed */
+	ASSERT(dirty == 0 || dirty && !no_modify);
+	if (!dirty || no_modify)
+		libxfs_putbuf(blkbuf);
+	else
+		libxfs_writebuf(blkbuf, 0);
+
+	return(result);
+}
+
+int
+scanfunc_bmap(
+	xfs_btree_lblock_t	*ablock,
+	int			level,
+	int			type,
+	int			whichfork,
+	xfs_dfsbno_t		bno,
+	xfs_ino_t		ino,
+	xfs_drfsbno_t		*tot,
+	__uint64_t		*nex,
+	blkmap_t		**blkmapp,
+	bmap_cursor_t		*bm_cursor,
+	int			isroot,
+	int			check_dups,
+	int			*dirty)
+{
+	xfs_bmbt_block_t	*block = (xfs_bmbt_block_t *)ablock;
+	int			i;
+	int			err;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_key_t		*pkey;
+	xfs_bmbt_rec_32_t	*rp;
+	xfs_dfiloff_t		first_key;
+	xfs_dfiloff_t		last_key;
+	char			*forkname;
+
+	if (whichfork == XFS_DATA_FORK)
+		forkname = "data";
+	else
+		forkname = "attr";
+
+	/*
+	 * unlike the ag freeblock btrees, if anything looks wrong 
+	 * in an inode bmap tree, just bail.  it's possible that
+	 * we'll miss a case where the to-be-toasted inode and
+	 * another inode are claiming the same block but that's
+	 * highly unlikely.
+	 */
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_BMAP_MAGIC) {
+		do_warn(
+		"bad magic # %#x in inode %llu (%s fork) bmbt block %llu\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), ino, forkname, bno);
+		return(1);
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		do_warn(
+	"expected level %d got %d in inode %llu, (%s fork) bmbt block %llu\n",
+			level, INT_GET(block->bb_level, ARCH_CONVERT), ino, forkname, bno);
+		return(1);
+	}
+
+	if (check_dups == 0)  {
+		/*
+		 * check sibling pointers. if bad we have a conflict
+		 * between the sibling pointers and the child pointers
+		 * in the parent block.  blow out the inode if that happens
+		 */
+		if (bm_cursor->level[level].fsbno != NULLDFSBNO)  {
+			/*
+			 * this is not the first block on this level
+			 * so the cursor for this level has recorded the
+			 * values for this's block left-sibling.
+			 */
+			if (bno != bm_cursor->level[level].right_fsbno)  {
+				do_warn(
+	"bad fwd (right) sibling pointer (saw %llu parent block says %llu)\n",
+					bm_cursor->level[level].right_fsbno,
+					bno);
+				do_warn(
+		"\tin inode %llu (%s fork) bmap btree block %llu\n",
+					ino, forkname,
+					bm_cursor->level[level].fsbno);
+				return(1);
+			}
+			if (INT_GET(block->bb_leftsib, ARCH_CONVERT) !=
+					bm_cursor->level[level].fsbno)  {
+				do_warn(
+	"bad back (left) sibling pointer (saw %llu parent block says %llu)\n",
+					INT_GET(block->bb_leftsib, ARCH_CONVERT),
+					bm_cursor->level[level].fsbno);
+				do_warn(
+		"\tin inode %llu (%s fork) bmap btree block %llu\n",
+					ino, forkname, bno);
+				return(1);
+			}
+		} else {
+			/*
+			 * This is the first or only block on this level.
+			 * Check that the left sibling pointer is NULL
+			 */
+			if (INT_GET(block->bb_leftsib, ARCH_CONVERT) !=
+					NULLDFSBNO)  {
+				do_warn(
+	"bad back (left) sibling pointer (saw %llu should be NULL (0))\n",
+				INT_GET(block->bb_leftsib, ARCH_CONVERT));
+				do_warn(
+		"\tin inode %llu (%s fork) bmap btree block %llu\n",
+					ino, forkname, bno);
+				return(1);
+			}
+		}
+
+		/*
+		 * update cursor block pointers to reflect this block
+		 */
+		bm_cursor->level[level].fsbno = bno;
+		bm_cursor->level[level].left_fsbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+		bm_cursor->level[level].right_fsbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+
+		switch (get_fsbno_state(mp, bno))  {
+		case XR_E_UNKNOWN:
+		case XR_E_FREE1:
+		case XR_E_FREE:
+			set_fsbno_state(mp, bno, XR_E_INUSE);
+			break;
+		case XR_E_FS_MAP:
+		case XR_E_INUSE:
+			/*
+			 * we'll try and continue searching here since
+			 * the block looks like it's been claimed by file
+			 * to store user data, a directory to store directory
+			 * data, or the space allocation btrees but since
+			 * we made it here, the block probably
+			 * contains btree data.
+			 */
+			set_fsbno_state(mp, bno, XR_E_MULT);
+			do_warn(
+		"inode 0x%llx bmap block 0x%llx claimed, state is %d\n",
+				ino, (__uint64_t) bno,
+				get_fsbno_state(mp, bno));
+			break;
+		case XR_E_MULT:
+		case XR_E_INUSE_FS:
+			set_fsbno_state(mp, bno, XR_E_MULT);
+			do_warn(
+		"inode 0x%llx bmap block 0x%llx claimed, state is %d\n",
+				ino, (__uint64_t) bno,
+				get_fsbno_state(mp, bno));
+			/*
+			 * if we made it to here, this is probably a bmap block
+			 * that is being used by *another* file as a bmap block
+			 * so the block will be valid.  Both files should be
+			 * trashed along with any other file that impinges on
+			 * any blocks referenced by either file.  So we
+			 * continue searching down this btree to mark all
+			 * blocks duplicate
+			 */
+			break;
+		case XR_E_BAD_STATE:
+		default:
+			do_warn(
+		"bad state %d, inode 0x%llx bmap block 0x%llx\n",
+				get_fsbno_state(mp, bno),
+				ino, (__uint64_t) bno);
+			break;
+		}
+	} else  {
+		/*
+		 * attribute fork for realtime files is in the regular
+		 * filesystem
+		 */
+		if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK)  {
+			if (search_dup_extent(mp, XFS_FSB_TO_AGNO(mp, bno),
+					XFS_FSB_TO_AGBNO(mp, bno)))
+				return(1);
+		} else  {
+			if (search_rt_dup_extent(mp, bno))
+				return(1);
+		}
+	}
+	(*tot)++;
+	if (level == 0) {
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0] ||
+		    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[0])  {
+do_warn("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n",
+				ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+				mp->m_bmap_dmnr[0], mp->m_bmap_dmxr[0]);
+			return(1);
+		}
+		rp = (xfs_bmbt_rec_32_t *)
+			XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+			block, 1, mp->m_bmap_dmxr[0]);
+		*nex += INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		/*
+		 * XXX - if we were going to fix up the btree record,
+		 * we'd do it right here.  For now, if there's a problem,
+		 * we'll bail out and presumably clear the inode.
+		 */
+		if (check_dups == 0)  {
+			err = process_bmbt_reclist(mp, rp, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+					type, ino, tot, blkmapp,
+					&first_key, &last_key,
+					whichfork);
+			if (err)
+				return(1);
+			/*
+			 * check that key ordering is monotonically increasing.
+			 * if the last_key value in the cursor is set to
+			 * NULLDFILOFF, then we know this is the first block
+			 * on the leaf level and we shouldn't check the
+			 * last_key value.
+			 */
+			if (first_key <= bm_cursor->level[level].last_key &&
+					bm_cursor->level[level].last_key !=
+					NULLDFILOFF)  {
+				do_warn(
+"out-of-order bmap key (file offset) in inode %llu, %s fork, fsbno %llu\n",
+					ino, forkname, bno);
+				return(1);
+			}
+			/*
+			 * update cursor keys to reflect this block.
+			 * don't have to check if last_key is > first_key
+			 * since that gets checked by process_bmbt_reclist.
+			 */
+			bm_cursor->level[level].first_key = first_key;
+			bm_cursor->level[level].last_key = last_key;
+
+			return(0);
+		} else
+			return(scan_bmbt_reclist(mp, rp, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+						type, ino, tot, whichfork));
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[1] ||
+	    isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[1])  {
+do_warn("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n",
+			ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+			mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
+		return(1);
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+		mp->m_bmap_dmxr[1]);
+	pkey = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+		mp->m_bmap_dmxr[1]);
+
+	last_key = NULLDFILOFF;
+
+	for (i = 0, err = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)  {
+		/*
+		 * XXX - if we were going to fix up the interior btree nodes,
+		 * we'd do it right here.  For now, if there's a problem,
+		 * we'll bail out and presumably clear the inode.
+		 */
+		if (!verify_dfsbno(mp, INT_GET(pp[i], ARCH_CONVERT)))  {
+			do_warn("bad bmap btree ptr 0x%llx in ino %llu\n",
+				INT_GET(pp[i], ARCH_CONVERT), ino);
+			return(1);
+		}
+
+		err = scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap, type, whichfork,
+				ino, tot, nex, blkmapp, bm_cursor, 0,
+				check_dups);
+		if (err)
+			return(1);
+
+		/*
+		 * fix key (offset) mismatches between the first key
+		 * in the child block (as recorded in the cursor) and the
+		 * key in the interior node referencing the child block.
+		 *
+		 * fixes cases where entries have been shifted between
+		 * child blocks but the parent hasn't been updated.  We
+		 * don't have to worry about the key values in the cursor
+		 * not being set since we only look at the key values of
+		 * our child and those are guaranteed to be set by the
+		 * call to scan_lbtree() above.
+		 */
+		if (check_dups == 0 && INT_GET(pkey[i].br_startoff, ARCH_CONVERT) !=
+					bm_cursor->level[level-1].first_key)  {
+			if (!no_modify)  {
+				do_warn(
+		"correcting bt key (was %llu, now %llu) in inode %llu\n",
+					INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+					bm_cursor->level[level-1].first_key,
+					ino);
+				do_warn("\t\t%s fork, btree block %llu\n",
+					forkname, bno);
+				*dirty = 1;
+				INT_SET(pkey[i].br_startoff, ARCH_CONVERT, bm_cursor->level[level-1].first_key);
+			} else  {
+				do_warn(
+"bad btree key (is %llu, should be %llu) in inode %llu\n",
+					INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+					bm_cursor->level[level-1].first_key,
+					ino);
+				do_warn("\t\t%s fork, btree block %llu\n",
+					forkname, bno);
+			}
+		}
+	}
+
+	/*
+	 * Check that the last child block's forward sibling pointer
+	 * is NULL.
+	 */
+	if (check_dups == 0 && 
+		bm_cursor->level[level - 1].right_fsbno != NULLDFSBNO)  {
+		do_warn(
+	"bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n",
+			bm_cursor->level[level - 1].right_fsbno);
+		do_warn(
+		"\tin inode %llu (%s fork) bmap btree block %llu\n",
+			ino, forkname,
+			bm_cursor->level[level].fsbno);
+		return(1);
+	}
+
+	/*
+	 * update cursor keys to reflect this block
+	 */
+	if (check_dups == 0)  {
+		bm_cursor->level[level].first_key =
+				INT_GET(pkey[0].br_startoff, ARCH_CONVERT);
+		i = INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1;
+		bm_cursor->level[level].last_key =
+				INT_GET(pkey[i].br_startoff, ARCH_CONVERT);
+	}
+
+	return(0);
+}
+
+/*
+ * Scan one block of the bno free space btree (XFS_ABTB_MAGIC) of ag agno.
+ * level is the level the caller expects the block to be at (0 is a leaf),
+ * bno its ag-relative block number, suspect is set when an ancestor block
+ * looked damaged, isroot exempts the block from the minimum record check.
+ * The block is claimed as XR_E_FS_MAP in the incore block map.  Leaf records
+ * are added to bno_agffreeblks/bno_agflongest and their blocks are marked
+ * XR_E_FREE1; interior blocks recurse via scan_sbtree().  Records and
+ * pointers that fail validation are skipped without a message.
+ */
+void
+scanfunc_bno(
+	xfs_btree_sblock_t	*ablock,
+	int			level,
+	xfs_agblock_t		bno,
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot
+	)
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+	xfs_agblock_t		b, start, end;
+	xfs_extlen_t		len;
+	int			hdr_errors = 0;
+	int			ondisk_recs;
+	int			numrecs;
+	int			state;
+	int			i;
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTB_MAGIC) {
+		do_warn("bad magic # %#x in btbno block %d/%d\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno);
+		hdr_errors++;
+		if (suspect)
+			return;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		do_warn("expected level %d got %d in btbno block %d/%d\n",
+			level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno);
+		hdr_errors++;
+		if (suspect)
+			return;
+	}
+
+	/*
+	 * check for btree blocks multiply claimed.  a block that somebody
+	 * else already owns is marked XR_E_MULT and not looked at further.
+	 */
+	state = get_agbno_state(mp, agno, bno);
+	if (state != XR_E_UNKNOWN)  {
+		set_agbno_state(mp, agno, bno, XR_E_MULT);
+		do_warn(
+"bno freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n",
+				state, agno, bno, suspect);
+		return;
+	}
+	set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+	ondisk_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+	if (level == 0) {
+		/* force a bogus record count into the legal range */
+		numrecs = ondisk_recs;
+		if (ondisk_recs > mp->m_alloc_mxr[0])  {
+			numrecs = mp->m_alloc_mxr[0];
+			hdr_errors++;
+		}
+		if (isroot == 0 && ondisk_recs < mp->m_alloc_mnr[0])  {
+			numrecs = mp->m_alloc_mnr[0];
+			hdr_errors++;
+		}
+		if (hdr_errors)
+			suspect++;
+
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < numrecs; i++) {
+			start = INT_GET(rp[i].ar_startblock, ARCH_CONVERT);
+			len = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+			end = start + len;
+			/*
+			 * skip bogus records.  the last block of the extent
+			 * has to be valid too (and end must not wrap), or
+			 * the loop below would touch block map entries that
+			 * lie outside of this ag.
+			 */
+			if (len == 0 || len > MAXEXTLEN || start == 0 ||
+			    end < start ||
+			    !verify_agbno(mp, agno, start) ||
+			    !verify_agbno(mp, agno, end - 1))
+				continue;
+			bno_agffreeblks += len;
+			if (len > bno_agflongest)
+				bno_agflongest = len;
+			for (b = start; b < end; b++)  {
+				state = get_agbno_state(mp, agno, b);
+				if (state == XR_E_UNKNOWN)
+					set_agbno_state(mp, agno, b,
+							XR_E_FREE1);
+				else
+do_warn("block (%d,%d) multiply claimed by bno space tree, state - %d\n",
+						agno, b, state);
+			}
+		}
+		return;
+	}
+
+	/* interior block: sanitize the pointer count the same way */
+	numrecs = ondisk_recs;
+	if (ondisk_recs > mp->m_alloc_mxr[1])  {
+		numrecs = mp->m_alloc_mxr[1];
+		hdr_errors++;
+	}
+	if (isroot == 0 && ondisk_recs < mp->m_alloc_mnr[1])  {
+		numrecs = mp->m_alloc_mnr[1];
+		hdr_errors++;
+	}
+
+	/*
+	 * don't pass bogus tree flag down further if this block
+	 * looked ok.  bail out if two levels in a row look bad.
+	 */
+	if (hdr_errors)  {
+		if (suspect)
+			return;
+		suspect = 1;
+	} else
+		suspect = 0;
+
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < numrecs; i++)  {
+		/* XXX - sibling pointer checking would go right here */
+		b = INT_GET(pp[i], ARCH_CONVERT);
+		if (b != 0 && verify_agbno(mp, agno, b))
+			scan_sbtree(b, level, agno, suspect, scanfunc_bno, 0);
+	}
+}
+
+/*
+ * Scan one block of the cnt free space btree (XFS_ABTC_MAGIC) of ag agno.
+ * level is the expected level of the block (0 is a leaf), bno its ag-relative
+ * block number, suspect is set when an ancestor block looked damaged and
+ * isroot exempts the block from the minimum record check.  The block is
+ * claimed as XR_E_FS_MAP; leaf records feed cnt_agffreeblks/cnt_agflongest and
+ * move their blocks from XR_E_UNKNOWN to XR_E_FREE1 or XR_E_FREE1 to XR_E_FREE.
+ */
+void
+scanfunc_cnt(
+	xfs_btree_sblock_t	*ablock,
+	int			level,
+	xfs_agblock_t		bno,
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot
+	)
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+	xfs_agblock_t		b, start, end;
+	xfs_extlen_t		len;
+	int			hdr_errors = 0;
+	int			ondisk_recs;
+	int			numrecs;
+	int			state;
+	int			i;
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTC_MAGIC) {
+		do_warn("bad magic # %#x in btcnt block %d/%d\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno);
+		hdr_errors++;
+		if (suspect)
+			return;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		do_warn("expected level %d got %d in btcnt block %d/%d\n",
+			level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno);
+		hdr_errors++;
+		if (suspect)
+			return;
+	}
+
+	/*
+	 * check for btree blocks multiply claimed.  a block that somebody
+	 * else already owns is marked XR_E_MULT and not looked at further.
+	 */
+	state = get_agbno_state(mp, agno, bno);
+	if (state != XR_E_UNKNOWN)  {
+		set_agbno_state(mp, agno, bno, XR_E_MULT);
+		do_warn(
+"bcnt freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n",
+				state, agno, bno, suspect);
+		return;
+	}
+	set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+	ondisk_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+	if (level == 0) {
+		/* force a bogus record count into the legal range */
+		numrecs = ondisk_recs;
+		if (ondisk_recs > mp->m_alloc_mxr[0])  {
+			numrecs = mp->m_alloc_mxr[0];
+			hdr_errors++;
+		}
+		if (isroot == 0 && ondisk_recs < mp->m_alloc_mnr[0])  {
+			numrecs = mp->m_alloc_mnr[0];
+			hdr_errors++;
+		}
+		if (hdr_errors)
+			suspect++;
+
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < numrecs; i++) {
+			start = INT_GET(rp[i].ar_startblock, ARCH_CONVERT);
+			len = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+			end = start + len;
+			/*
+			 * skip bogus records.  the last block of the extent
+			 * has to be valid too (and end must not wrap), or
+			 * the loop below would touch block map entries that
+			 * lie outside of this ag.
+			 */
+			if (len == 0 || len > MAXEXTLEN || start == 0 ||
+			    end < start ||
+			    !verify_agbno(mp, agno, start) ||
+			    !verify_agbno(mp, agno, end - 1))
+				continue;
+			cnt_agffreeblks += len;
+			if (len > cnt_agflongest)
+				cnt_agflongest = len;
+			for (b = start; b < end; b++)  {
+				state = get_agbno_state(mp, agno, b);
+				/*
+				 * no warning messages -- we'll catch
+				 * FREE1 blocks later
+				 */
+				if (state == XR_E_FREE1)
+					set_agbno_state(mp, agno, b, XR_E_FREE);
+				else if (state == XR_E_UNKNOWN)
+					set_agbno_state(mp, agno, b,
+							XR_E_FREE1);
+				else
+					do_warn(
+				"block (%d,%d) already used, state %d\n",
+						agno, b, state);
+			}
+		}
+		return;
+	}
+
+	/* interior block: sanitize the pointer count the same way */
+	numrecs = ondisk_recs;
+	if (ondisk_recs > mp->m_alloc_mxr[1])  {
+		numrecs = mp->m_alloc_mxr[1];
+		hdr_errors++;
+	}
+	if (isroot == 0 && ondisk_recs < mp->m_alloc_mnr[1])  {
+		numrecs = mp->m_alloc_mnr[1];
+		hdr_errors++;
+	}
+
+	/*
+	 * don't pass bogus tree flag down further if this block
+	 * looked ok.  bail out if two levels in a row look bad.
+	 */
+	if (hdr_errors)  {
+		if (suspect)
+			return;
+		suspect = 1;
+	} else
+		suspect = 0;
+
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < numrecs; i++)  {
+		b = INT_GET(pp[i], ARCH_CONVERT);
+		if (b != 0 && verify_agbno(mp, agno, b))
+			scan_sbtree(b, level, agno, suspect, scanfunc_cnt, 0);
+	}
+}
+
+/*
+ * this one walks the inode btrees sucking the info there into
+ * the incore avl tree.  We try and rescue corrupted btree records
+ * to minimize our chances of losing inodes.  Inode info from potentially
+ * corrupt sources could be bogus so rather than put the info straight
+ * into the tree, instead we put it on a list and try and verify the
+ * info in the next phase by examining what's on disk.  At that point,
+ * we'll be able to figure out what's what and stick the corrected info
+ * into the tree.  We do bail out at some point and give up on a subtree
+ * so as to avoid walking randomly all over the ag.
+ *
+ * Note that it's also ok if the free/inuse info is wrong, we can correct
+ * that when we examine the on-disk inode.  The important thing is to
+ * get the start and alignment of the inode chunks right.  Those chunks
+ * that we aren't sure about go into the uncertain list.
+ */
+void
+scanfunc_ino(
+	xfs_btree_sblock_t	*ablock,	/* inode btree block to scan */
+	int			level,		/* expected level, 0 is a leaf */
+	xfs_agblock_t		bno,		/* ag block number of ablock */
+	xfs_agnumber_t		agno,		/* ag the block belongs to */
+	int			suspect,	/* set if an ancestor looked bad */
+	int			isroot		/* set: skip min record count check */
+	)
+{
+	xfs_ino_t		lino;		/* XFS_AGINO_TO_INO() of ino, used in messages */
+	xfs_inobt_block_t	*block;
+	int			i;
+	xfs_agino_t		ino;		/* ir_startino of the current record */
+	xfs_agblock_t		agbno;
+	int			j;
+	int			nfree;		/* inodes XFS_INOBT_IS_FREE() reports free */
+	int			off;		/* XFS_AGINO_TO_OFFSET() of ino */
+	int			numrecs;
+	int			state;
+	xfs_inobt_ptr_t		*pp;
+	xfs_inobt_rec_t		*rp;
+	ino_tree_node_t		*ino_rec, *first_rec, *last_rec;
+	int			hdr_errors;
+
+	block = (xfs_inobt_block_t *)ablock;
+	hdr_errors = 0;
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_IBT_MAGIC) {
+		do_warn("bad magic # %#x in inobt block %d/%d\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno);
+		hdr_errors++;
+		bad_ino_btree = 1;
+		if (suspect)
+			return;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		do_warn("expected level %d got %d in inobt block %d/%d\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno);
+		hdr_errors++;
+		bad_ino_btree = 1;
+		if (suspect)
+			return;
+	}
+
+	/*
+	 * check for btree blocks multiply claimed, any unknown/free state
+	 * is ok in the block map.  a claimed block is still scanned below.
+	 */
+	state = get_agbno_state(mp, agno, bno);
+
+	switch (state)  {
+	case XR_E_UNKNOWN:
+	case XR_E_FREE1:
+	case XR_E_FREE:
+		set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+		break;
+	default:	/* already claimed: mark it, warn, carry on (no return) */
+		set_agbno_state(mp, agno, bno, XR_E_MULT);
+		do_warn(
+"inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n",
+				state, agno, bno, suspect);
+	}
+
+	numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+	/*
+	 * leaf record in btree
+	 */
+	if (level == 0) {
+		/* check for trashed btree block */
+
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[0])  {
+			numrecs = mp->m_inobt_mxr[0];
+			hdr_errors++;
+		}
+		if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[0])  {
+			numrecs = mp->m_inobt_mnr[0];
+			hdr_errors++;
+		}
+
+		if (hdr_errors)  {
+			bad_ino_btree = 1;
+			do_warn("dubious inode btree block header %d/%d\n",
+				agno, bno);
+			suspect++;
+		}
+
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+			1, mp->m_inobt_mxr[0]);
+
+		/*
+		 * step through the records, each record points to
+		 * a chunk of inodes.  The start of inode chunks should
+		 * be block-aligned.  Each inode btree rec should point
+		 * to the start of a block of inodes or the start of a group
+		 * of INODES_PER_CHUNK (64) inodes.  off is the offset into
+		 * the block.  skip processing of bogus records.
+		 */
+		for (i = 0; i < numrecs; i++) {
+			ino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+			off = XFS_AGINO_TO_OFFSET(mp, ino);
+			agbno = XFS_AGINO_TO_AGBNO(mp, ino);
+			lino = XFS_AGINO_TO_INO(mp, agno, ino);
+			/*
+			 * on multi-block block chunks, all chunks start
+			 * at the beginning of the block.  with multi-chunk
+			 * blocks, all chunks must start on 64-inode boundaries
+			 * since each block can hold N complete chunks. if
+			 * fs has aligned inodes, all chunks must start
+			 * at a fs_ino_alignment*N'th agbno.  skip recs
+			 * with badly aligned starting inodes.
+			 */
+			if (ino == 0 ||
+			    (inodes_per_block <= XFS_INODES_PER_CHUNK &&
+			     off !=  0) ||
+			    (inodes_per_block > XFS_INODES_PER_CHUNK &&
+			     off % XFS_INODES_PER_CHUNK != 0) ||
+			    (fs_aligned_inodes &&
+			     agbno % fs_ino_alignment != 0))  {
+				do_warn(
+			"badly aligned inode rec (starting inode = %llu)\n",
+					lino);
+				suspect++;	/* never reset in this leaf: taints later recs too */
+			}
+
+			/*
+			 * verify numeric validity of inode chunk first
+			 * before inserting into a tree.  don't have to
+			 * worry about the overflow case because the
+			 * starting ino number of a chunk can only get
+			 * within 255 inodes of max (NULLAGINO).  if it
+			 * gets closer, the agino number will be illegal
+			 * as the agbno will be too large.
+			 */
+			if (verify_aginum(mp, agno, ino))  {	/* non-zero is treated as bad */
+				do_warn(
+"bad starting inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n",
+					lino, agno, ino);
+				suspect++;
+				continue;
+			}
+
+			if (verify_aginum(mp, agno,
+					ino + XFS_INODES_PER_CHUNK - 1))  {
+				do_warn(
+"bad ending inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n",
+					lino + XFS_INODES_PER_CHUNK - 1,
+					agno, ino + XFS_INODES_PER_CHUNK - 1);
+				suspect++;
+				continue;
+			}
+
+			/*
+			 * set state of each block containing inodes
+			 */
+			if (off == 0 && !suspect)  {
+				for (j = 0;
+				     j < XFS_INODES_PER_CHUNK;
+				     j += mp->m_sb.sb_inopblock)  {
+					agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
+					state = get_agbno_state(mp,
+							agno, agbno);
+
+					if (state == XR_E_UNKNOWN)  {
+						set_agbno_state(mp, agno,
+							agbno, XR_E_INO);
+					} else if (state == XR_E_INUSE_FS &&
+						agno == 0 &&
+						ino + j >= first_prealloc_ino &&
+						ino + j < last_prealloc_ino)  {
+						set_agbno_state(mp, agno,
+							agbno, XR_E_INO);
+					} else  {
+						do_warn(
+"inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n",
+							agno, bno,
+							mp->m_sb.sb_inopblock);
+						suspect++;
+						/*
+						 * XXX - maybe should mark
+						 * block a duplicate
+						 */
+						continue;	/* inner j loop: next block of this chunk */
+					}
+				}
+			}
+			/*
+			 * ensure only one avl entry per chunk
+			 */
+			find_inode_rec_range(agno, ino,
+					ino + XFS_INODES_PER_CHUNK,
+					&first_rec,
+					&last_rec);
+			if (first_rec != NULL)  {
+				/*
+				 * this chunk overlaps with one (or more)
+				 * already in the tree
+				 */
+				do_warn(
+"inode rec for ino %llu (%d/%d) overlaps existing rec (start %d/%d)\n",
+					lino, agno, ino,
+					agno, first_rec->ino_startnum);
+				suspect++;
+
+				/*
+				 * if the 2 chunks start at the same place,
+				 * then we don't have to put this one
+				 * in the uncertain list.  go to the next one.
+				 */
+				if (first_rec->ino_startnum == ino)
+					continue;
+			}
+
+			agicount += XFS_INODES_PER_CHUNK;
+			agifreecount += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+			nfree = 0;
+
+			/*
+			 * now mark all the inodes as existing and free or used.
+			 * if the tree is suspect, put them into the uncertain
+			 * inode tree.
+			 */
+			if (!suspect)  {
+				if (XFS_INOBT_IS_FREE(&rp[i], 0, ARCH_CONVERT)) {
+					nfree++;
+					ino_rec = set_inode_free_alloc(agno,
+									ino);
+				} else  {
+					ino_rec = set_inode_used_alloc(agno,
+									ino);
+				}
+				for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
+					if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)) {
+						nfree++;
+						set_inode_free(ino_rec, j);
+					} else  {
+						set_inode_used(ino_rec, j);
+					}
+				}
+			} else  {
+				for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+					if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)) {
+						nfree++;
+						add_aginode_uncertain(agno,
+								ino + j, 1);
+					} else  {
+						add_aginode_uncertain(agno,
+								ino + j, 0);
+					}
+				}
+			}
+
+			if (nfree != INT_GET(rp[i].ir_freecount, ARCH_CONVERT)) {
+				do_warn( "ir_freecount/free mismatch, inode chunk \
+%d/%d, freecount %d nfree %d\n",
+					agno, ino, INT_GET(rp[i].ir_freecount, ARCH_CONVERT), nfree);
+			}
+		}
+
+		if (suspect)
+			bad_ino_btree = 1;
+
+		return;
+	}
+
+	/*
+	 * interior record, continue on
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[1])  {
+		numrecs = mp->m_inobt_mxr[1];
+		hdr_errors++;
+	}
+	if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[1])  {
+		numrecs = mp->m_inobt_mnr[1];
+		hdr_errors++;
+	}
+
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+		mp->m_inobt_mxr[1]);
+
+	/*
+	 * don't pass bogus tree flag down further if this block
+	 * looked ok.  bail out if two levels in a row look bad.
+	 */
+
+	if (suspect && !hdr_errors)
+		suspect = 0;
+
+	if (hdr_errors)  {
+		bad_ino_btree = 1;
+		if (suspect)
+			return;
+		else suspect++;
+	}
+
+	for (i = 0; i < numrecs; i++)  {	/* zero or rejected pointers are skipped silently */
+		if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT)))
+			scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno, suspect,
+					scanfunc_ino, 0);
+	}
+}
+
+/*
+ * Mark the agfl block and the blocks on agf's free list in the incore block
+ * map; warn when the list length disagrees with agf_flcount.
+ */
+void
+scan_freelist(
+	xfs_agf_t	*agf)
+{
+	xfs_agnumber_t	agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	xfs_agfl_t	*agfl;
+	xfs_buf_t	*agflbuf;
+	xfs_agblock_t	bno;
+	int		i, count = 0;
+
+	if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+	    XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+	    XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
+		set_agbno_state(mp, agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+	if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+		return;
+	if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE ||
+	    INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE)  {
+		do_warn("agf %d freelist blocks bad, skipping freelist scan\n",
+			agno);
+		return;		/* bad on-disk index: the walk below would not end */
+	}
+	agflbuf = libxfs_readbuf(mp->m_dev,
+			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR), 1, 0);
+	if (!agflbuf)  {
+		do_abort("can't read agfl block for ag %d\n", agno);
+		return;
+	}
+	agfl = XFS_BUF_TO_AGFL(agflbuf);
+	for (i = INT_GET(agf->agf_flfirst, ARCH_CONVERT); ; )  {
+		bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT);
+		if (verify_agbno(mp, agno, bno))
+			set_agbno_state(mp, agno, bno, XR_E_FREE);
+		else
+			do_warn("bad agbno %u in agfl, agno %d\n", bno, agno);
+		count++;
+		if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+			break;
+		if (++i == XFS_AGFL_SIZE)	/* the list wraps around */
+			i = 0;
+	}
+	if (count != INT_GET(agf->agf_flcount, ARCH_CONVERT))
+		do_warn("freeblk count %d != flcount %d in ag %d\n", count,
+			INT_GET(agf->agf_flcount, ARCH_CONVERT), agno);
+	libxfs_putbuf(agflbuf);
+}
+
+/*
+ * Scan the headers, free list, free space btrees and inode btree of ag agno.
+ * sb, agf and agi are checked by verify_set_agheader(); the ones it flags
+ * are written back at the end unless we are running in no_modify mode.
+ */
+void
+scan_ag(
+	xfs_agnumber_t	agno)
+{
+	xfs_buf_t	*sbbuf;
+	xfs_buf_t	*agfbuf;
+	xfs_buf_t	*agibuf;
+	xfs_sb_t	*sb;
+	xfs_agf_t	*agf;
+	xfs_agi_t	*agi;
+	xfs_agblock_t	root;
+	int		sb_dirty = 0;
+	int		agf_dirty = 0;
+	int		agi_dirty = 0;
+	int		status;
+
+	/* per-ag totals that the scanfunc_* callbacks below add to */
+	bno_agffreeblks = bno_agflongest = 0;
+	cnt_agffreeblks = cnt_agflongest = 0;
+	agicount = agifreecount = 0;
+
+	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
+				1, 0);
+	if (!sbbuf)  {
+		do_error("can't get root superblock for ag %d\n", agno);
+		return;
+	}
+
+	/* sb receives a translated copy of the superblock in the buffer */
+	sb = calloc(BBSIZE, 1);
+	if (!sb)  {
+		do_error("can't allocate memory for superblock\n");
+		libxfs_putbuf(sbbuf);
+		return;
+	}
+	libxfs_xlate_sb(XFS_BUF_TO_SBP(sbbuf), sb, 1, ARCH_CONVERT,
+			XFS_SB_ALL_BITS);
+
+	agfbuf = libxfs_readbuf(mp->m_dev,
+			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1, 0);
+	if (!agfbuf)  {
+		do_error("can't read agf block for ag %d\n", agno);
+		libxfs_putbuf(sbbuf);
+		free(sb);
+		return;
+	}
+	agf = XFS_BUF_TO_AGF(agfbuf);
+
+	agibuf = libxfs_readbuf(mp->m_dev,
+			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1, 0);
+	if (!agibuf)  {
+		do_error("can't read agi block for ag %d\n", agno);
+		libxfs_putbuf(agfbuf);
+		libxfs_putbuf(sbbuf);
+		free(sb);
+		return;
+	}
+	agi = XFS_BUF_TO_AGI(agibuf);
+
+	/* fix up bad ag headers */
+	status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);
+	if (status & XR_AG_SB_SEC)  {
+		/*
+		 * clear bad sector bit because we don't want
+		 * to skip further processing.  we just want to
+		 * ensure that we write out the modified sb buffer.
+		 */
+		status &= ~XR_AG_SB_SEC;
+		if (!no_modify)
+			sb_dirty = 1;
+	}
+	if (status & XR_AG_SB)  {
+		if (no_modify)
+			do_warn("would ");
+		else
+			sb_dirty = 1;
+		do_warn("reset bad sb for ag %d\n", agno);
+	}
+	if (status & XR_AG_AGF)  {
+		if (no_modify)
+			do_warn("would ");
+		else
+			agf_dirty = 1;
+		do_warn("reset bad agf for ag %d\n", agno);
+	}
+	if (status & XR_AG_AGI)  {
+		if (no_modify)
+			do_warn("would ");
+		else
+			agi_dirty = 1;
+		do_warn("reset bad agi for ag %d\n", agno);
+	}
+
+	/* headers are bad and we may not touch them: give up on this ag */
+	if (status && no_modify)  {
+		libxfs_putbuf(agibuf);
+		libxfs_putbuf(agfbuf);
+		libxfs_putbuf(sbbuf);
+		free(sb);
+		do_warn("bad uncorrected agheader %d, skipping ag...\n", agno);
+		return;
+	}
+
+	scan_freelist(agf);
+
+	/*
+	 * walk the three per-ag btrees from their roots.  a root that is
+	 * zero or that verify_agbno() rejects is reported, not followed.
+	 */
+	root = INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT);
+	if (root != 0 && verify_agbno(mp, agno, root))
+		scan_sbtree(root,
+			INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT),
+			agno, 0, scanfunc_bno, 1);
+	else
+		do_warn("bad agbno %u for btbno root, agno %d\n", root, agno);
+
+	root = INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT);
+	if (root != 0 && verify_agbno(mp, agno, root))
+		scan_sbtree(root,
+			INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT),
+			agno, 0, scanfunc_cnt, 1);
+	else
+		do_warn("bad agbno %u for btbcnt root, agno %d\n", root, agno);
+
+	root = INT_GET(agi->agi_root, ARCH_CONVERT);
+	if (root != 0 && verify_agbno(mp, agno, root))
+		scan_sbtree(root, INT_GET(agi->agi_level, ARCH_CONVERT),
+			agno, 0, scanfunc_ino, 1);
+	else
+		do_warn("bad agbno %u for inobt root, agno %d\n", root, agno);
+
+	/* write out the headers marked dirty above, just release the rest */
+	ASSERT(agi_dirty == 0 || agi_dirty && !no_modify);
+	if (!agi_dirty || no_modify)
+		libxfs_putbuf(agibuf);
+	else
+		libxfs_writebuf(agibuf, 0);
+
+	ASSERT(agf_dirty == 0 || agf_dirty && !no_modify);
+	if (!agf_dirty || no_modify)
+		libxfs_putbuf(agfbuf);
+	else
+		libxfs_writebuf(agfbuf, 0);
+
+	ASSERT(sb_dirty == 0 || sb_dirty && !no_modify);
+	if (!sb_dirty || no_modify)  {
+		libxfs_putbuf(sbbuf);
+	} else  {
+		libxfs_xlate_sb(XFS_BUF_PTR(sbbuf), sb, -1, ARCH_CONVERT,
+				XFS_SB_ALL_BITS);
+		libxfs_writebuf(sbbuf, 0);
+	}
+	free(sb);
+}
diff --git a/repair/scan.h b/repair/scan.h
new file mode 100644
index 000000000..42e152647
--- /dev/null
+++ b/repair/scan.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _XR_SCAN_H
+#define _XR_SCAN_H
+
+struct blkmap;
+
+void scan_sbtree(		/* used by scan_ag/scanfunc_* to visit per-ag btree blocks */
+	xfs_agblock_t	root,		/* ag block number the caller wants visited */
+	int		nlevels,	/* root: agf/agi level count; child: parent's level */
+	xfs_agnumber_t	agno,
+	int		suspect,	/* 0 from scan_ag, else the parent's verdict */
+	void		(*func)(xfs_btree_sblock_t	*block,	/* one of scanfunc_bno/cnt/ino */
+				int			level,
+				xfs_agblock_t		bno,
+				xfs_agnumber_t		agno,
+				int			suspect,
+				int			isroot),
+	int		isroot);	/* 1 from scan_ag, 0 for child blocks */
+
+int scan_lbtree(		/* used by scanfunc_bmap to descend; non-zero return is an error there */
+	xfs_dfsbno_t	root,
+	int		nlevels,
+	int		(*func)(xfs_btree_lblock_t	*block,	/* scanfunc_bmap in the visible callers */
+				int			level,
+				int			type,
+				int			whichfork,
+				xfs_dfsbno_t		bno,
+				xfs_ino_t		ino,
+				xfs_drfsbno_t		*tot,
+				__uint64_t		*nex,
+				struct blkmap		**blkmapp,
+				bmap_cursor_t		*bm_cursor,
+				int			isroot,
+				int			check_dups,
+				int			*dirty),
+	int		type,
+	int		whichfork,
+	xfs_ino_t	ino,
+	xfs_drfsbno_t	*tot,
+	__uint64_t	*nex,
+	struct blkmap	**blkmapp,
+	bmap_cursor_t	*bm_cursor,
+	int		isroot,
+	int		check_dups);
+
+int scanfunc_bmap(		/* bmap btree block check: returns 0 if ok, 1 on a problem */
+	xfs_btree_lblock_t	*ablock,
+	int			level,
+	int			type,
+	int			whichfork,
+	xfs_dfsbno_t		bno,
+	xfs_ino_t		ino,
+	xfs_drfsbno_t		*tot,
+	__uint64_t		*nex,		/* bb_numrecs of leaf blocks is added to *nex */
+	struct blkmap		**blkmapp,
+	bmap_cursor_t		*bm_cursor,
+	int			isroot,		/* set: skip the minimum record count check */
+	int			check_dups,
+	int			*dirty);	/* set to 1 when a btree key is corrected */
+
+void scanfunc_bno(		/* bno free space btree block (XFS_ABTB_MAGIC) */
+	xfs_btree_sblock_t	*ablock,
+	int			level,		/* expected level, 0 is a leaf */
+	xfs_agblock_t		bno,		/* ag block number of ablock */
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot);
+
+void scanfunc_cnt(		/* cnt free space btree block (XFS_ABTC_MAGIC) */
+	xfs_btree_sblock_t	*ablock,
+	int			level,		/* expected level, 0 is a leaf */
+	xfs_agblock_t		bno,		/* ag block number of ablock */
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot);
+
+void
+scanfunc_ino(			/* inode btree block (XFS_IBT_MAGIC) */
+	xfs_btree_sblock_t	*ablock,
+	int			level,		/* expected level, 0 is a leaf */
+	xfs_agblock_t		bno,		/* ag block number of ablock */
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot);
+
+#endif /* _XR_SCAN_H */
diff --git a/repair/versions.c b/repair/versions.c
new file mode 100644
index 000000000..526be2221
--- /dev/null
+++ b/repair/versions.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+
+#define EXTERN
+#include "versions.h"
+#undef EXTERN
+#include "err_protos.h"
+#include "globals.h"
+
+/*
+ * Bring the in-core superblock of mp in line with the fs_* feature
+ * globals: set the version bits of features that are in use, clear
+ * the quota and inode-alignment bits of features that are not, and
+ * scrub unknown bits out of sb_qflags.
+ */
+void
+update_sb_version(xfs_mount_t *mp)
+{
+	xfs_sb_t	*sb = &mp->m_sb;
+	__uint16_t	versnum;
+
+	/* a feature that is in use must have its version bit set */
+	if (fs_attributes && !XFS_SB_VERSION_HASATTR(sb))  {
+		ASSERT(fs_attributes_allowed);
+		XFS_SB_VERSION_ADDATTR(sb);
+	}
+
+	if (fs_inode_nlink && !XFS_SB_VERSION_HASNLINK(sb))  {
+		ASSERT(fs_inode_nlink_allowed);
+		XFS_SB_VERSION_ADDNLINK(sb);
+	}
+
+	if (!fs_quotas)  {
+		/*
+		 * quotas are not in use: zero the quota flags and
+		 * take the quota bit out of the version number
+		 */
+		sb->sb_qflags = 0;
+
+		if (XFS_SB_VERSION_HASQUOTA(sb))  {
+			lost_quotas = 1;
+			versnum = sb->sb_versionnum;
+			versnum &= ~XFS_SB_VERSION_QUOTABIT;
+
+			/* no feature bit remains set: convert with TOOLD */
+			if (!(versnum & XFS_SB_VERSION_ALLFBITS))
+				versnum = XFS_SB_VERSION_TOOLD(versnum);
+
+			ASSERT(versnum != 0);
+			sb->sb_versionnum = versnum;
+		}
+	} else  {
+		/*
+		 * quotas are in use: make sure the superblock says so
+		 */
+		if (!XFS_SB_VERSION_HASQUOTA(sb))  {
+			ASSERT(fs_quotas_allowed);
+			XFS_SB_VERSION_ADDQUOTA(sb);
+		}
+
+		/*
+		 * protect against stray bits in the quota flag field
+		 */
+		if (sb->sb_qflags &
+		    ~(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|XFS_UQUOTA_CHKD|
+		      XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD))  {
+			/*
+			 * the incore superblock is updated even in
+			 * no_modify mode; that is harmless because it
+			 * is never flushed out in that mode.
+			 */
+			do_warn("bogus quota flags 0x%x set in superblock",
+				sb->sb_qflags &
+				~(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|
+				  XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|
+				  XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD));
+
+			sb->sb_qflags &=
+				(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|
+				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|
+				 XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD);
+
+			if (no_modify)
+				do_warn(", bogus flags would be cleared\n");
+			else
+				do_warn(", bogus flags will be cleared\n");
+		}
+	}
+
+	/*
+	 * alignment is not in use but the align bit is set: apply
+	 * XFS_SB_VERSION_SUBALIGN (version 4 superblocks only)
+	 */
+	if (!fs_aligned_inodes && XFS_SB_VERSION_HASALIGN(sb) &&
+	    XFS_SB_VERSION_NUM(sb) == XFS_SB_VERSION_4)
+		XFS_SB_VERSION_SUBALIGN(sb);
+}
+
+/*
+ * Set the fs_* feature globals from sb.  Returns 0 if all is well, 1
+ * if sb has a version or feature this program does not understand.
+ * Also sets fs_ino_alignment and fs_max_file_offset from sb geometry.
+ */
+int
+parse_sb_version(xfs_sb_t *sb)
+{
+	int issue_warning = 0;
+
+	fs_attributes = 0;
+	fs_inode_nlink = 0;
+	fs_quotas = 0;
+	fs_aligned_inodes = 0;
+	fs_sb_feature_bits = 0;
+	fs_ino_alignment = 0;
+	fs_has_extflgbit = 0;
+	fs_shared = 0;
+	have_uquotino = 0;
+	have_pquotino = 0;
+
+	/*
+	 * ok, check to make sure that the sb isn't newer
+	 * than we are
+	 */
+	if (XFS_SB_VERSION_HASEXTFLGBIT(sb))  {
+		fs_has_extflgbit = 1;
+		if (!fs_has_extflgbit_allowed)  {
+			issue_warning = 1;
+			do_warn(
+			   "This filesystem has uninitialized extent flags.\n");
+		}
+	}
+
+	if (XFS_SB_VERSION_HASSHARED(sb))  {
+		fs_shared = 1;
+		if (!fs_shared_allowed)  {
+			issue_warning = 1;
+			do_warn("This filesystem is marked shared.\n");
+		}
+	}
+
+	if (issue_warning)  {
+		do_warn(
+"This filesystem uses 6.5 feature(s) not yet supported in this release.\n\
+Please run a 6.5 version of xfs_repair.\n");
+		return(1);
+	}
+
+	if (!XFS_SB_GOOD_VERSION(sb))  {
+		do_warn(
+	"WARNING:  unknown superblock version %d\n", XFS_SB_VERSION_NUM(sb));
+		do_warn(
+	"This filesystem contains features not understood by this program.\n");
+		return(1);
+	}
+
+	if (XFS_SB_VERSION_NUM(sb) == XFS_SB_VERSION_4)  {
+		if (!fs_sb_feature_bits_allowed)  {
+			do_warn(
+	"WARNING:  you have disallowed superblock feature bits\n");
+			do_warn(
+	"\tbut this superblock has feature bits.  The superblock\n");
+
+			if (!no_modify)  {
+				do_warn(
+	"\twill be downgraded.  This may cause loss of filesystem meta-data\n");
+			} else   {
+				do_warn(
+	"\twould be downgraded.  This might cause loss of filesystem\n");
+				do_warn(
+	"\tmeta-data.\n");
+			}
+		} else   {
+			fs_sb_feature_bits = 1;
+		}
+	}
+
+	if (XFS_SB_VERSION_HASATTR(sb))  {
+		if (!fs_attributes_allowed)  {
+			do_warn(
+	"WARNING:  you have disallowed attributes but this filesystem\n");
+			if (!no_modify)  {
+				do_warn(
+	"\thas attributes.  The filesystem will be downgraded and\n");
+				do_warn(
+	"\tall attributes will be removed.\n");
+			} else  {
+				do_warn(
+	"\thas attributes.  The filesystem would be downgraded and\n");
+				do_warn(
+	"\tall attributes would be removed.\n");
+			}
+		} else   {
+			fs_attributes = 1;
+		}
+	}
+
+	if (XFS_SB_VERSION_HASNLINK(sb))  {
+		if (!fs_inode_nlink_allowed)  {
+			do_warn(
+	"WARNING:  you have disallowed version 2 inodes but this filesystem\n");
+			if (!no_modify)  {
+				do_warn(
+	"\thas version 2 inodes.  The filesystem will be downgraded and\n");
+				do_warn(
+	"\tall version 2 inodes will be converted to version 1 inodes.\n");
+				do_warn(
+	"\tThis may cause some hard links to files to be destroyed\n");
+			} else  {
+				do_warn(
+	"\thas version 2 inodes.  The filesystem would be downgraded and\n");
+				do_warn(
+	"\tall version 2 inodes would be converted to version 1 inodes.\n");
+				do_warn(
+	"\tThis might cause some hard links to files to be destroyed\n");
+			}
+		} else   {
+			fs_inode_nlink = 1;
+		}
+	}
+
+	if (XFS_SB_VERSION_HASQUOTA(sb))  {
+		if (!fs_quotas_allowed)  {
+			do_warn(
+	"WARNING:  you have disallowed quotas but this filesystem\n");
+			if (!no_modify)  {
+				do_warn(
+	"\thas quotas.  The filesystem will be downgraded and\n");
+				do_warn(
+	"\tall quota information will be removed.\n");
+			} else  {
+				do_warn(
+	"\thas quotas.  The filesystem would be downgraded and\n");
+				do_warn(
+	"\tall quota information would be removed.\n");
+			}
+		} else   {
+			fs_quotas = 1;
+
+			if (sb->sb_uquotino != 0 &&
+					sb->sb_uquotino != NULLFSINO)
+				have_uquotino = 1;
+
+			if (sb->sb_pquotino != 0 &&
+					sb->sb_pquotino != NULLFSINO)
+				have_pquotino = 1;
+		}
+	}
+
+	if (XFS_SB_VERSION_HASALIGN(sb))  {
+		if (fs_aligned_inodes_allowed)  {
+			fs_aligned_inodes = 1;
+			fs_ino_alignment = sb->sb_inoalignmt;
+		} else   {
+			do_warn(
+	"WARNING:  you have disallowed aligned inodes but this filesystem\n");
+			if (!no_modify)  {
+				do_warn(
+	"\thas aligned inodes.  The filesystem will be downgraded.\n");
+				do_warn(
+"\tThis will permanently degrade the performance of this filesystem.\n");
+			} else  {
+				do_warn(
+	"\thas aligned inodes.  The filesystem would be downgraded.\n");
+				do_warn(
+"\tThis would permanently degrade the performance of this filesystem.\n");
+			}
+		}
+	}
+
+	/*
+	 * calculate maximum file offset for this geometry
+	 */
+	fs_max_file_offset = 0x7fffffffffffffffLL >> sb->sb_blocklog;
+
+	return(0);
+}
diff --git a/repair/versions.h b/repair/versions.h
new file mode 100644
index 000000000..5f592be41
--- /dev/null
+++ b/repair/versions.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_VERSIONS_H
+#define _XR_VERSIONS_H
+/* versions.c includes this with EXTERN empty to define the globals below */
+#ifndef EXTERN
+#define EXTERN extern
+#endif /* EXTERN */
+
+/*
+ * possible XFS filesystem features
+ *
+ * attributes					(6.2)
+ * inode version 2 (32-bit link counts)		(6.2)
+ * quotas					(6.2+)
+ * aligned inodes				(6.2+)
+ *
+ * bitmask fields happened after 6.2.
+ */
+
+/*
+ * filesystem feature global vars, set to 1 if the feature
+ * is *allowed*, 0 otherwise.  These can be set via command-line
+ * options
+ */
+
+EXTERN int		fs_attributes_allowed;
+EXTERN int		fs_inode_nlink_allowed;
+EXTERN int		fs_quotas_allowed;
+EXTERN int		fs_aligned_inodes_allowed;
+EXTERN int		fs_sb_feature_bits_allowed;
+EXTERN int		fs_has_extflgbit_allowed;
+EXTERN int		fs_shared_allowed;
+
+/*
+ * filesystem feature global vars, set to 1 if the feature
+ * is on, 0 otherwise
+ */
+
+EXTERN int		fs_attributes;
+EXTERN int		fs_inode_nlink;
+EXTERN int		fs_quotas;
+EXTERN int		fs_aligned_inodes;
+EXTERN int		fs_sb_feature_bits;
+EXTERN int		fs_has_extflgbit;
+EXTERN int		fs_shared;
+
+/*
+ * inode chunk alignment, fsblocks
+ */
+
+EXTERN xfs_extlen_t	fs_ino_alignment;
+
+/*
+ * modify superblock to reflect current state of global fs
+ * feature vars above
+ */
+void			update_sb_version(xfs_mount_t *mp);
+
+/*
+ * parse current sb to set above feature vars
+ */
+int			parse_sb_version(xfs_sb_t *sb);
+
+#endif /* _XR_VERSIONS_H */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
new file mode 100644
index 000000000..9f3203156
--- /dev/null
+++ b/repair/xfs_repair.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "avl64.h"
+#include "globals.h"
+#include "versions.h"
+#include "agheader.h"
+#include "protos.h"
+#include "incore.h"
+#include "err_protos.h"
+
+#define	rounddown(x, y)	(((x)/(y))*(y))
+/* functions called from main(), all defined outside this file */
+extern void	phase1(xfs_mount_t *);
+extern void	phase2(xfs_mount_t *, libxfs_init_t *);
+extern void	phase3(xfs_mount_t *);
+extern void	phase4(xfs_mount_t *);
+extern void	phase5(xfs_mount_t *);
+extern void	phase6(xfs_mount_t *);
+extern void	phase7(xfs_mount_t *);
+extern void	incore_init(xfs_mount_t *);
+/* NOTE(review): rounddown() and XR_MAX_SECT_SIZE are not used in this file */
+#define		XR_MAX_SECT_SIZE	(64 * 1024)
+
+/*
+ * option tables for getsubopt calls
+ */
+
+/*
+ * -o (user-supplied override options); each #define is the index of
+ * the string that follows it, which is what getsubopt() returns
+ */
+char *o_opts[] = {
+#define ASSUME_XFS	0
+	"assume_xfs",
+#define PRE_65_BETA	1
+	"fs_is_pre_65_beta",
+	NULL
+};
+
+/* print the command synopsis through do_warn() and exit with status 1 */
+static void usage(void)
+{
+	do_warn("Usage: %s [-nV] [-o subopt[=value]] [-l logdevice] devname\n",
+		progname);
+	exit(1);
+}
+
+static char *err_message[] = {	/* indexed by error code, see err_string() */
+	"no error",
+	"bad magic number",
+	"bad blocksize field",
+	"bad blocksize log field",
+	"bad version number",
+	"filesystem mkfs-in-progress bit set",
+	"inconsistent filesystem geometry information",
+	"bad inode size or inconsistent with number of inodes/block",
+	"bad sector size",
+	"AGF geometry info conflicts with filesystem geometry",
+	"AGI geometry info conflicts with filesystem geometry",
+	"AG superblock geometry info conflicts with filesystem geometry",
+	"attempted to perform I/O beyond EOF",
+	"inconsistent filesystem geometry in realtime filesystem component",
+	"maximum indicated percentage of inodes > 100%",
+	"inconsistent inode alignment value",
+	"not enough secondary superblocks with matching geometry",
+	"bad stripe unit in superblock",
+	"bad stripe width in superblock",
+	"bad shared version number in superblock"
+};
+
+/* return the message for an error code; a bad code ends in do_abort() */
+char *err_string(int err_code)
+{
+	int	valid = (err_code >= XR_OK && err_code < XR_BAD_ERR_CODE);
+	if (!valid)
+		do_abort("bad error code - %d\n", err_code);
+	return err_message[err_code];
+}
+
+/* complain that suboption tbl[idx] of -opt got a value; exits via usage() */
+static void noval(char opt, char *tbl[], int idx)
+{
+	do_warn("-%c %s option cannot have a value\n", opt, tbl[idx]);
+	usage();
+}
+
+/* report -opt (suboption tbl[idx] when tbl is set) as given twice */
+static void respec(char opt, char *tbl[], int idx)
+{
+	do_warn("-%c ", opt);
+	if (tbl != NULL)
+		do_warn("%s ", tbl[idx]);
+	do_warn("option respecified\n");
+	usage();
+}
+
+/* report the text s given to -opt as unrecognised; exits via usage() */
+static void unknown(char opt, char *s)
+{
+	do_warn("unknown option -%c %s\n", opt, s);
+	usage();
+}
+
+/*
+ * set the global option flags, log_name and fs_name from the arguments
+ */
+void
+process_args(int argc, char **argv)
+{
+	char *p;
+	int c;
+
+	log_spec = 0;
+	fs_is_dirty = 0;
+	verbose = 0;
+	no_modify = 0;
+	isa_file = 0;
+	dumpcore = 0;
+	full_backptrs = 0;
+	delete_attr_ok = 1;
+	force_geo = 0;
+	assume_xfs = 0;
+	clear_sunit = 0;
+	sb_inoalignmt = 0;
+	sb_unit = 0;
+	sb_width = 0;
+	fs_attributes_allowed = 1;
+	fs_inode_nlink_allowed = 1;
+	fs_quotas_allowed = 1;
+	fs_aligned_inodes_allowed = 1;
+	fs_sb_feature_bits_allowed = 1;
+	fs_has_extflgbit_allowed = 1;
+	pre_65_beta = 0;
+	fs_shared_allowed = 1;
+
+	/*
+	 * XXX have to add suboption processing here
+	 * attributes, quotas, nlinks, aligned_inos, sb_fbits
+	 */
+	while ((c = getopt(argc, argv, "o:fnDvVl:")) != EOF)  {
+		switch (c) {
+		case 'D':
+			dumpcore = 1;
+			break;
+		case 'o':
+			p = optarg;
+			while (*p != '\0')  {
+				char *val;
+
+				switch (getsubopt(&p, (constpp)o_opts, &val))  {
+				case ASSUME_XFS:
+					if (val)
+						noval('o', o_opts, ASSUME_XFS);
+					if (assume_xfs)
+						respec('o', o_opts, ASSUME_XFS);
+					assume_xfs = 1;
+					break;
+				case PRE_65_BETA:
+					if (val)
+						noval('o', o_opts, PRE_65_BETA);
+					if (pre_65_beta)
+						respec('o', o_opts,
+							PRE_65_BETA);
+					pre_65_beta = 1;
+					break;
+				default:
+					unknown('o', val);
+					break;
+				}
+			}
+			break;
+		case 'l':
+			log_name = optarg;
+			log_spec = 1;
+			break;
+		case 'f':
+			isa_file = 1;
+			break;
+		case 'n':
+			no_modify = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'V':
+			printf("%s version %s\n", progname, VERSION);
+			exit(0);	/* a version query needs no device */
+		case '?':
+			usage();
+		}
+	}
+
+	/* exactly one non-option argument, the device name, must remain */
+	if (argc - optind != 1)
+		usage();
+
+	if ((fs_name = argv[optind]) == NULL)
+		usage();
+}
+
+/* print a formatted message on stderr; when do_abort is set, terminate
+ * with abort() if dumpcore is set and with exit(1) otherwise */
+void do_msg(int do_abort, char const *msg, va_list args)
+{
+	vfprintf(stderr, msg, args);
+	if (!do_abort)
+		return;
+	if (dumpcore)
+		abort();
+	exit(1);
+}
+
+/* print "fatal error -- " and the message, then terminate via do_msg() */
+void do_error(char const *msg, ...)
+{
+	va_list	ap;
+
+	fputs("\nfatal error -- ", stderr);
+	va_start(ap, msg);
+	do_msg(1, msg, ap);
+	/* not reached: do_msg() exits when its first argument is non-zero */
+}
+
+/*
+ * report an internal error: the same as do_error but without the
+ * "fatal error -- " prefix; do_msg(1, ...) terminates the program
+ */
+void do_abort(char const *msg, ...)
+{
+	va_list	ap;
+
+	va_start(ap, msg);
+	do_msg(1, msg, ap);
+	/* not reached */
+}
+
+/* print a warning on stderr and set fs_is_dirty, which main() uses for
+ * its exit status in no_modify mode; execution continues */
+void do_warn(char const *msg, ...)
+{
+	va_list	ap;
+
+	va_start(ap, msg);
+	fs_is_dirty = 1;
+	do_msg(0, msg, ap);
+	va_end(ap);
+}
+
+/*
+ * print a message on stderr like do_warn(), but leave fs_is_dirty alone
+ */
+void do_log(char const *msg, ...)
+{
+	va_list	ap;
+
+	va_start(ap, msg);
+	do_msg(0, msg, ap);
+	va_end(ap);
+}
+
+/*
+ * Work out where mkfs would have put the AG 0 btree roots and the first
+ * inode chunk, record that in the bnobt_root/bcntbt_root/inobt_root and
+ * first/last_prealloc_ino globals, and force the root, realtime bitmap
+ * and realtime summary inode numbers in the in-core sb to match.
+ */
+void
+calc_mkfs(xfs_mount_t *mp)
+{
+	xfs_sb_t	*sbp = &mp->m_sb;
+	int		do_inoalign = mp->m_sinoalign;
+	xfs_agblock_t	fino_bno;
+
+	/*
+	 * the layout of ag 0 follows from what mkfs does: the three
+	 * btree roots come first, followed by the blocks used to
+	 * prefill the agfl, followed by the first inode chunk.
+	 */
+	bnobt_root = howmany(4 * sbp->sb_sectsize, sbp->sb_blocksize);
+	bcntbt_root = bnobt_root + 1;
+	inobt_root = bnobt_root + 2;
+	fino_bno = inobt_root + XFS_MIN_FREELIST_RAW(1, 1, mp) + 1;
+
+	/*
+	 * the first inode chunk ('/') starts at fino_bno, rounded up to
+	 * the stripe unit or to the inode alignment when one applies
+	 */
+	if (XFS_SB_VERSION_HASDALIGN(sbp) && do_inoalign)  {
+		first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
+					roundup(fino_bno, sbp->sb_unit), 0);
+	} else if (XFS_SB_VERSION_HASALIGN(sbp) && sbp->sb_inoalignmt > 1)  {
+		first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
+					roundup(fino_bno, sbp->sb_inoalignmt),
+					0);
+	} else  {
+		first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
+	}
+
+	ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+	/* one-block chunks end at the next block, larger ones a chunk on */
+	if (XFS_IALLOC_BLOCKS(mp) <= 1)
+		last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
+	else
+		last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
+
+	/*
+	 * now the first 3 inodes in the system.  A mismatching in-core
+	 * value is simply overwritten, which is safe because the
+	 * superblock doesn't get flushed out if no_modify is set.
+	 *
+	 * NOTE(review): the %llu conversions below assume the inode
+	 * number arguments are unsigned long long -- confirm the types.
+	 */
+	/* root inode */
+	if (sbp->sb_rootino != first_prealloc_ino)  {
+		do_warn(
+	"sb root inode value %llu inconsistent with calculated value %llu\n",
+			sbp->sb_rootino, first_prealloc_ino);
+
+		if (no_modify)
+			do_warn(
+			"would reset superblock root inode pointer to %llu\n",
+				first_prealloc_ino);
+		else
+			do_warn(
+			"resetting superblock root inode pointer to %llu\n",
+				first_prealloc_ino);
+
+		sbp->sb_rootino = first_prealloc_ino;
+	}
+
+	/* realtime bitmap inode */
+	if (sbp->sb_rbmino != first_prealloc_ino + 1)  {
+		do_warn(
+"sb realtime bitmap inode %llu inconsistent with calculated value %llu\n",
+			sbp->sb_rbmino, first_prealloc_ino + 1);
+
+		if (no_modify)
+			do_warn(
+		"would reset superblock realtime bitmap ino pointer to %llu\n",
+				first_prealloc_ino + 1);
+		else
+			do_warn(
+		"resetting superblock realtime bitmap ino pointer to %llu\n",
+				first_prealloc_ino + 1);
+
+		sbp->sb_rbmino = first_prealloc_ino + 1;
+	}
+
+	/* realtime summary inode */
+	if (sbp->sb_rsumino != first_prealloc_ino + 2)  {
+		do_warn(
+"sb realtime summary inode %llu inconsistent with calculated value %llu\n",
+			sbp->sb_rsumino, first_prealloc_ino + 2);
+
+		if (no_modify)
+			do_warn(
+		"would reset superblock realtime summary ino pointer to %llu\n",
+				first_prealloc_ino + 2);
+		else
+			do_warn(
+		"resetting superblock realtime summary ino pointer to %llu\n",
+				first_prealloc_ino + 2);
+
+		sbp->sb_rsumino = first_prealloc_ino + 2;
+	}
+}
+
+/*
+ * parse the arguments, run phase1 to make sure there is a superblock,
+ * mount via libxfs, run the remaining phases and write back the sb
+ */
+int
+main(int argc, char **argv)
+{
+	libxfs_init_t	args;
+	xfs_mount_t	*temp_mp;
+	xfs_mount_t	*mp;
+	xfs_sb_t	*sb;
+	xfs_buf_t	*sbp;
+	xfs_mount_t	xfs_m;
+
+	progname = basename(argv[0]);
+
+	temp_mp = &xfs_m;
+	setbuf(stdout, NULL);
+
+	process_args(argc, argv);
+	xfs_init(&args);
+
+	/* do phase1 to make sure we have a superblock */
+	phase1(temp_mp);
+
+	if (no_modify && primary_sb_modified)  {
+		do_warn("primary superblock would have been modified.\n");
+		do_warn("cannot proceed further in no_modify mode.\n");
+		do_warn("exiting now.\n");
+		exit(1);
+	}
+
+	/* prepare the mount structure */
+	sbp = libxfs_readbuf(args.ddev, XFS_SB_DADDR, 1, 0);
+	if (!sbp)
+		do_error("couldn't read the primary superblock\n");
+	memset(&xfs_m, 0, sizeof(xfs_mount_t));
+	sb = &xfs_m.m_sb;
+	libxfs_xlate_sb(XFS_BUF_PTR(sbp), sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+
+	mp = libxfs_mount(&xfs_m, sb, args.ddev, args.logdev, args.rtdev, 0);
+
+	if (!mp)  {
+		fprintf(stderr, "%s: cannot repair this filesystem.  Sorry.\n",
+			progname);
+		exit(1);
+	}
+	libxfs_putbuf(sbp);
+
+	/*
+	 * set XFS-independent status vars from the mount/sb structure
+	 */
+	glob_agcount = mp->m_sb.sb_agcount;
+
+	chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
+	max_symlink_blocks = howmany(MAXPATHLEN - 1, mp->m_sb.sb_blocksize);
+	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
+
+	/*
+	 * calculate what mkfs would do to this filesystem
+	 */
+	calc_mkfs(mp);
+
+	/*
+	 * check sb filesystem stats and initialize in-core data structures
+	 */
+	incore_init(mp);
+
+	if (parse_sb_version(&mp->m_sb))  {
+		do_warn(
+		      "Found unsupported filesystem features.  Exiting now.\n");
+		return(1);
+	}
+
+	/* make sure the per-ag freespace maps are ok so we can mount the fs */
+	phase2(mp, &args);
+	phase3(mp);
+	phase4(mp);
+
+	if (no_modify)
+		printf("No modify flag set, skipping phase 5\n");
+	else
+		phase5(mp);
+
+	if (!bad_ino_btree)  {
+		phase6(mp);
+		phase7(mp);
+	} else  {
+		do_warn(
+	"Inode allocation btrees are too corrupted, skipping phases 6 and 7\n");
+	}
+
+	if (lost_quotas && !have_uquotino && !have_pquotino)  {
+		if (!no_modify)  {
+			do_warn(
+	"Warning:  no quota inodes were found.  Quotas disabled.\n");
+		} else  {
+			do_warn(
+	"Warning:  no quota inodes were found.  Quotas would be disabled.\n");
+		}
+	} else if (lost_quotas)  {
+		if (!no_modify)  {
+			do_warn(
+	"Warning:  quota inodes were cleared.  Quotas disabled.\n");
+		} else  {
+			do_warn(
+"Warning:  quota inodes would be cleared.  Quotas would be disabled.\n");
+		}
+	} else  {
+		if (lost_uquotino)  {
+			if (!no_modify)  {
+				do_warn(
+		"Warning:  user quota information was cleared.\n");
+				do_warn(
+"User quotas can not be enforced until limit information is recreated.\n");
+			} else  {
+				do_warn(
+		"Warning:  user quota information would be cleared.\n");
+				do_warn(
+"User quotas could not be enforced until limit information was recreated.\n");
+			}
+		}
+
+		if (lost_pquotino)  {
+			if (!no_modify)  {
+				do_warn(
+		"Warning:  project quota information was cleared.\n");
+				do_warn(
+"Project quotas can not be enforced until limit information is recreated.\n");
+			} else  {
+				do_warn(
+		"Warning:  project quota information would be cleared.\n");
+				do_warn(
+"Project quotas could not be enforced until limit information was recreated.\n");
+			}
+		}
+	}
+
+	if (no_modify)  {
+		do_log(
+	"No modify flag set, skipping filesystem flush and exiting.\n");
+		/* fs_is_dirty is set by every do_warn() call */
+		return(fs_is_dirty ? 1 : 0);
+	}
+
+	/*
+	 * Clear the quota flags if they're on.
+	 */
+	sbp = libxfs_getsb(mp, 0);
+	if (!sbp)
+		do_error("couldn't get superblock\n");
+
+	sb = XFS_BUF_TO_SBP(sbp);
+
+	if (sb->sb_qflags & (XFS_UQUOTA_CHKD|XFS_PQUOTA_CHKD))  {
+		do_warn(
+		"Note - quota info will be regenerated on next quota mount.\n");
+		sb->sb_qflags &= ~(XFS_UQUOTA_CHKD|XFS_PQUOTA_CHKD);
+	}
+
+	if (clear_sunit) {
+		do_warn(
+"Note - stripe unit (%d) and width (%d) fields have been reset.\n"
+"Please set with mount -o sunit=<value>,swidth=<value>\n",
+			sb->sb_unit, sb->sb_width);
+		sb->sb_unit = 0;
+		sb->sb_width = 0;
+	}
+
+	libxfs_writebuf(sbp, 0);
+
+	libxfs_umount(mp);
+	if (args.rtdev)
+		libxfs_device_close(args.rtdev);
+	if (args.logdev)
+		libxfs_device_close(args.logdev);
+	libxfs_device_close(args.ddev);
+
+	do_log("done\n");
+
+	return(0);
+}
-- 
2.39.5